Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/artiq
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: cb9e7d15bf2a
Choose a base ref
...
head repository: m-labs/artiq
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 50e7b44d0471
Choose a head ref
  • 2 commits
  • 5 files changed
  • 1 contributor

Commits on Nov 20, 2015

  1. Reformat.

    whitequark committed Nov 20, 2015
    Copy the full SHA
    73c358a View commit details
  2. compiler: actually implement interleaving correctly (calls are still broken).
    
    The previous implementation was completely wrong: it always advanced
    the global timeline by the same amount as the non-interleaved basic
    block did.
    
    The new implementation only advances the global timeline by
    the difference between its current time and the virtual time of
    the branch, which requires it to adjust the delay instructions.
    
    Previously, the delay expression was present in the IR twice: once
    as the iodelay.Expr transformation-visible form, and once as regular
    IR instructions, with the latter form being passed to the delay_mu
    builtin and advancing the runtime timeline.
    
    As a result of this change, this strategy is no longer valid:
    we can meaningfully mutate the iodelay.Expr form but not the IR
    instruction form. Thus, IR instructions are no longer generated for
    delay expressions, and the LLVM lowering pass now has to lower
    the iodelay.Expr objects as well.
    
    This works OK for flat `with parallel:` expressions, but breaks down
    outside of `with parallel:` or when calls are present. The reasons
    it breaks down are as follows:
    
      * Outside of `with parallel:`, delay() and delay_mu() must accept
        any expression, but iodelay.Expr's are not nearly expressive
        enough. So, the IR instruction form must actually be kept as well.
    
      * A delay instruction is currently inserted after a call to
        a user-defined function; this delay instruction both introduces
        a point where basic block reordering is possible and
        provides delay information. However, the callee knows nothing
        about the context in which it is called, which means that
        the runtime timeline is advanced twice. So, a new terminator
        instruction must be added that combines the properties of delay
        and call instructions (and another for delay and invoke as well).
    whitequark committed Nov 20, 2015
    Copy the full SHA
    50e7b44 View commit details
135 changes: 73 additions & 62 deletions artiq/compiler/transforms/artiq_ir_generator.py
Original file line number Diff line number Diff line change
@@ -1418,6 +1418,15 @@ def alloc_exn(self, typ, message=None, param0=None, param1=None, param2=None):

return self.append(ir.Alloc(attributes, typ))

def _make_delay(self, delay):
    """Terminate the current block with an ``ir.Delay`` for ``delay``.

    Nothing is emitted when ``iodelay.is_const(delay, 0)`` holds. Otherwise
    a fresh block is created, an ``ir.Delay`` targeting that block is
    appended, and code generation continues in the fresh block.

    :param delay: the delay expression; it must provide ``free_vars()``
        (presumably an ``iodelay.Expr`` -- confirm against callers)
    """
    # A delay that is constant zero needs no instruction at all.
    if iodelay.is_const(delay, 0):
        return

    continuation = self.add_block()

    # Bind each free variable of the expression to the value of the
    # same-named entry in ``self.current_args``.
    bindings = {}
    for name in delay.free_vars():
        bindings[name] = self.current_args[name]

    self.append(ir.Delay(delay, bindings, continuation))
    # Everything emitted after the delay goes into the continuation block.
    self.current_block = continuation

def visit_builtin_call(self, node):
# A builtin by any other name... Ignore node.func, just use the type.
typ = node.func.type
@@ -1520,16 +1529,15 @@ def body_gen(index):
return self.append(ir.Arith(ast.Mult(loc=None), now_mu_float, self.ref_period))
else:
assert False
elif types.is_builtin(typ, "delay") or types.is_builtin(typ, "at"):
elif types.is_builtin(typ, "at"):
if len(node.args) == 1 and len(node.keywords) == 0:
arg = self.visit(node.args[0])
arg_mu_float = self.append(ir.Arith(ast.Div(loc=None), arg, self.ref_period))
arg_mu = self.append(ir.Coerce(arg_mu_float, builtins.TInt(types.TValue(64))))
self.append(ir.Builtin(typ.name + "_mu", [arg_mu], builtins.TNone()))
else:
assert False
elif types.is_builtin(typ, "now_mu") or types.is_builtin(typ, "delay_mu") \
or types.is_builtin(typ, "at_mu"):
elif types.is_builtin(typ, "now_mu") or types.is_builtin(typ, "at_mu"):
return self.append(ir.Builtin(typ.name,
[self.visit(arg) for arg in node.args], node.type))
elif types.is_builtin(typ, "mu_to_seconds"):
@@ -1546,6 +1554,9 @@ def body_gen(index):
return self.append(ir.Coerce(arg_mu, builtins.TInt(types.TValue(64))))
else:
assert False
elif types.is_builtin(typ, "delay") or types.is_builtin(typ, "delay_mu"):
assert node.iodelay is not None
self._make_delay(node.iodelay)
elif types.is_exn_constructor(typ):
return self.alloc_exn(node.type, *[self.visit(arg_node) for arg_node in node.args])
elif types.is_constructor(typ):
@@ -1557,69 +1568,69 @@ def visit_CallT(self, node):
typ = node.func.type.find()

if types.is_builtin(typ):
insn = self.visit_builtin_call(node)
return self.visit_builtin_call(node)

if types.is_function(typ):
func = self.visit(node.func)
self_arg = None
fn_typ = typ
offset = 0
elif types.is_method(typ):
method = self.visit(node.func)
func = self.append(ir.GetAttr(method, "__func__"))
self_arg = self.append(ir.GetAttr(method, "__self__"))
fn_typ = types.get_method_function(typ)
offset = 1
else:
if types.is_function(typ):
func = self.visit(node.func)
self_arg = None
fn_typ = typ
offset = 0
elif types.is_method(typ):
method = self.visit(node.func)
func = self.append(ir.GetAttr(method, "__func__"))
self_arg = self.append(ir.GetAttr(method, "__self__"))
fn_typ = types.get_method_function(typ)
offset = 1
else:
assert False
assert False

args = [None] * (len(fn_typ.args) + len(fn_typ.optargs))
args = [None] * (len(fn_typ.args) + len(fn_typ.optargs))

for index, arg_node in enumerate(node.args):
arg = self.visit(arg_node)
if index < len(fn_typ.args):
args[index + offset] = arg
else:
args[index + offset] = self.append(ir.Alloc([arg], ir.TOption(arg.type)))

for keyword in node.keywords:
arg = self.visit(keyword.value)
if keyword.arg in fn_typ.args:
for index, arg_name in enumerate(fn_typ.args):
if keyword.arg == arg_name:
assert args[index] is None
args[index] = arg
break
elif keyword.arg in fn_typ.optargs:
for index, optarg_name in enumerate(fn_typ.optargs):
if keyword.arg == optarg_name:
assert args[len(fn_typ.args) + index] is None
args[len(fn_typ.args) + index] = \
self.append(ir.Alloc([arg], ir.TOption(arg.type)))
break

for index, optarg_name in enumerate(fn_typ.optargs):
if args[len(fn_typ.args) + index] is None:
args[len(fn_typ.args) + index] = \
self.append(ir.Alloc([], ir.TOption(fn_typ.optargs[optarg_name])))

if self_arg is not None:
assert args[0] is None
args[0] = self_arg

assert None not in args

if self.unwind_target is None:
insn = self.append(ir.Call(func, args))
for index, arg_node in enumerate(node.args):
arg = self.visit(arg_node)
if index < len(fn_typ.args):
args[index + offset] = arg
else:
after_invoke = self.add_block()
insn = self.append(ir.Invoke(func, args, after_invoke, self.unwind_target))
self.current_block = after_invoke

method_key = None
if isinstance(node.func, asttyped.AttributeT):
attr_node = node.func
self.method_map[(attr_node.value.type, attr_node.attr)].append(insn)
args[index + offset] = self.append(ir.Alloc([arg], ir.TOption(arg.type)))

for keyword in node.keywords:
arg = self.visit(keyword.value)
if keyword.arg in fn_typ.args:
for index, arg_name in enumerate(fn_typ.args):
if keyword.arg == arg_name:
assert args[index] is None
args[index] = arg
break
elif keyword.arg in fn_typ.optargs:
for index, optarg_name in enumerate(fn_typ.optargs):
if keyword.arg == optarg_name:
assert args[len(fn_typ.args) + index] is None
args[len(fn_typ.args) + index] = \
self.append(ir.Alloc([arg], ir.TOption(arg.type)))
break

for index, optarg_name in enumerate(fn_typ.optargs):
if args[len(fn_typ.args) + index] is None:
args[len(fn_typ.args) + index] = \
self.append(ir.Alloc([], ir.TOption(fn_typ.optargs[optarg_name])))

if self_arg is not None:
assert args[0] is None
args[0] = self_arg

assert None not in args

if self.unwind_target is None:
insn = self.append(ir.Call(func, args))
else:
after_invoke = self.add_block()
insn = self.append(ir.Invoke(func, args, after_invoke, self.unwind_target))
self.current_block = after_invoke

method_key = None
if isinstance(node.func, asttyped.AttributeT):
attr_node = node.func
self.method_map[(attr_node.value.type, attr_node.attr)].append(insn)

if node.iodelay is not None and not iodelay.is_const(node.iodelay, 0):
after_delay = self.add_block()
16 changes: 13 additions & 3 deletions artiq/compiler/transforms/interleaver.py
Original file line number Diff line number Diff line change
@@ -77,6 +77,9 @@ def time_after_block(pair):
index, source_block = min(enumerate(source_blocks), key=time_after_block)
source_block_delay = iodelay_of_block(source_block)

new_target_time = source_times[index] + source_block_delay
target_time_delta = new_target_time - target_time

target_terminator = target_block.terminator()
if isinstance(target_terminator, (ir.Delay, ir.Branch)):
target_terminator.set_target(source_block)
@@ -85,8 +88,15 @@ def time_after_block(pair):
else:
assert False

target_block = source_block
target_time += source_block_delay
source_terminator = source_block.terminator()
if target_time_delta > 0:
assert isinstance(source_terminator, ir.Delay)
source_terminator.expr = iodelay.Const(target_time_delta)
else:
source_terminator.replace_with(ir.Branch(source_terminator.target()))

target_block = source_block
target_time = new_target_time

new_source_block = postdom_tree.immediate_dominator(source_block)
assert (new_source_block is not None)
@@ -98,4 +108,4 @@ def time_after_block(pair):
del source_times[index]
else:
source_blocks[index] = new_source_block
source_times[index] = target_time
source_times[index] = new_target_time
23 changes: 14 additions & 9 deletions artiq/compiler/transforms/llvm_ir_generator.py
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@
from pythonparser import ast, diagnostic
from llvmlite_artiq import ir as ll
from ...language import core as language_core
from .. import types, builtins, ir
from .. import types, builtins, ir, iodelay


llvoid = ll.VoidType()
@@ -784,12 +784,6 @@ def get_outer(llenv, env_ty):
return self.map(insn.operands[0])
elif insn.op == "now_mu":
return self.llbuilder.load(self.llbuiltin("now"), name=insn.name)
elif insn.op == "delay_mu":
interval, = insn.operands
llnowptr = self.llbuiltin("now")
llnow = self.llbuilder.load(llnowptr)
lladjusted = self.llbuilder.add(llnow, self.map(interval))
return self.llbuilder.store(lladjusted, llnowptr)
elif insn.op == "at_mu":
time, = insn.operands
return self.llbuilder.store(self.map(time), self.llbuiltin("now"))
@@ -1068,8 +1062,6 @@ def process_Select(self, insn):
def process_Branch(self, insn):
return self.llbuilder.branch(self.map(insn.target()))

process_Delay = process_Branch

def process_BranchIf(self, insn):
return self.llbuilder.cbranch(self.map(insn.condition()),
self.map(insn.if_true()), self.map(insn.if_false()))
@@ -1150,3 +1142,16 @@ def process_LandingPad(self, insn):

return llexn

def process_Delay(self, insn):
    """Lower a delay terminator to LLVM IR.

    Loads the value behind the ``"now"`` builtin, adds the lowered form of
    ``insn.expr`` to it, stores the sum back, and then emits a branch to
    ``insn.target()``.

    Only ``iodelay.Const`` expressions can be lowered here; any other
    expression kind trips ``assert False``.

    :param insn: the delay instruction being lowered
    :return: the branch instruction produced by the LLVM builder
    """
    def lower_expr(delay_expr):
        # Constants become an immediate of type ``lli64`` (presumably the
        # 64-bit LLVM integer type -- defined elsewhere in this module).
        if isinstance(delay_expr, iodelay.Const):
            return ll.Constant(lli64, int(delay_expr.value))
        assert False

    now_slot = self.llbuiltin("now")
    current_now = self.llbuilder.load(now_slot)
    advanced_now = self.llbuilder.add(current_now, lower_expr(insn.expr))
    self.llbuilder.store(advanced_now, now_slot)

    # With the timeline advanced, fall through to the successor block.
    successor = self.map(insn.target())
    return self.llbuilder.branch(successor)
25 changes: 25 additions & 0 deletions lit-test/test/interleaving/nonoverlapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# RUN: %python -m artiq.compiler.testbench.jit %s >%t
# RUN: OutputCheck %s --file-to-check=%t

def g():
with parallel:
with sequential:
print("A", now_mu())
delay_mu(2)
#
print("B", now_mu())
with sequential:
print("C", now_mu())
delay_mu(2)
#
print("D", now_mu())
delay_mu(2)
#
print("E", now_mu())

# CHECK-L: A 0
# CHECK-L: B 2
# CHECK-L: C 2
# CHECK-L: D 2
# CHECK-L: E 4
g()
Original file line number Diff line number Diff line change
@@ -6,17 +6,20 @@ def g():
with sequential:
print("A", now_mu())
delay_mu(3)
#
print("B", now_mu())
with sequential:
print("C", now_mu())
delay_mu(2)
#
print("D", now_mu())
delay_mu(2)
#
print("E", now_mu())

# CHECK-L: C 0
# CHECK-L: A 2
# CHECK-L: D 5
# CHECK-L: B 7
# CHECK-L: E 7
# CHECK-L: B 3
# CHECK-L: D 3
# CHECK-L: E 4
g()