Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: m-labs/artiq
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 2a210d74fb59
Choose a base ref
...
head repository: m-labs/artiq
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 7213984330ad
Choose a head ref
  • 2 commits
  • 4 files changed
  • 1 contributor

Commits on Mar 27, 2016

  1. compiler: extract runtime checks into separate cold functions.

    This reduces register pressure as well as function size, which
    favorably affects the inliner.
    whitequark committed Mar 27, 2016
    Copy the full SHA
    e75ad3d View commit details
  2. Copy the full SHA
    7213984 View commit details
Showing with 111 additions and 40 deletions.
  1. +9 −0 artiq/compiler/ir.py
  2. +3 −2 artiq/compiler/targets.py
  3. +65 −27 artiq/compiler/transforms/artiq_ir_generator.py
  4. +34 −11 artiq/compiler/transforms/llvm_ir_generator.py
9 changes: 9 additions & 0 deletions artiq/compiler/ir.py
Original file line number Diff line number Diff line change
@@ -423,6 +423,8 @@ class Function:
:ivar is_internal:
(bool) if True, the function should not be accessible from outside
the module it is contained in
:ivar is_cold:
(bool) if True, the function should be considered rarely called
"""

def __init__(self, typ, name, arguments, loc=None):
@@ -431,6 +433,7 @@ def __init__(self, typ, name, arguments, loc=None):
self.next_name = 1
self.set_arguments(arguments)
self.is_internal = False
self.is_cold = False

def _remove_name(self, name):
self.names.remove(name)
@@ -922,6 +925,8 @@ class Call(Instruction):
iodelay expressions for values of arguments
:ivar static_target_function: (:class:`Function` or None)
statically resolved callee
:ivar is_cold: (bool)
the callee function is cold
"""

"""
@@ -938,6 +943,7 @@ def __init__(self, func, args, arg_exprs, name=""):
super().__init__([func] + args, func.type.ret, name)
self.arg_exprs = arg_exprs
self.static_target_function = None
self.is_cold = False

def copy(self, mapper):
self_copy = super().copy(mapper)
@@ -1186,6 +1192,8 @@ class Invoke(Terminator):
iodelay expressions for values of arguments
:ivar static_target_function: (:class:`Function` or None)
statically resolved callee
:ivar is_cold: (bool)
the callee function is cold
"""

"""
@@ -1206,6 +1214,7 @@ def __init__(self, func, args, arg_exprs, normal, exn, name=""):
super().__init__([func] + args + [normal, exn], func.type.ret, name)
self.arg_exprs = arg_exprs
self.static_target_function = None
self.is_cold = False

def copy(self, mapper):
self_copy = super().copy(mapper)
5 changes: 3 additions & 2 deletions artiq/compiler/targets.py
Original file line number Diff line number Diff line change
@@ -100,9 +100,10 @@ def optimize(self, llmodule):
llpassmgr.add_global_optimizer_pass()

# Now, actually optimize the code.
llpassmgr.add_function_inlining_pass(70)
llpassmgr.add_cfg_simplification_pass()
llpassmgr.add_function_inlining_pass(275)
llpassmgr.add_instruction_combining_pass()
llpassmgr.add_cfg_simplification_pass()
llpassmgr.add_dead_arg_elimination_pass()
llpassmgr.add_gvn_pass()
llpassmgr.add_global_dce_pass()

92 changes: 65 additions & 27 deletions artiq/compiler/transforms/artiq_ir_generator.py
Original file line number Diff line number Diff line change
@@ -302,7 +302,8 @@ def codegen_default(default_node):
for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)):
default = codegen_default()
value = self.append(ir.Builtin("unwrap_or", [optargs[index], default],
typ.optargs[arg_name]))
typ.optargs[arg_name],
name="DEF.{}".format(arg_name)))
self.append(ir.SetLocal(env, arg_name, value))

result = self.visit(node.body)
@@ -574,9 +575,7 @@ def raise_exn(self, exn=None, loc=None):
self.current_block = raise_proxy

if exn is not None:
if loc is None:
loc = self.current_loc

assert loc is not None
loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr())
loc_line = ir.Constant(loc.line(), builtins.TInt32())
loc_column = ir.Constant(loc.column(), builtins.TInt32())
@@ -598,7 +597,7 @@ def raise_exn(self, exn=None, loc=None):
self.append(ir.Reraise())

def visit_Raise(self, node):
self.raise_exn(self.visit(node.exc))
self.raise_exn(self.visit(node.exc), loc=self.current_loc)

def visit_Try(self, node):
dispatcher = self.add_block("try.dispatch")
@@ -927,6 +926,55 @@ def visit_AttributeT(self, node):
else:
return self.append(ir.SetAttr(obj, node.attr, self.current_assign))

def _make_check(self, cond, exn_gen, loc=None, params=None):
    """
    Emit a runtime check whose failure path is a separate cold function.

    A new internal, cold :class:`ir.Function` is created and registered in
    ``self.functions``; its body raises the exception built by ``exn_gen``.
    In the current function, a ``check.body`` block calls (or, when an
    unwind target is active, invokes) that function and ends in
    ``ir.Unreachable``, and a ``check.tail`` block becomes the new current
    block. The block that was current on entry is terminated with a branch
    on ``cond`` to the tail block, or else to the body block.

    :param cond: bool Value, condition that must hold
    :param exn_gen: callable receiving one ``ir.Argument`` per entry of
        ``params`` (in order) and returning the exception Value to raise
        if the condition is not true
    :param loc: source location used for the generated function's name and
        for the raise; defaults to ``self.current_loc``
    :param params: Values passed from the check site to the cold function;
        defaults to no parameters
    """
    if loc is None:
        loc = self.current_loc
    # A fresh list per call: avoids sharing one mutable default between
    # calls, and guarantees a list for the ``[func] + args`` style
    # concatenation visible in the Call/Invoke constructors.
    params = [] if params is None else list(params)

    # Snapshot the generator state *before* entering the try block, so the
    # finally clause can always restore it, even if building the function
    # fails before any state has been swapped.
    saved_function = self.current_function
    saved_block = self.current_block
    saved_final_branch = self.final_branch
    saved_unwind = self.unwind_target
    try:
        name = "check:{}:{}".format(loc.line(), loc.column())
        args = [ir.EnvironmentArgument(self.current_env.type, "ARG.ENV")] + \
               [ir.Argument(param.type, "ARG.{}".format(index))
                for index, param in enumerate(params)]
        typ = types.TFunction(OrderedDict([("arg{}".format(index), param.type)
                                           for index, param in enumerate(params)]),
                              OrderedDict(),
                              builtins.TNone())
        func = ir.Function(typ, ".".join(self.name + [name]), args, loc=loc)
        func.is_internal = True
        func.is_cold = True
        self.functions.append(func)
        self.current_function = func

        self.current_block = self.add_block("entry")

        # The cold function has no enclosing try/finally of its own.
        self.final_branch = None
        self.unwind_target = None
        # args[0] is the environment argument; the rest mirror ``params``.
        self.raise_exn(exn_gen(*args[1:]), loc=loc)
    finally:
        self.current_function = saved_function
        self.current_block = saved_block
        self.final_branch = saved_final_branch
        self.unwind_target = saved_unwind

    cond_block = self.current_block

    self.current_block = body_block = self.add_block("check.body")
    closure = self.append(ir.Closure(func, ir.Constant(None, ir.TEnvironment("check", {}))))
    if self.unwind_target is None:
        insn = self.append(ir.Call(closure, params, {}))
    else:
        after_invoke = self.add_block("check.invoke")
        insn = self.append(ir.Invoke(closure, params, {}, after_invoke, self.unwind_target))
        self.current_block = after_invoke
    insn.is_cold = True
    self.append(ir.Unreachable())

    self.current_block = tail_block = self.add_block("check.tail")
    cond_block.append(ir.BranchIf(cond, tail_block, body_block))

def _map_index(self, length, index, one_past_the_end=False, loc=None):
lt_0 = self.append(ir.Compare(ast.Lt(loc=None),
index, ir.Constant(0, index.type)))
@@ -940,28 +988,16 @@ def _map_index(self, length, index, one_past_the_end=False, loc=None):
ir.Constant(False, builtins.TBool())))
head = self.current_block

self.current_block = out_of_bounds_block = self.add_block("index.outofbounds")
exn = self.alloc_exn(builtins.TException("IndexError"),
ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()),
index, length)
self.raise_exn(exn, loc=loc)

self.current_block = in_bounds_block = self.add_block("index.inbounds")
head.append(ir.BranchIf(in_bounds, in_bounds_block, out_of_bounds_block))
self._make_check(
in_bounds,
lambda index, length: self.alloc_exn(builtins.TException("IndexError"),
ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()),
index, length),
params=[index, length],
loc=loc)

return mapped_index

def _make_check(self, cond, exn_gen, loc=None, name="check"):
    """
    Emit an inline runtime check.

    :param cond: bool Value, condition that must hold
    :param exn_gen: callable taking no arguments and returning the
        exception Value to raise if the condition is not true
    :param loc: source location forwarded to ``raise_exn``
    :param name: prefix for the names of the two blocks created here
    """
    origin = self.current_block

    # Failure path: build the exception and raise it.
    failure = self.add_block("{}.body".format(name))
    self.current_block = failure
    self.raise_exn(exn_gen(), loc=loc)

    # Success path: code generation continues in the tail block.
    success = self.add_block("{}.tail".format(name))
    self.current_block = success
    origin.append(ir.BranchIf(cond, success, failure))

def _make_loop(self, init, cond_gen, body_gen, name="loop"):
# init: 'iter Value, initial loop variable value
# cond_gen: lambda('iter Value)->bool Value, loop condition
@@ -1064,10 +1100,11 @@ def visit_SubscriptT(self, node):
name="slice.size"))
self._make_check(
self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)),
lambda: self.alloc_exn(builtins.TException("ValueError"),
lambda slice_size, length: self.alloc_exn(builtins.TException("ValueError"),
ir.Constant("slice size {0} is larger than iterable length {1}",
builtins.TStr()),
slice_size, length),
params=[slice_size, length],
loc=node.slice.loc)

if self.current_assign is None:
@@ -1147,9 +1184,10 @@ def visit_ListT(self, node):
self._make_check(
self.append(ir.Compare(ast.Eq(loc=None), length,
ir.Constant(len(node.elts), self._size_type))),
lambda: self.alloc_exn(builtins.TException("ValueError"),
lambda length: self.alloc_exn(builtins.TException("ValueError"),
ir.Constant("list must be {0} elements long to decompose", builtins.TStr()),
length))
length),
params=[length])

for index, elt_node in enumerate(node.elts):
elt = self.append(ir.GetElem(self.current_assign,
45 changes: 34 additions & 11 deletions artiq/compiler/transforms/llvm_ir_generator.py
Original file line number Diff line number Diff line change
@@ -313,7 +313,10 @@ def llstr_of_str(self, value, name=None, linkage="private", unnamed_addr=True):
def llconst_of_const(self, const):
llty = self.llty_of_type(const.type)
if const.value is None:
return ll.Constant(llty, [])
if isinstance(llty, ll.PointerType):
return ll.Constant(llty, None)
else:
return ll.Constant(llty, [])
elif const.value is True:
return ll.Constant(llty, True)
elif const.value is False:
@@ -539,6 +542,10 @@ def process_function(self, func):

if func.is_internal:
self.llfunction.linkage = 'private'
if func.is_cold:
self.llfunction.calling_convention = 'coldcc'
self.llfunction.attributes.add('cold')
self.llfunction.attributes.add('noinline')

self.llfunction.attributes.add('uwtable')

@@ -1039,7 +1046,7 @@ def get_outer(llenv, env_ty):

def process_Closure(self, insn):
llenv = self.map(insn.environment())
llenv = self.llbuilder.bitcast(llenv, llptr, name="ptr.{}".format(llenv.name))
llenv = self.llbuilder.bitcast(llenv, llptr)
llfun = self.map(insn.target_function)
llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined)
llvalue = self.llbuilder.insert_value(llvalue, llenv, 0)
@@ -1244,15 +1251,17 @@ def process_Call(self, insn):
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])

llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
self.llbuilder.call(llfun, [llresultslot] + llargs)
llcall = self.llbuilder.call(llfun, [llresultslot] + llargs)
llresult = self.llbuilder.load(llresultslot)

self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])

return llresult
else:
return self.llbuilder.call(llfun, llargs,
name=insn.name)
llcall = llresult = self.llbuilder.call(llfun, llargs, name=insn.name)

if insn.is_cold:
llcall.cconv = 'coldcc'

return llresult

def process_Invoke(self, insn):
llnormalblock = self.map(insn.normal_target())
@@ -1264,12 +1273,26 @@ def process_Invoke(self, insn):
llnormalblock, llunwindblock)
elif types.is_c_function(insn.target_function().type):
llfun, llargs = self._prepare_ffi_call(insn)
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
else:
llfun, llargs = self._prepare_closure_call(insn)
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)

if self.has_sret(insn.target_function().type):
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])

llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
llresult = self.llbuilder.load(llresultslot)

self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
else:
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)

if insn.is_cold:
llcall.cconv = 'coldcc'

return llcall

def _quote(self, value, typ, path):
value_id = id(value)