#!/bin/sh
""":" .

exec python "$0" "$@"
"""
# NOTE: The header above is valid both as a shell script and as Python.  The
# shell runs the no-op `:` and then re-executes this file with `python`;
# Python sees nothing but a string literal.

from __future__ import print_function

import argparse
import contextlib
import copy
import math
import os
import re
import subprocess
import sys

__doc__ = """\
Generate form.set suited for the local machine.

Example
-------
$ formset.py -o
$ tform `formset.py -f` calcdia.frm
$ minos `formset.py -m` minos.file

Python versions
---------------
2.7, 3.2, 3.3, 3.4, 3.5
"""

if not hasattr(subprocess, 'check_output'):
    # For old systems where Python 2.6 + argparse available.
    def check_output(*popenargs, **kwargs):
        """Run a command and return its standard output.

        Raises `subprocess.CalledProcessError` when the command exits with
        a non-zero status.
        """
        if 'stdout' in kwargs:  # pragma: no cover
            raise ValueError('stdout argument not allowed, '
                             'it will be overridden.')
        process = subprocess.Popen(stdout=subprocess.PIPE,
                                   *popenargs, **kwargs)
        output, _ = process.communicate()
        retcode = process.poll()
        if retcode:
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = popenargs[0]
            # `output` keyword is not available in 2.6.
            raise subprocess.CalledProcessError(retcode, cmd)
        return output

    subprocess.check_output = check_output


@contextlib.contextmanager
def open_w_or_stdout(filename=None):
    """Context manager yielding a writable file, or stdout.

    When `filename` is given, the data is written to a temporary file next
    to it, which is renamed onto `filename` only after the body has finished
    successfully.  If the body raises, the temporary file is removed and any
    existing `filename` is left untouched.  When `filename` is empty or
    None, `sys.stdout` is yielded and nothing is closed.
    """
    if not filename:
        yield sys.stdout
        return

    # See https://stackoverflow.com/a/2333979.
    tmpfilename = '{0}.tmp{1}'.format(filename, os.getpid())
    f = open(tmpfilename, 'w')
    try:
        yield f
        # Make sure the data is on disk before the rename publishes it.
        f.flush()
        os.fsync(f.fileno())
    except BaseException:
        f.close()
        try:
            os.remove(tmpfilename)
        except OSError:
            # Best-effort cleanup; the original error is what matters.
            pass
        raise
    f.close()
    os.rename(tmpfilename, filename)


def round_down(x, n):
    """Round down `x` to nearest `n`."""
    return x // n * n


def round_up(x, n):
    """Round up `x` to nearest `n`."""
    return (x + (n - 1)) // n * n


# Multipliers for the (case-insensitive) suffixes accepted by parse_number().
_METRIC_PREFIXES = {
    '': 1,
    'k': 1000,
    'm': 1000**2,
    'g': 1000**3,
    't': 1000**4,
}


def metric_prefix(s):
    """Parse a metric prefix as a number.

    Returns the multiplier for '', 'K', 'M', 'G' or 'T' (case-insensitive),
    or None for anything else.
    """
    return _METRIC_PREFIXES.get(s.lower())


def parse_number(s):
    """Parse a string as a number with a possible metric prefix.

    Examples: '42' -> 42, '10K' -> 10000, '1.5M' -> 1500000.

    Raises ValueError if `s` is not a finite number.
    """
    scale = 1
    m = re.match(r'(.*)([kmgtKMGT])$', s)
    if m:
        s = m.group(1)
        scale = metric_prefix(m.group(2))
    try:
        # Pure integers stay exact (no round trip through float).
        return int(s) * scale
    except ValueError:
        pass
    try:
        # May raise ValueError.
        return int(float(s) * scale)
    except OverflowError:
        # int(float('inf')); report it like any other malformed number.
        raise ValueError('not a finite number: {0}'.format(s))


def round_human_readable(x, up=False, tostring=True):
    """Round off `x` within a human readable form.

    `x` is rounded (down by default, up if `up` is true) to 3 significant
    figures.  If `tostring` is true and the rounded value is a multiple of
    1000, it is returned as a string with the largest suffix among K, M, G
    and T that represents it exactly (e.g., '1230K'); otherwise the rounded
    number itself is returned.  Non-positive values are returned unchanged.
    """
    if x <= 0:
        # log10() is undefined here and there is nothing to round.
        return x

    round_off = round_up if up else round_down

    # Take 3 significant figures.  The granularity never goes below 1, so
    # values smaller than 100 are kept as they are.
    exponent = int(math.floor(math.log10(x))) - 2
    x = round_off(x, 10 ** max(exponent, 0))

    # Find a good suffix which doesn't change the value.
    for power, suffix in ((4, 'T'), (3, 'G'), (2, 'M'), (1, 'K')):
        unit = 1000 ** power
        xx = round_off(x, unit)
        if xx == x:
            if tostring:
                return '{0}{1}'.format(xx // unit, suffix)
            return xx
    return x


class classproperty(property):  # noqa
    """Decorator to make a property of a class."""

    def __get__(self, cls, owner):
        """Getter: call the wrapped function with the owner class."""
        return classmethod(self.fget).__get__(None, owner)()


class SystemInfo(object):
    """System information obtained from `lscpu` and `free`.

    The commands are run lazily, at most once each, and the parsed results
    are cached on the class.
    """

    _cpu_info = None  # cached result of _get_cpu_info()
    _mem_info = None  # cached result of _get_mem_info()
    verbose = False   # if true, report on stderr before running a command

    @classproperty
    def number_of_nodes(cls):  # noqa
        """Return the number of nodes."""
        info = cls._get_cpu_info()
        if 'NUMA node(s)' in info:
            return int(info['NUMA node(s)'])
        return 1

    @classproperty
    def number_of_cpus(cls):  # noqa
        """Return the number of cpus."""
        info = cls._get_cpu_info()
        return int(info['CPU(s)'])

    @classproperty
    def number_of_physical_cores(cls):  # noqa
        """Return the number of physical cores."""
        info = cls._get_cpu_info()
        return int(info['Socket(s)']) * int(info['Core(s) per socket'])

    @classproperty
    def total_memory(cls):  # noqa
        """Return the total physical memory in bytes."""
        info = cls._get_mem_info()
        return int(info['Mem'][0])

    @staticmethod
    def _parse_colon_table(text):
        """Parse lines of the form 'key: value' into a dictionary.

        Only the first colon of a line separates the key from the value, so
        values may themselves contain colons.  Lines without any colon are
        ignored.  Keys and values are stripped of surrounding whitespace.
        """
        table = {}
        for line in text.strip().split('\n'):
            key, sep, value = line.partition(':')
            if sep:
                table[key.strip()] = value.strip()
        return table

    @classmethod
    def _get_cpu_info(cls):
        """Return the output of `lscpu` as a dictionary of strings."""
        if cls._cpu_info is None:
            if cls.verbose:
                sys.stderr.write('running lscpu...\n')
            output = subprocess.check_output(['lscpu']).decode('utf-8')
            cls._cpu_info = cls._parse_colon_table(output)
        return cls._cpu_info

    @classmethod
    def _get_mem_info(cls):
        """Return the output of `free -b` as a dictionary.

        Each value is the list of whitespace-separated columns that follow
        the row label.
        """
        if cls._mem_info is None:
            if cls.verbose:
                sys.stderr.write('running free...\n')
            output = subprocess.check_output(['free', '-b']).decode('utf-8')
            rows = cls._parse_colon_table(output)
            cls._mem_info = dict((k, v.split()) for (k, v) in rows.items())
        return cls._mem_info


class Setup(object):
    """Setup parameters.

    Every public attribute is a setup parameter; attributes starting with
    an underscore are internal constants used by the memory estimation.
    """

    def __init__(self):
        """Construct a set of setup parameters with the default values."""
        self.compresssize = 90000
        self.filepatches = 256
        self.hidesize = 0
        self.largepatches = 256
        self.largesize = 50000000
        self.maxtermsize = 40000  # 64-bit
        self.numstorecaches = 4
        self.scratchsize = 50000000
        self.sizestorecache = 32768
        self.smallextension = 20000000
        self.smallsize = 10000000
        self.sortiosize = 100000
        self.termsinsmall = 100000
        self.threadbucketsize = 500
        self.threads = -1  # form
        self.threadscratchoutsize = 2500000
        self.threadscratchsize = 100000
        self.workspace = 40000000  # 64-bit

        self.bracketindexsize = 200000
        self.constindex = 128
        self.continuationlines = 15
        self.functionlevels = 30
        self.maxnumbersize = 200
        self.maxwildcards = 100
        self.parentheses = 100
        self.processbucketsize = 1000
        self.subfilepatches = 64
        self.sublargepatches = 64
        self.sublargesize = 4000000
        self.subsmallextension = 800000
        self.subsmallsize = 500000
        self.subsortiosize = 32768
        self.subtermsinsmall = 10000

        # 64-bit
        self._ptrsize = 8
        self._possize = 8
        self._wordsize = 4

    def items(self):
        """Return pairs of parameters and values, sorted by name.

        Internal attributes (starting with an underscore) are excluded.
        """
        return tuple(sorted(
            (k, v) for (k, v) in self.__dict__.items() if k[0] != '_'
        ))

    def __str__(self):
        """Return the string representation.

        NOTE: this calls `calc()`, which may adjust the parameters.
        """
        mem = self.calc()
        params = ['{0}: {1}'.format(k, v) for (k, v) in self.items()]
        return '<Setup: {0} bytes, {1}>'.format(mem, ', '.join(params))

    def copy(self):
        """Return a shallow copy."""
        return copy.copy(self)

    def calc(self):
        """Return an estimation of memory usage in bytes.

        As a side effect, parameters violating the constraints below are
        adjusted in place.
        """
        self.maxtermsize = max(self.maxtermsize, 200)

        self.compresssize = max(self.compresssize,
                                2 * self.maxtermsize * self._wordsize)
        self.sortiosize = max(self.sortiosize,
                              self.maxtermsize * self._wordsize)

        # The strange factor WordSize**2 is used in the FORM source...
        minscratch = 4 * self.maxtermsize * self._wordsize**2
        self.scratchsize = max(self.scratchsize, minscratch)
        if self.hidesize > 0:
            self.hidesize = max(self.hidesize, minscratch)

        self.threadscratchsize = max(self.threadscratchsize, minscratch)
        self.threadscratchoutsize = max(self.threadscratchoutsize,
                                        minscratch)

        # constraints in RecalcSetups()

        self.filepatches = max(self.filepatches, self.threads)

        self.termsinsmall = round_up(self.termsinsmall, 16)

        numberofblocksinsort = 10
        minimumnumberofterms = 10
        n = numberofblocksinsort * minimumnumberofterms
        if self.threads >= 0:
            minbufsize = (self.threads * (1 + n) * self.maxtermsize *
                          self._wordsize)
            if self.largesize + self.smallextension < minbufsize:
                self.largesize = minbufsize - self.smallextension

        # constraints in AllocSort()

        self.filepatches = max(self.filepatches, 4)

        self.smallsize = max(self.smallsize,
                             16 * self.maxtermsize * self._wordsize)

        self.smallextension = max(self.smallextension,
                                  self.smallsize * 3 // 2)

        if self.largesize > 0:
            self.largesize = max(self.largesize, 2 * self.smallsize)

        compinc = 2
        minbufsize = self.filepatches * (
            self.sortiosize +
            (compinc + 2 * self.maxtermsize) * self._wordsize)
        if self.largesize + self.smallextension < minbufsize:
            if self.largesize == 0:
                self.smallextension = minbufsize
            else:
                self.largesize = minbufsize - self.smallextension

        iotry = (((self.largesize + self.smallextension) //
                  self.filepatches // self._wordsize) -
                 2 * self.maxtermsize - compinc)  # in words
        self.sortiosize = max(self.sortiosize, iotry)  # bytes vs. words??

        # Compute the memory usage.
        mem = 0
        mem += (self.scratchsize * 2 +
                (self.hidesize if self.hidesize > 0 else self.scratchsize))
        mem += self.workspace * self._wordsize
        mem += (self.compresssize + 10) * self._wordsize
        mem += (self.largesize + self.smallextension +
                3 * self.termsinsmall * self._ptrsize + self.sortiosize)

        storecachesize = self._possize * 2 * self._ptrsize + self._wordsize
        # ignore the padding
        storecachesize += self.sizestorecache
        mem += storecachesize * self.numstorecaches

        if self.threads >= 1:
            mem += ((self.threadscratchoutsize +
                     self.threadscratchsize * 2) * self.threads)
            mem += self.workspace * self._wordsize * self.threads
            mem += (self.compresssize + 10) * self._wordsize * self.threads
            mem += self._thread_alloc_sort(
                self.largesize // self.threads,
                self.smallsize // self.threads,
                self.smallextension // self.threads,
                self.termsinsmall,
                self.largepatches,
                self.filepatches // self.threads,
                self.sortiosize) * self.threads
            mem += storecachesize * self.numstorecaches * self.threads

            sizethreadbuckets = ((self.threadbucketsize + 1) *
                                 self.maxtermsize + 2) * self._wordsize
            if self.threadbucketsize >= 250:
                sizethreadbuckets //= 4
            elif self.threadbucketsize >= 90:
                sizethreadbuckets //= 3
            elif self.threadbucketsize >= 40:
                sizethreadbuckets //= 2
            sizethreadbuckets //= self._wordsize
            mem += ((2 * sizethreadbuckets * self._wordsize +
                     (self.threadbucketsize + 1) * self._possize) *
                    2 * self.threads)
            if self.threads >= 3:
                mem += ((self.workspace * self._wordsize // 8 +
                         2 * self.maxtermsize * self._wordsize) *
                        (self.threads - 2))

        return mem

    def _thread_alloc_sort(self, largesize, smallsize, smallextension,
                           termsinsmall, largepatches, filepatches,
                           sortiosize):
        """Return the size of the sort buffers for one thread.

        Applies the same AllocSort() constraints as `calc()`, but to the
        given values and without modifying this object.  `largepatches` is
        accepted but not used in the estimation.
        """
        filepatches = max(filepatches, 4)

        smallsize = max(smallsize, 16 * self.maxtermsize * self._wordsize)

        smallextension = max(smallextension, smallsize * 3 // 2)

        if largesize > 0:
            largesize = max(largesize, 2 * smallsize)

        compinc = 2
        minbufsize = filepatches * (
            sortiosize + (compinc + 2 * self.maxtermsize) * self._wordsize)
        if largesize + smallextension < minbufsize:
            if largesize == 0:
                smallextension = minbufsize
            else:
                largesize = minbufsize - smallextension

        iotry = (((largesize + smallextension) // filepatches //
                  self._wordsize) -
                 2 * self.maxtermsize - compinc)  # in words
        sortiosize = max(sortiosize, iotry)  # bytes vs. words??

        return (largesize + smallextension +
                3 * termsinsmall * self._ptrsize + sortiosize)


def main():
    """Entry point."""
    # Parse the command line arguments.
    parser = argparse.ArgumentParser(
        usage=('%(prog)s [options] [--] '
               '[par=val].. [par+=int].. [par*=float]..'),
        epilog=('On non-Linux systems, the number of physical CPUs and memory '
                'available on the machine may be not automatically detected. '
                'In such a case, one cannot use the default parameters '
                'depending on those values and needs to explicitly specify '
                '--ncpus, --total-cpus and --total-memory.'),
        add_help=False
    )
    parser.add_argument('-h',
                        '--help',
                        action='store_const',
                        const=True,
                        help='show this help message and exit')
    parser.add_argument('-o',
                        '--output',
                        action='store',
                        nargs='?',
                        const='form.set',
                        help=('output to FILE (default: no (stdout), '
                              'FILE=form.set)'),
                        metavar='FILE')
    parser.add_argument('-f',
                        '--form',
                        action='store_const',
                        const=True,
                        help='print tform options (e.g., -w4) and exit')
    parser.add_argument('-m',
                        '--minos',
                        action='store_const',
                        const=True,
                        help='print minos options (e.g., -m2x4) and exit')
    parser.add_argument('-u',
                        '--usage',
                        action='store_const',
                        const=True,
                        help='print expected initial memory usage and exit')
    parser.add_argument('-H',
                        '--human-readable',
                        action='store_const',
                        const=True,
                        help=('adjust to human-readable numbers '
                              '(e.g., 1K, 23M, 456G)'))
    parser.add_argument('-1',
                        '--one',
                        action='store_const',
                        const=-1,
                        dest='ncpus',
                        help='use cpus in a node on the machine (default)')
    parser.add_argument('--full',
                        action='store_const',
                        const=-99999,
                        dest='ncpus',
                        help='use cpus in all nodes on the machine')
    parser.add_argument('-n',
                        '--ncpus',
                        action='store',
                        type=int,
                        help='use N cpus',
                        metavar='N')
    parser.add_argument('-p',
                        '--percentage',
                        action='store',
                        default=75.0,
                        type=float,
                        help=('percentage of initial memory usage '
                              '(default: 75.0)'),
                        metavar='N')
    parser.add_argument('--total-cpus',
                        action='store',
                        type=int,
                        help='specify the total cpus on the machine',
                        metavar='N')
    parser.add_argument('--total-memory',
                        action='store',
                        help='specify the total memory on the machine',
                        metavar='N')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_const',
                        const=True,
                        help='verbose output')
    parser.add_argument('args',
                        nargs='*',
                        help=argparse.SUPPRESS)
    args = parser.parse_args()

    # Help message.  Handled before touching the system information so that
    # --help also works on machines without lscpu/free.
    if args.help:
        parser.print_help()
        sys.exit(0)

    # Parameters unknown to Setup, passed through to the output as they are.
    pars = {}

    # NOTE: when all of `--ncpus`, `--total-cpus` and `--total-memory` are
    # specified, we don't need to access the system information.

    if args.verbose:
        SystemInfo.verbose = True

    if args.total_cpus:
        total_cpus = args.total_cpus
    else:
        total_cpus = SystemInfo.number_of_physical_cores

    if args.total_memory:
        try:
            total_memory = parse_number(args.total_memory)
        except ValueError:
            parser.error('non-integer value for total memory: {0}'.format(
                args.total_memory))
    else:
        total_memory = SystemInfo.total_memory

    # Number of CPUs.
    if args.ncpus is not None:
        ncpus = args.ncpus
    else:
        # Use 1 node for each job by default.
        ncpus = -1

    if ncpus < 0:
        # Use (-ncpus) nodes.
        ncpus = -ncpus * (total_cpus // SystemInfo.number_of_nodes)

    ncpus = max(ncpus, 1)
    ncpus = min(ncpus, total_cpus)

    sp = Setup()
    sp.threads = ncpus if ncpus >= 2 else -1

    # Apply the par=val, par+=int and par*=float arguments.
    for a in args.args:
        m = re.match(r'([a-zA-Z][a-zA-Z0-9]*)([+*]?)=(.*)', a)
        if m:
            par = m.group(1).lower()
            ope = m.group(2)
            val = m.group(3)
            if par in sp.__dict__:
                # Known parameter.
                if ope == '' or ope == '+':
                    # We have par=val or par+=int.
                    try:
                        val = parse_number(val)
                    except ValueError:
                        parser.error(
                            'non-integer value for parameter: {0}'.format(a))
                    if ope == '':
                        setattr(sp, par, val)
                    else:
                        setattr(sp, par, getattr(sp, par) + val)
                    continue
                else:
                    # We have par*=float.
                    try:
                        val = float(val)
                    except ValueError:
                        parser.error(
                            'non-float value for parameter: {0}'.format(a))
                    setattr(sp, par, int(getattr(sp, par) * val))
                    continue
            elif ope == '':
                # Unknown parameter given by par=val. Add it to the dictionary.
                pars[par] = val
                continue
        parser.error('unrecognized argument: {0}'.format(a))

    # Our resource.
    cpus = max(sp.threads, 1)
    memory = int(total_memory * args.percentage / 100.0 * cpus / total_cpus)

    # For --form option.
    if args.form:
        print('-w{0}'.format(cpus))
        sys.exit()

    # For --minos option.
    if args.minos:
        print('-m{0}x{1}'.format(total_cpus // cpus, cpus))
        sys.exit()

    # Presumably increasing MaxTermSize requires increasing WorkSpace, too.
    sp.workspace = max(sp.workspace, sp.maxtermsize * 250)

    # Optimize the memory usage by bisection.

    max_iteration = 50
    base = sp.copy()

    def f(x):
        """Scale the buffers by `x`; return (memory excess, parameters)."""
        # Hopefully monotonically increasing.
        scaled = base.copy()
        scaled.smallsize = int(scaled.smallsize * x)
        scaled.largesize = int(scaled.largesize * x)
        scaled.termsinsmall = int(scaled.termsinsmall * x)
        scaled.scratchsize = int(scaled.scratchsize * x)
        m = scaled.calc()
        if args.human_readable:
            m = round_human_readable(m, True, False)
        return (-(memory - m), scaled)

    # Invariant once bracketed: f(x1) < f(x2), with x1 the best scale factor
    # found so far that does not exceed the available memory.
    x1 = 1.0
    x2 = None
    y1 = f(x1)[0]
    y2 = None

    for _i in range(max_iteration):
        if x2 is None:
            # Not bracketed yet: grow or shrink geometrically.
            if y1 < 0:
                x = x1 * 2.0
                y = f(x)[0]
                if y > 0:
                    x2 = x
                    y2 = y
                else:
                    x1 = x
                    y1 = y
            else:
                x = x1 * 0.5
                y = f(x)[0]
                if y < 0:
                    x2 = x1
                    y2 = y1
                    x1 = x
                    y1 = y
                else:
                    x1 = x
                    y1 = y
        else:
            x = (x1 + x2) * 0.5
            y = f(x)[0]
            if y < 0:
                x1 = x
                y1 = y
            else:
                x2 = x
                y2 = y

        if x2 is not None:
            assert x1 < x2 and y1 < y2

    if x2 is None:
        if x1 < 1.0e-12:
            x1 = 0
        parser.exit(1, ('failed to find parameters: memory({0}) = {1} '
                        'bytes shortage\n').format(x1, y1))

    # For --usage option.
    if args.usage:
        m = f(x1)[1].calc()
        if args.human_readable:
            m = round_human_readable(m, True)
        print(m)
        sys.exit()

    # Output.
    with open_w_or_stdout(args.output) as fi:
        def round_memory(m):
            return (round_human_readable(m, False)
                    if args.human_readable else m)

        print(('# {0}{1} (cpu: {2}, mem: {3}; '
               'total cpu: {4}, total mem: {5}; {6}x{7})').format(
            parser.prog,
            (' ' if len(sys.argv) >= 2 else '') + ' '.join(sys.argv[1:]),
            cpus,
            round_memory(memory),
            total_cpus,
            round_memory(total_memory),
            total_cpus // cpus,
            cpus,
        ), file=fi)

        sp = f(x1)[1]
        defaults = dict(Setup().items())  # default value
        for k, v in sp.items():
            if k == 'threads':
                # 'threads N' doesn't work, must be given by tform option -wN.
                continue
            if v == defaults[k]:
                # Don't write when same as the default value.
                continue
            if args.human_readable:
                v = round_human_readable(v, False)
            print('{0} {1}'.format(k, v), file=fi)

        for k, v in pars.items():
            print('{0} {1}'.format(k, v), file=fi)


if __name__ == '__main__':
    main()