Commit ec486f9

Showing 2 changed files with 37 additions and 550 deletions.
71 changes: 37 additions & 34 deletions ci.hocon
@@ -236,88 +236,91 @@ weekly-bench-caps: {
    timelimit: "02:00:00"
}

+bench: {
+    cmd: [mx, --dynamicimport, ruby-benchmarks, benchmark]
+    setup: [
+        [mx, sclone, --kind, git, "https://github.com/jruby/benchmark-interface.git", benchmark-interface],
+        [mx, sclone, --kind, git, --revision, mx-config, "https://github.com/graalvm/ruby-benchmarks.git", ruby-benchmarks]
+    ]
+}
+
post-process-and-upload-results: [
    [tool/truffle/post-process-results-json.rb, bench-results.json, bench-results-processed.json],
    [cat, bench-results-processed.json],
    [bench-uploader.py, bench-results-processed.json]
]

-metrics: {
+metrics: ${bench} {
    run: [
-        [mx, benchmark, allocation]
+        ${bench.cmd} [allocation]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, minheap]
+        ${bench.cmd} [minheap]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, time]
+        ${bench.cmd} [time]
    ] ${post-process-and-upload-results}
    timelimit: "00:25:00"
}

-compiler-metrics: {
+compiler-metrics: ${bench} {
    run: [
-        [mx, benchmark, "allocation:compile-mandelbrot"]
+        ${bench.cmd} ["allocation:compile-mandelbrot"]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, "minheap:compile-mandelbrot"]
+        ${bench.cmd} ["minheap:compile-mandelbrot"]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, "time:compile-mandelbrot"]
+        ${bench.cmd} ["time:compile-mandelbrot"]
    ] ${post-process-and-upload-results}
    timelimit: "00:50:00"
}

-setup-benchmarks: [
-    [mx, sclone, --kind, git, "https://github.com/jruby/benchmark-interface.git", benchmark-interface],
-    [mx, sclone, --kind, git, "https://github.com/jruby/all-ruby-benchmarks.git", all-ruby-benchmarks]
-]
-
-classic-benchmarks: {
-    run: ${setup-benchmarks} [
-        [mx, benchmark, classic]
+classic-benchmarks: ${bench} {
+    run: [
+        ${bench.cmd} [classic]
    ] ${post-process-and-upload-results}
    timelimit: "00:35:00"
}

-chunky-benchmarks: {
-    run: ${setup-benchmarks} [
-        [mx, benchmark, chunky]
+chunky-benchmarks: ${bench} {
+    run: [
+        ${bench.cmd} [chunky]
    ] ${post-process-and-upload-results}
    timelimit: "00:40:00"
}

-psd-benchmarks: {
-    run: ${setup-benchmarks} [
-        [mx, benchmark, psd]
+psd-benchmarks: ${bench} {
+    run: [
+        ${bench.cmd} [psd]
    ] ${post-process-and-upload-results}
    timelimit: "01:15:00"
}

-asciidoctor-benchmarks: {
-    run: ${setup-benchmarks} [
-        [mx, benchmark, asciidoctor]
+asciidoctor-benchmarks: ${bench} {
+    run: [
+        ${bench.cmd} [asciidoctor]
    ] ${post-process-and-upload-results}
    timelimit: "00:35:00"
}

-other-benchmarks: {
-    run: ${setup-benchmarks} [
-        [mx, benchmark, image-demo]
+other-benchmarks: ${bench} {
+    run: [
+        ${bench.cmd} [image-demo]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, optcarrot]
+        ${bench.cmd} [optcarrot]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, savina]
+        ${bench.cmd} [savina]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, synthetic]
+        ${bench.cmd} [synthetic]
    ] ${post-process-and-upload-results} [
-        [mx, benchmark, micro]
+        ${bench.cmd} [micro]
    ] ${post-process-and-upload-results}
    timelimit: "00:40:00"
}

-server-benchmarks: {
+server-benchmarks: ${bench} {
    packages: {
        "apache/ab": ">=2.3"
    }

-    run: ${setup-benchmarks} [
+    run: [
        [mx, benchmark, server]
    ] ${post-process-and-upload-results}
    timelimit: "00:20:00"
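
The new configuration relies on two pieces of standard HOCON substitution: writing `metrics: ${bench} { ... }` merges the shared bench job (its cmd and setup) into each benchmark job, and `${bench.cmd} [allocation]` concatenates the shared mx command with the per-suite arguments. Below is a minimal sketch of how those values resolve, assuming the third-party pyhocon package (an assumption for illustration only, not something this change introduces), with names taken from the fragment above:

# Hypothetical check of the HOCON inheritance used above; pyhocon is assumed
# here purely to show how the substitutions resolve.
from pyhocon import ConfigFactory

conf = ConfigFactory.parse_string("""
bench: {
    cmd: [mx, --dynamicimport, ruby-benchmarks, benchmark]
}
metrics: ${bench} {
    run: [ ${bench.cmd} [allocation] ]
    timelimit: "00:25:00"
}
""")

# The object merge gives metrics the shared cmd (and, in the real file, setup) ...
print(conf.get('metrics.cmd'))
# ... and each run entry is that command with the suite name appended,
# i.e. mx --dynamicimport ruby-benchmarks benchmark allocation.
print(conf.get('metrics.run'))

Because every job now inherits its clone steps from bench.setup, the standalone setup-benchmarks list above could be deleted.
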
516 changes: 0 additions & 516 deletions mx.jruby/mx_jruby.py
@@ -305,519 +305,3 @@ def __init__(self, args):
rubyDir = _suite.dir
jt = join(rubyDir, 'tool', 'jt.rb')
BackgroundServerTask.__init__(self, ['ruby', jt] + args)

##############
# BENCHMARKS #
##############

class RubyBenchmarkSuite(mx_benchmark.BenchmarkSuite):
def group(self):
return 'Graal'

def subgroup(self):
return 'jrubytruffle'

def vmArgs(self, bmSuiteArgs):
return mx_benchmark.splitArgs(bmSuiteArgs, bmSuiteArgs)[0]

def runArgs(self, bmSuiteArgs):
return mx_benchmark.splitArgs(bmSuiteArgs, bmSuiteArgs)[1]

def default_benchmarks(self):
return self.benchmarks()

def run(self, benchmarks, bmSuiteArgs):
def fixUpResult(result):
result.update({
'host-vm': os.environ.get('HOST_VM', 'host-vm'),
'host-vm-config': os.environ.get('HOST_VM_CONFIG', 'host-vm-config'),
'guest-vm': os.environ.get('GUEST_VM', 'guest-vm'),
'guest-vm-config': os.environ.get('GUEST_VM_CONFIG', 'guest-vm-config')
})
return result

return [fixUpResult(r) for b in benchmarks or self.default_benchmarks() for r in self.runBenchmark(b, bmSuiteArgs)]

def runBenchmark(self, benchmark, bmSuiteArgs):
raise NotImplementedError()

metrics_benchmarks = {
'hello': ['-e', "puts 'hello'"],
'compile-mandelbrot': ['--graal', 'bench/truffle/metrics/mandelbrot.rb']
}

default_metrics_benchmarks = ['hello']

class MetricsBenchmarkSuite(RubyBenchmarkSuite):
def benchmarks(self):
return metrics_benchmarks.keys()

def default_benchmarks(self):
return default_metrics_benchmarks

class AllocationBenchmarkSuite(MetricsBenchmarkSuite):
def name(self):
return 'allocation'

def runBenchmark(self, benchmark, bmSuiteArgs):
out = mx.OutputCapture()

jt(['metrics', 'alloc', '--json'] + metrics_benchmarks[benchmark] + bmSuiteArgs, out=out)

data = json.loads(out.data)

return [{
'benchmark': benchmark,
'metric.name': 'memory',
'metric.value': sample,
'metric.unit': 'B',
'metric.better': 'lower',
'metric.iteration': n,
'extra.metric.human': '%d/%d %s' % (n, len(data['samples']), data['human'])
} for n, sample in enumerate(data['samples'])]

class MinHeapBenchmarkSuite(MetricsBenchmarkSuite):
def name(self):
return 'minheap'

def runBenchmark(self, benchmark, bmSuiteArgs):
out = mx.OutputCapture()

jt(['metrics', 'minheap', '--json'] + metrics_benchmarks[benchmark] + bmSuiteArgs, out=out)

data = json.loads(out.data)

return [{
'benchmark': benchmark,
'metric.name': 'memory',
'metric.value': data['min'],
'metric.unit': 'MiB',
'metric.better': 'lower',
'extra.metric.human': data['human']
}]

class TimeBenchmarkSuite(MetricsBenchmarkSuite):
def name(self):
return 'time'

def runBenchmark(self, benchmark, bmSuiteArgs):
out = mx.OutputCapture()

jt(['metrics', 'time', '--json'] + metrics_benchmarks[benchmark] + bmSuiteArgs, out=out)

data = json.loads(out.data)

return [{
'benchmark': benchmark,
'extra.metric.region': region,
'metric.name': 'time',
'metric.value': sample,
'metric.unit': 's',
'metric.better': 'lower',
'metric.iteration': n,
'extra.metric.human': '%d/%d %s' % (n, len(region_data['samples']), region_data['human'])
} for region, region_data in data.items() for n, sample in enumerate(region_data['samples'])]
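
# Illustrative sketch (not part of mx_jruby.py): the metrics suites above flatten
# the JSON printed by `jt metrics ... --json` into one data point per sample.
# The region names and numbers below are made up, only to show the
# TimeBenchmarkSuite shape; 'hello' is the default metrics benchmark.
example_data = {
    'total': {'samples': [10.1, 9.8], 'human': '~10 s'},
    'main': {'samples': [7.2, 7.0], 'human': '~7 s'}
}
example_points = [{
    'benchmark': 'hello',
    'extra.metric.region': region,
    'metric.name': 'time',
    'metric.value': sample,
    'metric.unit': 's',
    'metric.better': 'lower',
    'metric.iteration': n,
    'extra.metric.human': '%d/%d %s' % (n, len(region_data['samples']), region_data['human'])
} for region, region_data in example_data.items() for n, sample in enumerate(region_data['samples'])]
# -> 4 data points: one per (region, sample) pair.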

class AllBenchmarksBenchmarkSuite(RubyBenchmarkSuite):
def benchmarks(self):
raise NotImplementedError()

def name(self):
raise NotImplementedError()

def time(self):
raise NotImplementedError()

def directory(self):
return self.name()

def filterLines(self, lines):
data = []
for line in lines:
try:
data.append(float(line))
except ValueError:
log(line)
if len(data) % 2 != 0:
raise AssertionError("Odd number of values")
return data

def runBenchmark(self, benchmark, bmSuiteArgs):
arguments = ['benchmark']
if 'MX_NO_GRAAL' in os.environ:
arguments.extend(['--no-graal'])
arguments.extend(['--simple', '--elapsed'])
arguments.extend(['--time', str(self.time())])
if ':' in benchmark:
benchmark_file, benchmark_name = benchmark.split(':')
benchmark_names = [benchmark_name]
else:
benchmark_file = benchmark
benchmark_names = []
if '.rb' in benchmark_file:
arguments.extend([benchmark_file])
else:
arguments.extend([self.directory() + '/' + benchmark_file + '.rb'])
arguments.extend(benchmark_names)
arguments.extend(bmSuiteArgs)
out = mx.OutputCapture()

if jt(arguments, out=out, nonZeroIsFatal=False) == 0:
lines = out.data.split('\n')[1:-1]

if lines[-1] == 'optimised away':
data = self.filterLines(lines)
elapsed = [d for n, d in enumerate(data) if n % 2 == 0]
samples = [d for n, d in enumerate(data) if n % 2 == 1]

return [{
'benchmark': benchmark,
'metric.name': 'throughput',
'metric.value': sample,
'metric.unit': 'op/s',
'metric.better': 'higher',
'metric.iteration': n,
'extra.metric.warmedup': 'false',
'extra.metric.elapsed-num': e,
'extra.metric.human': 'optimised away'
} for n, (e, sample) in enumerate(zip(elapsed, samples))] + [{
'benchmark': benchmark,
'metric.name': 'throughput',
'metric.value': 2147483647, # arbitrary high value (--simple won't run more than this many ips)
'metric.unit': 'op/s',
'metric.better': 'higher',
'metric.iteration': len(samples),
'extra.metric.warmedup': 'true',
'extra.metric.elapsed-num': elapsed[-1] + 2.0, # just put the data point beyond the last one a bit
'extra.metric.human': 'optimised away',
'extra.error': 'optimised away'
}]
else:
data = self.filterLines(lines)
elapsed = [d for n, d in enumerate(data) if n % 2 == 0]
samples = [d for n, d in enumerate(data) if n % 2 == 1]

if len(samples) > 1:
warmed_up_samples = [sample for n, sample in enumerate(samples) if n / float(len(samples)) >= 0.5]
else:
warmed_up_samples = samples

warmed_up_mean = sum(warmed_up_samples) / float(len(warmed_up_samples))

return [{
'benchmark': benchmark,
'metric.name': 'throughput',
'metric.value': sample,
'metric.unit': 'op/s',
'metric.better': 'higher',
'metric.iteration': n,
'extra.metric.warmedup': 'true' if n / float(len(samples)) >= 0.5 else 'false',
'extra.metric.elapsed-num': e,
'extra.metric.human': '%d/%d %f op/s' % (n, len(samples), warmed_up_mean)
} for n, (e, sample) in enumerate(zip(elapsed, samples))]
else:
sys.stderr.write(out.data)

return [{
'benchmark': benchmark,
'metric.name': 'throughput',
'metric.value': 0,
'metric.unit': 'op/s',
'metric.better': 'higher',
'extra.metric.warmedup': 'true',
'extra.error': 'failed'
}]
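
# Illustrative sketch (not part of mx_jruby.py): runBenchmark above expects the
# runner to print alternating elapsed-seconds and ops/sec values (hence the
# even-length check in filterLines), and treats the second half of the samples
# as warmed up. Made-up values:
example = [1.0, 50.0, 2.0, 80.0, 3.0, 100.0, 4.0, 101.0]
elapsed = [d for n, d in enumerate(example) if n % 2 == 0]   # [1.0, 2.0, 3.0, 4.0]
samples = [d for n, d in enumerate(example) if n % 2 == 1]   # [50.0, 80.0, 100.0, 101.0]
warmed_up = [s for n, s in enumerate(samples) if n / float(len(samples)) >= 0.5]
print(warmed_up)                               # [100.0, 101.0]
print(sum(warmed_up) / float(len(warmed_up)))  # reported warmed-up mean: 100.5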

classic_benchmarks = [
'binary-trees',
'deltablue',
'fannkuch',
'mandelbrot',
'matrix-multiply',
'n-body',
'neural-net',
'pidigits',
'red-black',
'richards',
'spectral-norm'
]

classic_benchmark_time = 120

class ClassicBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'classic'

def directory(self):
return 'classic'

def benchmarks(self):
return classic_benchmarks

def time(self):
return classic_benchmark_time

chunky_benchmarks = [
'chunky-color-r',
'chunky-color-g',
'chunky-color-b',
'chunky-color-a',
'chunky-color-compose-quick',
'chunky-canvas-resampling-bilinear',
'chunky-canvas-resampling-nearest-neighbor',
'chunky-canvas-resampling-steps-residues',
'chunky-canvas-resampling-steps',
'chunky-decode-png-image-pass',
'chunky-encode-png-image-pass-to-stream',
'chunky-operations-compose',
'chunky-operations-replace'
]

chunky_benchmark_time = 120

class ChunkyBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'chunky'

def directory(self):
return 'chunky_png'

def benchmarks(self):
return chunky_benchmarks

def time(self):
return chunky_benchmark_time

psd_benchmarks = [
'psd-color-cmyk-to-rgb',
'psd-compose-color-burn',
'psd-compose-color-dodge',
'psd-compose-darken',
'psd-compose-difference',
'psd-compose-exclusion',
'psd-compose-hard-light',
'psd-compose-hard-mix',
'psd-compose-lighten',
'psd-compose-linear-burn',
'psd-compose-linear-dodge',
'psd-compose-linear-light',
'psd-compose-multiply',
'psd-compose-normal',
'psd-compose-overlay',
'psd-compose-pin-light',
'psd-compose-screen',
'psd-compose-soft-light',
'psd-compose-vivid-light',
'psd-imageformat-layerraw-parse-raw',
'psd-imageformat-rle-decode-rle-channel',
'psd-imagemode-cmyk-combine-cmyk-channel',
'psd-imagemode-greyscale-combine-greyscale-channel',
'psd-imagemode-rgb-combine-rgb-channel',
'psd-renderer-blender-compose',
'psd-renderer-clippingmask-apply',
'psd-renderer-mask-apply',
'psd-util-clamp',
'psd-util-pad2',
'psd-util-pad4'
]

psd_benchmark_time = 120

class PSDBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'psd'

def directory(self):
return 'psd.rb'

def benchmarks(self):
return psd_benchmarks

def time(self):
return psd_benchmark_time

image_demo_benchmarks = [
'image-demo-conv',
'image-demo-sobel',
]

image_demo_benchmark_time = 120

class ImageDemoBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'image-demo'

def directory(self):
return 'image-demo'

def benchmarks(self):
return image_demo_benchmarks

def time(self):
return image_demo_benchmark_time

asciidoctor_benchmarks = [
'asciidoctor:file-lines',
'asciidoctor:string-lines',
'asciidoctor:read-line',
'asciidoctor:restore-line',
'asciidoctor:load-string',
'asciidoctor:load-file',
'asciidoctor:quote-match',
'asciidoctor:quote-sub',
'asciidoctor:join-lines',
'asciidoctor:convert'
]

asciidoctor_benchmark_time = 120

class AsciidoctorBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'asciidoctor'

def benchmarks(self):
return asciidoctor_benchmarks

def time(self):
return asciidoctor_benchmark_time

class OptcarrotBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'optcarrot'

def directory(self):
return 'optcarrot'

def benchmarks(self):
return ['optcarrot']

def time(self):
return 200

synthetic_benchmarks = [
'acid'
]

synthetic_benchmark_time = 120

class SyntheticBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'synthetic'

def benchmarks(self):
return synthetic_benchmarks

def time(self):
return synthetic_benchmark_time

micro_benchmark_time = 30

class MicroBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'micro'

def benchmarks(self):
out = mx.OutputCapture()
jt(['where', 'repos', 'all-ruby-benchmarks'], out=out)
all_ruby_benchmarks = out.data.strip()
benchmarks = []
for root, dirs, files in os.walk(join(all_ruby_benchmarks, 'micro')):
for name in files:
if name.endswith('.rb'):
benchmark_file = join(root, name)[len(all_ruby_benchmarks)+1:]
out = mx.OutputCapture()
jt(['benchmark', 'list', benchmark_file], out=out)
benchmarks.extend([benchmark_file + ':' + b.strip() for b in out.data.split('\n') if len(b.strip()) > 0])
return benchmarks

def time(self):
return micro_benchmark_time

savina_benchmarks = [
'savina-apsp',
'savina-radix-sort',
'savina-trapezoidal',
]

class SavinaBenchmarkSuite(AllBenchmarksBenchmarkSuite):
def name(self):
return 'savina'

def directory(self):
return 'parallel/savina'

def benchmarks(self):
return savina_benchmarks

def time(self):
return 120

server_benchmarks = [
'tcp-server',
'webrick'
]

server_benchmark_time = 60 * 4 # Seems unstable otherwise

class ServerBenchmarkSuite(RubyBenchmarkSuite):
def benchmarks(self):
return server_benchmarks

def name(self):
return 'server'

def runBenchmark(self, benchmark, bmSuiteArgs):
arguments = ['run', '--exec']
if 'MX_NO_GRAAL' not in os.environ:
arguments.extend(['--graal', '-J-G:+TruffleCompilationExceptionsAreFatal'])
arguments.extend(['all-ruby-benchmarks/servers/' + benchmark + '.rb'])

server = BackgroundJT(arguments)

with server:
time.sleep(10)
out = mx.OutputCapture()
if mx.run(
['ruby', 'all-ruby-benchmarks/servers/harness.rb', str(server_benchmark_time)],
out=out,
nonZeroIsFatal=False) == 0 and server.is_running():
samples = [float(s) for s in out.data.split('\n')[0:-1]]
print samples
half_samples = len(samples) / 2
used_samples = samples[len(samples)-half_samples-1:]
ips = sum(used_samples) / float(len(used_samples))

return [{
'benchmark': benchmark,
'metric.name': 'throughput',
'metric.value': ips,
'metric.unit': 'op/s',
'metric.better': 'higher',
'extra.metric.human': str(used_samples)
}]
else:
sys.stderr.write(out.data)

# TODO CS 24-Jun-16, how can we fail the wider suite?
return [{
'benchmark': benchmark,
'metric.name': 'throughput',
'metric.value': 0,
'metric.unit': 'op/s',
'metric.better': 'higher',
'extra.error': 'failed'
}]
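
# Illustrative sketch (not part of mx_jruby.py): ServerBenchmarkSuite above
# averages only the later harness samples. With Python 2 integer division,
# half_samples = len(samples) / 2 and a slice starting at
# len(samples) - half_samples - 1, the window kept is one sample longer than
# half. Made-up values:
example_samples = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0]
half = len(example_samples) // 2                           # 3
used = example_samples[len(example_samples) - half - 1:]   # [30.0, 40.0, 50.0, 60.0]
print(sum(used) / float(len(used)))                        # reported ips: 45.0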

mx_benchmark.add_bm_suite(AllocationBenchmarkSuite())
mx_benchmark.add_bm_suite(MinHeapBenchmarkSuite())
mx_benchmark.add_bm_suite(TimeBenchmarkSuite())
mx_benchmark.add_bm_suite(ClassicBenchmarkSuite())
mx_benchmark.add_bm_suite(ChunkyBenchmarkSuite())
mx_benchmark.add_bm_suite(PSDBenchmarkSuite())
mx_benchmark.add_bm_suite(ImageDemoBenchmarkSuite())
mx_benchmark.add_bm_suite(AsciidoctorBenchmarkSuite())
mx_benchmark.add_bm_suite(OptcarrotBenchmarkSuite())
mx_benchmark.add_bm_suite(SyntheticBenchmarkSuite())
mx_benchmark.add_bm_suite(MicroBenchmarkSuite())
mx_benchmark.add_bm_suite(SavinaBenchmarkSuite())
mx_benchmark.add_bm_suite(ServerBenchmarkSuite())
