--- /dev/null
+#!/usr/bin/env python
+
+"""Measure how a sorting executable scales with N.
+
+The executable should support the following usage:
+ executable path/to/data/file
+Where the data file is of the format output by data.py.
+"""
+
+import subprocess
+import time
+import sys
+
+import numpy
+import matplotlib
+matplotlib.use('Agg') # select backend that doesn't require X Windows
+import pylab
+
+
+def generate_data(stream, N, ordered=False):
+ print >> sys.stderr, 'generate %d data points (ordered? %s)' % (
+ N, ordered)
+ stream.seek(0)
+ stream.truncate()
+ args = ['./data.py', str(N)]
+ if ordered:
+ args.insert(1, '--ordered')
+ q = subprocess.Popen(args, stdout=stream)
+ status = q.wait()
+ assert status == 0, status
+ stream.flush()
+
+def run_test(executable, data_filename):
+ print >> sys.stderr, 'run %s' % executable
+ start = time.time()
+ q = subprocess.Popen([executable, data_filename],
+ stdout=open('/dev/null', 'w'))
+ status = q.wait()
+ stop = time.time()
+ assert status == 0, status
+ return stop - start
+
+def run_tests(executable, data_file, ordered=False, repeats=10, max_time=1e2):
+ times = {}
+ prev_time = 0
+ N = 2
+ while prev_time < max_time:
+ print
+ ts = numpy.zeros((repeats,), dtype=numpy.double)
+ for i in range(repeats):
+ generate_data(data_file, N, ordered=ordered)
+ ts[i] = run_test(executable, data_file.name)
+ times[N] = ts
+ prev_time = ts.mean()
+ N *= 2
+ return times
+
+
+if __name__ == '__main__':
+ import optparse
+ import tempfile
+
+ p = optparse.OptionParser(
+ usage='%prog [options] executable', epilog=__doc__)
+ p.add_option('-r', '--repeats', dest='repeats', default=10, type='int',
+ help='Number of repeats to run at each N (%default).')
+ p.add_option('-m', '--max-time', dest='max_time', default=1e2,type='float',
+ help='Number of repeats to run at each N (%default).')
+ p.add_option('-p', '--plot', dest='plot', default=None,
+ help='Filename for a scaling plot (no plot is generated if this option is not set).')
+
+ options,args = p.parse_args()
+
+ executable = args[0]
+
+ data_file = tempfile.NamedTemporaryFile()
+ kwargs = {
+ 'executable': executable,
+ 'data_file': data_file,
+ 'repeats': options.repeats,
+ 'max_time': options.max_time,
+ }
+ try:
+ times = run_tests(ordered=False, **kwargs)
+ ordered_times = run_tests(ordered=True, **kwargs)
+ except:
+ data_file.close()
+ raise
+
+ columns = ['N',
+ 'ordered mean (s)', 'ordered std. dev. (s)',
+ 'random mean (s)', 'random std. dev. (s)']
+ plots = dict([(c, []) for c in columns])
+
+ print '# sort times for %s' % executable
+ print '# %d repeats' % options.repeats
+ print '#%s' % '\t'.join(columns)
+ invalid = numpy.array(numpy.inf)
+ for key in sorted(set(times.keys() + ordered_times.keys())):
+ om = ordered_times.get(key, invalid).mean()
+ os = ordered_times.get(key, invalid).std()
+ m = times.get(key, invalid).mean()
+ s = times.get(key, invalid).std()
+ print '\t'.join([str(x) for x in [key, om, os, m, s]])
+ for c,x in zip(columns, [key, om, os, m, s]):
+ plots[c].append(x)
+
+ if options.plot:
+ f = pylab.figure()
+ a = pylab.axes()
+ a.hold(True)
+ for c,color in zip(['ordered', 'random'], 'br'):
+ a.errorbar(
+ x=plots['N'],
+ y=plots['%s mean (s)' % c],
+ yerr=plots['%s std. dev. (s)' % c],
+ fmt='%so-' % color, label=c)
+ a.set_title('sort times for %s' % executable)
+ a.set_xscale('log')
+ a.set_yscale('log')
+ a.set_xlabel('N')
+ a.set_ylabel('t (s)')
+ a.legend(loc='best')
+ f.savefig(options.plot)