From edbdb07b1553471d4ab0247679cb34c138f3e44a Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sun, 24 Oct 2010 15:52:06 -0400 Subject: [PATCH] Added src/sorting/scaling.py to test sort algorithm scaling vs. N. --- src/sorting/Makefile | 11 +++- src/sorting/README | 8 +++ src/sorting/data.py | 1 + src/sorting/scaling.py | 124 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 3 deletions(-) create mode 100755 src/sorting/scaling.py diff --git a/src/sorting/Makefile b/src/sorting/Makefile index 4d84d0f..d41127f 100644 --- a/src/sorting/Makefile +++ b/src/sorting/Makefile @@ -14,13 +14,18 @@ EXECS = bubble quicksort # Top level targets -all: $(EXECS) $(DATA) +all : $(EXECS) $(DATA) -clean: - $(RM) -f *.o $(EXECS) $(DATA) +clean : + $(RM) -f *.o $(EXECS) $(EXECS:%=%-scaling.*) $(DATA) + +scaling : $(EXECS:%=%-scaling.dat) # Lower level rules +%-scaling.dat : % scaling.py data.py + ./scaling.py --max-time 1 --repeats 3 --plot $(@:%.dat=%.png) ./$< > $@ + data : data.py ./$< $(DATA_SIZE) > $@ diff --git a/src/sorting/README b/src/sorting/README index e000dae..d69b5d5 100644 --- a/src/sorting/README +++ b/src/sorting/README @@ -53,3 +53,11 @@ On ordered data bubble does much better $ time ./quicksort ordered-data > /dev/null quicksort takes 0.048 s and bubble takes 0.046 s. + +You can generate scaling graphs for all executables built by the +Makefile with + + $ make scaling + +which generates `*-scaling.dat` and `*-scaling.png` for each +executable. diff --git a/src/sorting/data.py b/src/sorting/data.py index b3aa13c..5382df4 100755 --- a/src/sorting/data.py +++ b/src/sorting/data.py @@ -16,6 +16,7 @@ import optparse import random import sys + p = optparse.OptionParser(usage='%prog [options] N_POINTS', epilog=__doc__) p.add_option('-o', '--ordered', dest='shuffle', default=True, action='store_false', help="Don't shuffle the output data.") diff --git a/src/sorting/scaling.py b/src/sorting/scaling.py new file mode 100755 index 0000000..814836e --- /dev/null +++ b/src/sorting/scaling.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python + +"""Measure how a sorting executable scales with N. + +The executable should support the following usage: + executable path/to/data/file +Where the data file is of the format output by data.py. +""" + +import subprocess +import time +import sys + +import numpy +import matplotlib +matplotlib.use('Agg') # select backend that doesn't require X Windows +import pylab + + +def generate_data(stream, N, ordered=False): + print >> sys.stderr, 'generate %d data points (ordered? %s)' % ( + N, ordered) + stream.seek(0) + stream.truncate() + args = ['./data.py', str(N)] + if ordered: + args.insert(1, '--ordered') + q = subprocess.Popen(args, stdout=stream) + status = q.wait() + assert status == 0, status + stream.flush() + +def run_test(executable, data_filename): + print >> sys.stderr, 'run %s' % executable + start = time.time() + q = subprocess.Popen([executable, data_filename], + stdout=open('/dev/null', 'w')) + status = q.wait() + stop = time.time() + assert status == 0, status + return stop - start + +def run_tests(executable, data_file, ordered=False, repeats=10, max_time=1e2): + times = {} + prev_time = 0 + N = 2 + while prev_time < max_time: + print + ts = numpy.zeros((repeats,), dtype=numpy.double) + for i in range(repeats): + generate_data(data_file, N, ordered=ordered) + ts[i] = run_test(executable, data_file.name) + times[N] = ts + prev_time = ts.mean() + N *= 2 + return times + + +if __name__ == '__main__': + import optparse + import tempfile + + p = optparse.OptionParser( + usage='%prog [options] executable', epilog=__doc__) + p.add_option('-r', '--repeats', dest='repeats', default=10, type='int', + help='Number of repeats to run at each N (%default).') + p.add_option('-m', '--max-time', dest='max_time', default=1e2,type='float', + help='Number of repeats to run at each N (%default).') + p.add_option('-p', '--plot', dest='plot', default=None, + help='Filename for a scaling plot (no plot is generated if this option is not set).') + + options,args = p.parse_args() + + executable = args[0] + + data_file = tempfile.NamedTemporaryFile() + kwargs = { + 'executable': executable, + 'data_file': data_file, + 'repeats': options.repeats, + 'max_time': options.max_time, + } + try: + times = run_tests(ordered=False, **kwargs) + ordered_times = run_tests(ordered=True, **kwargs) + except: + data_file.close() + raise + + columns = ['N', + 'ordered mean (s)', 'ordered std. dev. (s)', + 'random mean (s)', 'random std. dev. (s)'] + plots = dict([(c, []) for c in columns]) + + print '# sort times for %s' % executable + print '# %d repeats' % options.repeats + print '#%s' % '\t'.join(columns) + invalid = numpy.array(numpy.inf) + for key in sorted(set(times.keys() + ordered_times.keys())): + om = ordered_times.get(key, invalid).mean() + os = ordered_times.get(key, invalid).std() + m = times.get(key, invalid).mean() + s = times.get(key, invalid).std() + print '\t'.join([str(x) for x in [key, om, os, m, s]]) + for c,x in zip(columns, [key, om, os, m, s]): + plots[c].append(x) + + if options.plot: + f = pylab.figure() + a = pylab.axes() + a.hold(True) + for c,color in zip(['ordered', 'random'], 'br'): + a.errorbar( + x=plots['N'], + y=plots['%s mean (s)' % c], + yerr=plots['%s std. dev. (s)' % c], + fmt='%so-' % color, label=c) + a.set_title('sort times for %s' % executable) + a.set_xscale('log') + a.set_yscale('log') + a.set_xlabel('N') + a.set_ylabel('t (s)') + a.legend(loc='best') + f.savefig(options.plot) -- 2.26.2