From e8bc8e94bd36ab5bf59f0f9a918bd452c9549c0e Mon Sep 17 00:00:00 2001 From: Paul Brossier Date: Wed, 10 Apr 2013 11:45:59 -0500 Subject: [PATCH] python/tests/eval_pitch: add evaluation script against TONAS database --- python/tests/eval_pitch | 143 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100755 python/tests/eval_pitch diff --git a/python/tests/eval_pitch b/python/tests/eval_pitch new file mode 100755 index 00000000..226b927d --- /dev/null +++ b/python/tests/eval_pitch @@ -0,0 +1,143 @@ +#! /usr/bin/env python + +""" +Script to evaluate pitch algorithms against TONAS database. + +See http://mtg.upf.edu/download/datasets/tonas/ + +Example run: + + $ ./eval_pitch /path/to/TONAS/*/*.wav + OK: 94.74% vx r: 96.87% vx f: 15.83% f0: 96.02% %12: 0.50% /path/to/TONAS/Deblas/01-D_AMairena.wav + OK: 89.89% vx r: 93.21% vx f: 13.81% f0: 90.74% %12: 1.51% /path/to/TONAS/Deblas/02-D_ChanoLobato.wav + OK: 96.02% vx r: 96.73% vx f: 10.91% f0: 96.42% %12: 0.00% /path/to/TONAS/Deblas/03-D_Chocolate.wav + [...] + OK: 82.35% vx r: 95.52% vx f: 67.09% f0: 89.80% %12: 0.95% /path/to/TONAS/Martinetes2/80-M2_Rancapinos.wav + OK: 61.97% vx r: 85.71% vx f: 22.03% f0: 55.63% %12: 8.57% /path/to/TONAS/Martinetes2/81-M2_SDonday.wav + OK: 75.26% vx r: 91.63% vx f: 27.27% f0: 75.99% %12: 5.05% /path/to/TONAS/Martinetes2/82-M2_TiaAnicalaPiriniaca.wav + OK: 82.77% vx r: 92.74% vx f: 38.27% f0: 87.33% %12: 1.67% 69 files, total_length: 1177.69s, total runtime: 25.91s + + +""" + +import sys +import time +import os.path +import numpy +from utils import array_from_text_file, array_from_yaml_file +from aubio import source, pitch, freqtomidi + +start = time.time() + +freq_tol = .50 # more or less half a tone + +methods = ["default", "yinfft", "mcomb", "yin", "fcomb", "schmitt", "specacf"] +method = methods[0] + +downsample = 1 +tolerance = 0.35 +silence = -40. +skip = 1 +if method in ["yinfft", "default"]: + downsample = 1 + tolerance = 0.45 +elif method == "mcomb": + downsample = 4 +elif method == "yin": + downsample = 4 + tolerance = 0.2 + +samplerate = 44100 / downsample +hop_s = 512 / downsample +win_s = 2048 / downsample + +def get_pitches (filename, samplerate = samplerate, win_s = win_s, hop_s = hop_s): + s = source(filename, samplerate, hop_s) + samplerate = s.samplerate + + p = pitch(method, win_s, hop_s, samplerate) + p.set_unit("freq") + p.set_tolerance(tolerance) + p.set_silence(silence) + + # list of pitches, in samples + pitches = [] + + # total number of frames read + total_frames = 0 + while True: + samples, read = s() + new_pitch = p(samples)[0] + pitches.append([total_frames/float(samplerate), new_pitch]) + total_frames += read + if read < hop_s: break + return numpy.array(pitches) + +total_correct_f0, total_correct_sil, total_missed, total_incorrect, total_fp, total_total = 0, 0, 0, 0, 0, 0 +total_correct_chroma, total_voiced = 0, 0 +for source_file in sys.argv[1:]: + ground_truth_file = source_file.replace('.wav', '.f0.Corrected') + if os.path.isfile(ground_truth_file): + ground_truth = array_from_text_file(ground_truth_file)[:,[0,2]] + experiment = get_pitches(source_file) + # check that we have the same length, more or less one frame + assert abs(len(ground_truth) - len(experiment)) < 2 + # align experiment by skipping first results + experiment = experiment[skip:] + experiment[:,0] -= experiment[0,0] + # trim to shortest list + maxlen = min(len(ground_truth), len(experiment)) + experiment = experiment[:maxlen] + ground_truth = ground_truth[:maxlen] + # get difference matrix + diffmat = abs(experiment - ground_truth) + # make sure we got the timing right + assert max(diffmat[:,0]) < 10e-4, source_file + truth_pitches = freqtomidi(ground_truth[:,1]) + exper_pitches = freqtomidi(experiment[:,1]) + + total = len(truth_pitches) + unvoiced = len(truth_pitches[truth_pitches == 0]) + voiced = total - unvoiced + correct_sil, fp, missed, correct_f0, correct_chroma, incorrect = 0, 0, 0, 0, 0, 0 + for a, b in zip(truth_pitches, exper_pitches): + if a == 0 and b == 0: + correct_sil += 1 + elif a == 0 and b != 0: + fp += 1 + elif a != 0 and b == 0: + missed += 1 + elif abs(b - a) < freq_tol: + correct_f0 += 1 + elif abs(b - a) % 12. < freq_tol: + correct_chroma += 1 + else: + incorrect += 1 + assert correct_sil + fp + missed + correct_f0 + correct_chroma + incorrect == total + assert unvoiced == correct_sil + fp + assert voiced == missed + correct_f0 + correct_chroma + incorrect + print "OK: %6s%%" % ("%.2f" % (100. * (correct_f0 + correct_sil) / total )), + print "vx r: %6s%%" % ("%.2f" % (100. - 100. * missed / voiced)), + print "vx f: %6s%%" % ("%.2f" % (100. * fp / unvoiced)), + print "f0: %6s%%" % ("%.2f" % (100. * correct_f0 / voiced)), + print "%%12: %6s%%" % ("%.2f" % (100. * correct_chroma / voiced)), + print source_file + total_correct_sil += correct_sil + total_correct_f0 += correct_f0 + total_correct_chroma += correct_chroma + total_missed += missed + total_incorrect += incorrect + total_fp += fp + total_voiced += voiced + total_total += total + else: + print "ERR", "could not find ground_truth_file", ground_truth_file + +print "OK: %6s%%" % ("%.2f" % (100. * (total_correct_f0 + total_correct_sil) / total_total )), +print "vx r: %6s%%" % ("%.2f" % (100. - 100. * total_missed / total_voiced)), +print "vx f: %6s%%" % ("%.2f" % (100. * (total_fp) / (total_correct_sil + total_fp))), +print "f0: %6s%%" % ("%.2f" % (100. * total_correct_f0 / total_voiced)), +print "%%12: %6s%%" % ("%.2f" % (100. * total_correct_chroma / total_voiced)), +print "%d files," % len(sys.argv[1:]), +print "total_length: %.2fs," % ((total_total * hop_s) / float(samplerate)), +print "total runtime: %.2fs" % (time.time() - start) -- 2.26.2