From 870855670c7d903af86e10ca5e5051a30e000f90 Mon Sep 17 00:00:00 2001 From: Amaury Hazan Date: Fri, 7 Sep 2007 15:47:55 +0200 Subject: [PATCH] 80% wrapped-up filterbank and mfcc --- src/filterbank.c | 173 ++++++++++++++++++++++++++++++++++++----- src/filterbank.h | 39 +++++++++- src/mfcc.c | 195 +++++++++++++++++++++++++++++++++++++++-------- src/mfcc.h | 70 ++++++++++++----- 4 files changed, 405 insertions(+), 72 deletions(-) diff --git a/src/filterbank.c b/src/filterbank.c index 3a63eebb..4aeecfb4 100644 --- a/src/filterbank.c +++ b/src/filterbank.c @@ -26,15 +26,43 @@ // Struct Declaration -/** \brief A structure to store a set of n_filters Mel filters */ -typedef struct aubio_mel_filter_ { - int n_filters; - smpl_t **filters; +/** \brief A structure to store a set of n_filters filters of lenghts win_s */ +struct aubio_filterbank_t_ { + uint_t win_s; + uint_t n_filters; + fvec_t *filters; }; +aubio_filterbank_t * new_aubio_filterbank(uint_t n_filters, uint_t win_s){ + + int filter_cnt; + /** allocating space for filterbank object */ + aubio_filterbank_t * fb = AUBIO_NEW(aubio_filterbank_t); + fb->win_s=win_s; + fb->n_filters=n_filters; + + /** allocating filter tables */ + fb->filters=AUBIO_ARRAY(n_filters,f_vec_t); + for (filter_cnt=0; filter_cntn_filters; filter_cnt++) + del_fvec(fb->filters[filter_cnt]); + AUBIO_FREE(fb->filters); + AUBIO_FREE(fb); + +} + // Initialization -int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max, int freq_bands, smpl_t **fft_tables){ +void aubio_filterbank_mfcc_init(aubio_filterbank_t * fb, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max){ int n, i, k, *fft_peak, M, next_peak; smpl_t norm, mel_freq_max, mel_freq_min, norm_fact, height, inc, val, @@ -46,33 +74,33 @@ int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t fr mel_freq_max = 1127 * log(1 + freq_max / 700); mel_freq_min = 1127 * log(1 + freq_min / 700); - freq_bw_mel = (mel_freq_max - mel_freq_min) / freq_bands; + freq_bw_mel = (mel_freq_max - mel_freq_min) / fb->n_filters; - mel_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t)); + mel_peak = (smpl_t *)malloc((fb->n_filters + 2) * sizeof(smpl_t)); /* +2 for zeros at start and end */ - lin_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t)); - fft_peak = (int *)malloc((freq_bands + 2) * sizeof(int)); - height_norm = (smpl_t *)malloc(freq_bands * sizeof(smpl_t)); + lin_peak = (smpl_t *)malloc((fb->n_filters + 2) * sizeof(smpl_t)); + fft_peak = (int *)malloc((fb->n_filters + 2) * sizeof(int)); + height_norm = (smpl_t *)malloc(fb->n_filters * sizeof(smpl_t)); if(mel_peak == NULL || height_norm == NULL || lin_peak == NULL || fft_peak == NULL) return XTRACT_MALLOC_FAILED; - M = N >> 1; + M = fb->win_s >> 1; mel_peak[0] = mel_freq_min; lin_peak[0] = 700 * (exp(mel_peak[0] / 1127) - 1); fft_peak[0] = lin_peak[0] / nyquist * M; - for (n = 1; n <= freq_bands; n++){ + for (n = 1; n <= fb->n_filters; n++){ /*roll out peak locations - mel, linear and linear on fft window scale */ mel_peak[n] = mel_peak[n - 1] + freq_bw_mel; lin_peak[n] = 700 * (exp(mel_peak[n] / 1127) -1); fft_peak[n] = lin_peak[n] / nyquist * M; } - for (n = 0; n < freq_bands; n++){ + for (n = 0; n < fb->n_filters; n++){ /*roll out normalised gain of each peak*/ if (style == XTRACT_EQUAL_GAIN){ height = 1; @@ -87,7 +115,7 @@ int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t fr i = 0; - for(n = 0; n < freq_bands; n++){ + for(n = 0; n < fb->n_filters; n++){ /*calculate the rise increment*/ if(n > 0) @@ -98,11 +126,13 @@ int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t fr /*zero the start of the array*/ for(k = 0; k < i; k++) - fft_tables[n][k] = 0.f; + //fft_tables[n][k] = 0.f; + fb->filters[n]->data[0][k]=0.f; /*fill in the rise */ for(; i <= fft_peak[n]; i++){ - fft_tables[n][i] = val; + // fft_tables[n][i] = val; + fb->filters[n]->data[0][k]=val; val += inc; } @@ -114,13 +144,15 @@ int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t fr /*reverse fill the 'fall' */ for(i = next_peak; i > fft_peak[n]; i--){ - fft_tables[n][i] = val; + //fft_tables[n][i] = val; + fb->filters[n]->data[0][k]=val; val += inc; } /*zero the rest of the array*/ - for(k = next_peak + 1; k < N; k++) - fft_tables[n][k] = 0.f; + for(k = next_peak + 1; k < fb->win_s; k++) + //fft_tables[n][k] = 0.f; + fb->filters[n]->data[0][k]=0.f; } free(mel_peak); @@ -128,6 +160,107 @@ int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t fr free(height_norm); free(fft_peak); - return XTRACT_SUCCESS; + //return XTRACT_SUCCESS; } + +//to be deleted code + + +// int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max, int freq_bands, smpl_t **fft_tables){ +// +// int n, i, k, *fft_peak, M, next_peak; +// smpl_t norm, mel_freq_max, mel_freq_min, norm_fact, height, inc, val, +// freq_bw_mel, *mel_peak, *height_norm, *lin_peak; +// +// mel_peak = height_norm = lin_peak = NULL; +// fft_peak = NULL; +// norm = 1; +// +// mel_freq_max = 1127 * log(1 + freq_max / 700); +// mel_freq_min = 1127 * log(1 + freq_min / 700); +// freq_bw_mel = (mel_freq_max - mel_freq_min) / freq_bands; +// +// mel_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t)); +// /* +2 for zeros at start and end */ +// lin_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t)); +// fft_peak = (int *)malloc((freq_bands + 2) * sizeof(int)); +// height_norm = (smpl_t *)malloc(freq_bands * sizeof(smpl_t)); +// +// if(mel_peak == NULL || height_norm == NULL || +// lin_peak == NULL || fft_peak == NULL) +// return XTRACT_MALLOC_FAILED; +// +// M = N >> 1; +// +// mel_peak[0] = mel_freq_min; +// lin_peak[0] = 700 * (exp(mel_peak[0] / 1127) - 1); +// fft_peak[0] = lin_peak[0] / nyquist * M; +// +// +// for (n = 1; n <= freq_bands; n++){ +// /*roll out peak locations - mel, linear and linear on fft window scale */ +// mel_peak[n] = mel_peak[n - 1] + freq_bw_mel; +// lin_peak[n] = 700 * (exp(mel_peak[n] / 1127) -1); +// fft_peak[n] = lin_peak[n] / nyquist * M; +// } +// +// for (n = 0; n < freq_bands; n++){ +// /*roll out normalised gain of each peak*/ +// if (style == XTRACT_EQUAL_GAIN){ +// height = 1; +// norm_fact = norm; +// } +// else{ +// height = 2 / (lin_peak[n + 2] - lin_peak[n]); +// norm_fact = norm / (2 / (lin_peak[2] - lin_peak[0])); +// } +// height_norm[n] = height * norm_fact; +// } +// +// i = 0; +// +// for(n = 0; n < freq_bands; n++){ +// +// /*calculate the rise increment*/ +// if(n > 0) +// inc = height_norm[n] / (fft_peak[n] - fft_peak[n - 1]); +// else +// inc = height_norm[n] / fft_peak[n]; +// val = 0; +// +// /*zero the start of the array*/ +// for(k = 0; k < i; k++) +// fft_tables[n][k] = 0.f; +// +// /*fill in the rise */ +// for(; i <= fft_peak[n]; i++){ +// fft_tables[n][i] = val; +// val += inc; +// } +// +// /*calculate the fall increment */ +// inc = height_norm[n] / (fft_peak[n + 1] - fft_peak[n]); +// +// val = 0; +// next_peak = fft_peak[n + 1]; +// +// /*reverse fill the 'fall' */ +// for(i = next_peak; i > fft_peak[n]; i--){ +// fft_tables[n][i] = val; +// val += inc; +// } +// +// /*zero the rest of the array*/ +// for(k = next_peak + 1; k < N; k++) +// fft_tables[n][k] = 0.f; +// } +// +// free(mel_peak); +// free(lin_peak); +// free(height_norm); +// free(fft_peak); +// +// return XTRACT_SUCCESS; +// +// } diff --git a/src/filterbank.h b/src/filterbank.h index 4b57bd12..578219ef 100644 --- a/src/filterbank.h +++ b/src/filterbank.h @@ -1,6 +1,6 @@ /* Copyright (C) 2007 Amaury Hazan - Ported to aubio from LibXtract + adapted to aubio from LibXtract http://libxtract.sourceforge.net/ @@ -19,6 +19,14 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/** \file + + Filterbank object + + General-purpose spectral filterbank object. Comes with mel-filter initialization function. + +*/ + #ifndef AUBIOFILTERBANK_H #define AUBIOFILTERBANK_H @@ -28,7 +36,34 @@ extern "C" { -typedef struct aubio_mel_filter_ aubio_mel_filter; +typedef struct aubio_filterbank_t_ aubio_filterbank_t; + +/** create filterbank object + + \param win_s size of analysis buffer (and length the FFT transform) + \param n_filters number of filters to create + +*/ + +aubio_filterbank_t * new_aubio_filterbank(uint_t n_filters, uint_t win_s); + +/** destroy filterbank object + + \param fb filterbank, as returned by new_aubio_filterbank method + +*/ +void del_aubio_filterbank(aubio_filterbank_t * fb); + +/** filterbank initialization for mel filters + + \param fb filterbank, as returned by new_aubio_filterbank method + \param nyquist nyquist frequency, i.e. half of the sampling rate + \param style libxtract style + \param freqmin lowest filter frequency + \param freqmax highest filter frequency + +*/ +void aubio_filterbank_mfcc_init(aubio_filterbank_t * fb, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max); // Initialization diff --git a/src/mfcc.c b/src/mfcc.c index a92d9e61..e4de069f 100644 --- a/src/mfcc.c +++ b/src/mfcc.c @@ -23,64 +23,193 @@ #include "aubio_priv.h" #include "sample.h" #include "fft.h" +#include "filterbank.h" #include "mfcc.h" #include "math.h" -/* -new_aubio_mfcc -aubio_mfcc_do -del_aubio_mfcc -*/ -// Computation -// Added last two arguments to be able to pass from example +/** Internal structure for mfcc object **/ +struct aubio_mfcc_t_{ -int aubio_mfcc_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){ + /** grain length */ + uint_t win_s; + + /** sample rate (needed?) */ + uint_t samplerate; - aubio_mel_filter *f; - int n, filter; + /** number of channels */ + uint_t channels; + + /** filter bank */ + aubio_filterbank_t * fb; - f = (aubio_mel_filter *)argv; - - for(filter = 0; filter < f->n_filters; filter++){ - result[filter] = 0.f; - for(n = 0; n < N; n++){ - result[filter] += data[n] * f->filters[filter][n]; + /** number of coefficients (= fb->n_filters/2 +1) */ + uint_t n_coefs; + + /** lowest frequency for filters */ + smpl_t lowfreq; + + /** highest frequency for filters */ + smpl_t highfreq; + + /** input buffer for dct * [fb->n_filters] */ + fvec_t * in_dct; + + /** fft object for dct */ + aubio_mfft_t * fft_dct; + + /** output buffer for dct */ + cvec_t * fftgrain_dct; + +}; + + +aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels){ + + + /** allocating space for mfcc object */ + + aubio_mfcc_t * mfcc = AUBIO_NEW(aubio_mfcc_t); + + mfcc->win_s=win_s; + mfcc->samplerate=samplerate; + mfcc->channels=channels; + mfcc->n_coefs=n_coefs; + mfcc->lowfreq=lowfreq; + mfcc->highfreq=highfreq; + + /** filterbank allocation */ + //we need (n_coefs-1)*2 filters to obtain n_coefs coefficients after dct + mfcc->fb=new_aubio_filterbank((n_coefs-1)*2, mfcc->win_s); + + /** allocating space for fft object (used for dct) */ + mfcc->fft_dct=new_aubio_mfft(mfcc->win_s, 1); + + /** allocating buffers */ + + mfcc->in_dct=new_fvec(mfcc->win_s, 1); + + mfcc->fftgrain_dct=new_cvec(mfcc->fb->n_filters, 1); + + /** populating the filterbank */ + + aubio_filterbank_mfcc_init(mfcc->fb, (mfcc->samplerate)/2, XTRACT_EQUAL_GAIN, mfcc->lowfreq, mfcc->highfreq); + + return mfcc; + +}; + + +void del_aubio_mfcc(aubio_mfcc_t *mf){ + + /** deleting filterbank */ + del_aubio_filterbank(mf->fb); + /** deleting mfft object */ + del_aubio_mfft(mf->fft_dct); + /** deleting buffers */ + del_fvec(mf->in_dct); + del_cvec(mf->fftgrain_dct); + + /** deleting mfcc object */ + AUBIO_FREE(mf); + +} + + +// Computation + +void aubio_mfcc_do(aubio_mfcc_t * mf, cvec_t *in, fvec_t *out){ + + aubio_filterbank_t *f = mf->fb; + uint_t n, filter_cnt; + + for(filter_cnt = 0; filter_cnt < f->n_filters; filter_cnt++){ + mf->in_dct->data[0][filter_cnt] = 0.f; + for(n = 0; n < mf->win_s; n++){ + mf->in_dct->data[0][filter_cnt] += in->norm[0][n] * f->filters[filter_cnt]->data[0][n]; } - result[filter] = LOG(result[filter] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : result[filter]); + mf->in_dct->data[0][filter_cnt] = LOG(mf->in_dct->data[0][filter_cnt] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : mf->in_dct->data[0][filter_cnt]); } //TODO: check that zero padding - for(n = filter + 1; n < N; n++) result[n] = 0; + // the following line seems useless since the in_dct buffer has the correct size + //for(n = filter + 1; n < N; n++) result[n] = 0; - aubio_dct_do(result, f->n_filters, NULL, result, fft_dct, fftgrain_dct); + aubio_dct_do(mf, mf->in_dct, out); - return XTRACT_SUCCESS; + //return XTRACT_SUCCESS; } -// Added last two arguments to be able to pass from example - -int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){ +void aubio_dct_do(aubio_mfcc_t * mf, fvec_t *in, fvec_t *out){ - //call aubio p_voc in dct setting - - //TODO: fvec as input? Remove data length, N? - - fvec_t * momo = new_fvec(20, 1); - momo->data = data; + + //fvec_t * momo = new_fvec(20, 1); + //momo->data = data; //compute mag spectrum - aubio_mfft_do (fft_dct, data, fftgrain_dct); + aubio_mfft_do (mf->fft_dct, in, mf->fftgrain_dct); int i; //extract real part of fft grain - for(i=0; inorm[0][i]*COS(fftgrain_dct->phas[0][i]); + for(i=0; in_coefs ;i++){ + out->data[0][i]= mf->fftgrain_dct->norm[0][i]*COS(mf->fftgrain_dct->phas[0][i]); } - return XTRACT_SUCCESS; + //return XTRACT_SUCCESS; } + + +///////// OLD CODE + +// int aubio_mfcc_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){ +// +// aubio_mel_filter *f; +// uint_t n, filter; +// +// f = (aubio_mel_filter *)argv; +// printf("%d",f->n_filters); +// +// for(filter = 0; filter < f->n_filters; filter++){ +// result[filter] = 0.f; +// for(n = 0; n < N; n++){ +// result[filter] += data[n] * f->filters[filter][n]; +// } +// result[filter] = LOG(result[filter] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : result[filter]); +// } +// +// //TODO: check that zero padding +// for(n = filter + 1; n < N; n++) result[n] = 0; +// +// aubio_dct_do(result, f->n_filters, NULL, result, fft_dct, fftgrain_dct); +// +// return XTRACT_SUCCESS; +// } + +// Added last two arguments to be able to pass from example + +// int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){ +// +// +// //call aubio p_voc in dct setting +// +// //TODO: fvec as input? Remove data length, N? +// +// fvec_t * momo = new_fvec(20, 1); +// momo->data = data; +// +// //compute mag spectrum +// aubio_mfft_do (fft_dct, data, fftgrain_dct); +// +// int i; +// //extract real part of fft grain +// for(i=0; inorm[0][i]*COS(fftgrain_dct->phas[0][i]); +// } +// +// +// return XTRACT_SUCCESS; +// } diff --git a/src/mfcc.h b/src/mfcc.h index d4b03639..157dfa77 100644 --- a/src/mfcc.h +++ b/src/mfcc.h @@ -147,30 +147,66 @@ typedef enum xtract_vector_ { } xtract_vector_t; +typedef struct aubio_mfcc_t_ aubio_mfcc_t; +// Creation -// Computation +/** create mfcc object -/** \brief Extract Mel Frequency Cepstral Coefficients based on a method described by Rabiner - * - * \param *data: a pointer to the first element in an array of spectral magnitudes, e.g. the first half of the array pointed to by *resul from xtract_spectrum() - * \param N: the number of array elements to be considered - * \param *argv: a pointer to a data structure of type xtract_mel_filter, containing n_filters coefficient tables to make up a mel-spaced filterbank - * \param *result: a pointer to an array containing the resultant MFCC - * - * The data structure pointed to by *argv must be obtained by first calling xtract_init_mfcc - */ + \param win_s size of analysis buffer (and length the FFT transform) + \param samplerate + \param n_coefs: number of desired coefs + \param lowfreq: lowest frequency to use in filterbank + \param highfreq highest frequency to use in filterbank + \param channels number of channels +*/ +aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels); + +// Deletion + +/** delete mfcc object + + \param mf mfcc object as returned by new_aubio_mfcc + +*/ +void del_aubio_mfcc(aubio_mfcc_t *mf); + +// Process + +/** mfcc object processing + + \param mf mfcc object as returned by new_aubio_mfcc + \param in input spectrum (win_s long) + \param out output mel coefficients buffer (n_filters/2 +1 long) + +*/ + +void aubio_mfcc_do(aubio_mfcc_t * mf, cvec_t *in, fvec_t *out); + +/** intermediate dct involved in aubio_mfcc_do + \param mf mfcc object as returned by new_aubio_mfcc + \param in input spectrum (n_filters long) + \param out output mel coefficients buffer (n_filters/2 +1 long) + +*/ + +void aubio_dct_do(aubio_mfcc_t * mf, fvec_t *in, fvec_t *out); + + + + +//old code + + +/* int aubio_mfcc_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t *fft_dct, cvec_t *fftgrain_dct); -/** \brief Extract the Discrete Cosine transform of a time domain signal - * \param *data: a pointer to the first element in an array of floats representing an audio vector - * \param N: the number of array elements to be considered - * \param *argv: a pointer to NULL - * \param *result: a pointer to an array containing resultant dct coefficients - */ -int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t *fft_dct, cvec_t *fftgrain_dct); +int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t *fft_dct, cvec_t *fftgrain_dct);*/ + + + #ifdef __cplusplus } -- 2.26.2