In [1]:
import os
import numpy as np
import pickle
from tqdm import tqdm
from scipy.io import wavfile
from python_speech_features import mfcc
from keras.models import load_model
import pandas as pd
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [2]:
def build_predictions(audio_dir):
    """
    Run the model over every wav file in a directory and collect predictions.

    Each file is cut into consecutive, non-overlapping windows of
    `config.step` samples. Every window is converted to MFCC features,
    min/max-normalised with `config.min` / `config.max`, reshaped according
    to `config.mode` and passed to `model.predict`.

    This function reads the notebook-level names `fn2class`, `classes`,
    `config` and `model`; they must be defined before it is called.

    Arguments:
    ----------
    audio_dir : location of folder containing wav files. Entries whose name
        does not end in '.wav' (case-insensitive) are ignored.

    Returns:
    --------
    y_true : A list with one true class index per processed window

    y_pred : A list with one predicted class index per processed window
        (same length and order as y_true)

    fn_prob : A dict mapping each file name to the mean of the model outputs
        over all of that file's windows (flattened to 1-D). A file shorter
        than one window maps to an all-NaN vector.

    Raises:
    -------
    KeyError : if a wav file in audio_dir has no entry in `fn2class`.
    """

    y_true = []   # true class index, one entry per window
    y_pred = []   # predicted class index, one entry per window
    fn_prob = {}  # file name -> mean model output over the file's windows

    print('Extracting features from audio')

    # Skip stray non-audio entries (they would make wavfile.read fail) and
    # sort so the processing order does not depend on the filesystem.
    wav_names = sorted(fn for fn in os.listdir(audio_dir)
                       if fn.lower().endswith('.wav'))

    for fn in tqdm(wav_names):
        rate, wav = wavfile.read(os.path.join(audio_dir, fn))
        label = fn2class[fn]
        c = classes.index(label)  # true label as an index into `classes`
        y_prob = []

        # Step through the file one full window at a time. The "+ 1" keeps the
        # final window when the file length is an exact multiple of the step
        # (a start index of len - step still yields a complete window).
        for i in range(0, wav.shape[0] - config.step + 1, config.step):
            sample = wav[i:i + config.step]
            x = mfcc(sample, rate, numcep=config.nfeat,
                     nfilt=config.nfilt, nfft=config.nfft)
            x = (x - config.min) / (config.max - config.min)  # min/max normalisation

            if config.mode == 'conv':
                # add batch and channel axes
                x = x.reshape(1, x.shape[0], x.shape[1], 1)
            elif config.mode == 'time':
                # add batch axis only
                x = np.expand_dims(x, axis=0)

            y_hat = model.predict(x)
            y_prob.append(y_hat)
            y_pred.append(np.argmax(y_hat))
            y_true.append(c)

        if y_prob:
            fn_prob[fn] = np.mean(y_prob, axis=0).flatten()
        else:
            # File is shorter than one window: np.mean([]) would warn and give
            # a wrongly-sized result. Store NaNs instead.
            # NOTE(review): assumes the model emits one value per entry in
            # `classes` - confirm against the trained model.
            fn_prob[fn] = np.full(len(classes), np.nan)

    return y_true, y_pred, fn_prob

In [20]:
# Load the sample metadata; the code below reads its `fname` and `label` columns.
df = pd.read_csv('data/instruments.csv')
classes = list(np.unique(df.label))       # sorted unique labels; list position is the class index
fn2class = dict(zip(df.fname, df.label))  # file name -> true label
p_path = os.path.join('pickles', 'conv.p')

# SECURITY: pickle.load can execute arbitrary code while unpickling.
# Only load config pickles that were produced by a trusted source.
with open(p_path, 'rb') as handle:
    config = pickle.load(handle)

model = load_model(config.model_path)

y_true, y_pred, fn_prob = build_predictions('data/clean')

# Accuracy over individual audio windows (build_predictions returns one
# entry per window, not per file). The value is stored but not displayed here.
acc_score = accuracy_score(y_true=y_true, y_pred=y_pred)

# Per-file mean probability vectors, in the same order as the rows of df.
y_probs = [fn_prob[fname] for fname in df.fname]

# Build the whole probability table once and attach it column by column,
# instead of writing single cells into df while iterating over it.
# zip() pairs each class with its probability; a class with no matching
# value is left as NaN.
prob_table = pd.DataFrame(
    [dict(zip(classes, y_prob)) for y_prob in y_probs],
    columns=classes,
    index=df.index,
).astype('float64')
for c in classes:
    df[c] = prob_table[c]

# From here on y_pred holds one predicted label name per file
# (it previously held window-level class indices).
y_pred = [classes[np.argmax(y)] for y in y_probs]
df['y_pred'] = y_pred

# Save as csv file which now contains all samples along with true label,
# predicted label, and probabilities for each class
df.to_csv('predictions.csv', index=False)

Extracting features from audio


100%|████████████████████████████████████████████████████████████████████████████████| 300/300 [01:08<00:00, 4.36it/s]
