{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model predictions\n",
    "\n",
    "Runs a saved Keras model over every labelled wav file in `data/clean`, prints the window-level accuracy, and writes the per-file class probabilities and predicted label to `predictions.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pickle\n",
    "from tqdm import tqdm\n",
    "from scipy.io import wavfile\n",
    "from python_speech_features import mfcc\n",
    "from keras.models import load_model\n",
    "import pandas as pd\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_predictions(audio_dir):\n",
    "    \"\"\"\n",
    "    Classify every labelled wav file in a directory, one fixed-size window at a time.\n",
    "\n",
    "    Each file is cut into consecutive, non-overlapping windows of config.step samples.\n",
    "    Every window is turned into MFCC features, min-max scaled with config.min / config.max\n",
    "    and passed to model.predict. The window probabilities are then averaged per file.\n",
    "\n",
    "    Relies on the notebook-level names config, model, classes and fn2class, which must\n",
    "    be defined before this function is called.\n",
    "\n",
    "    Arguments:\n",
    "    ----------\n",
    "    audio_dir : location of folder containing wav files\n",
    "\n",
    "    Returns:\n",
    "    --------\n",
    "    y_true : A list containing the true class index of every window\n",
    "\n",
    "    y_pred : A list containing the predicted class index of every window\n",
    "\n",
    "    fn_prob : A dict mapping each file name to its class probabilities averaged over\n",
    "              all of its windows (all NaN if the file is shorter than one window)\n",
    "\n",
    "    \"\"\"\n",
    "\n",
    "    y_true = []   # True class index, one entry per window\n",
    "    y_pred = []   # Predicted class index, one entry per window\n",
    "    fn_prob = {}  # File name -> mean class probabilities\n",
    "\n",
    "    # Entries without a label (stray OS files, unlisted recordings) cannot be scored,\n",
    "    # so they are skipped instead of failing on the fn2class lookup.\n",
    "    entries = sorted(os.listdir(audio_dir))\n",
    "    labelled = [fn for fn in entries if fn in fn2class]\n",
    "    if len(labelled) < len(entries):\n",
    "        print('Skipping {} entries with no label'.format(len(entries) - len(labelled)))\n",
    "\n",
    "    print('Extracting features from audio')\n",
    "    for fn in tqdm(labelled):\n",
    "        rate, wav = wavfile.read(os.path.join(audio_dir, fn))\n",
    "        label = fn2class[fn]\n",
    "        c = classes.index(label)  # Grab true label\n",
    "        y_prob = []\n",
    "\n",
    "        # Step through audio file in full windows. The '+ 1' keeps the final window\n",
    "        # when the file length is an exact multiple of config.step.\n",
    "        for i in range(0, wav.shape[0] - config.step + 1, config.step):\n",
    "            sample = wav[i:i + config.step]  # Take chunk of audio file\n",
    "            x = mfcc(sample, rate, numcep=config.nfeat,\n",
    "                     nfilt=config.nfilt, nfft=config.nfft)  # Take mfcc of our sample\n",
    "            x = (x - config.min) / (config.max - config.min)  # Normalize based on max/min\n",
    "\n",
    "            # Add a leading batch axis (plus a trailing channel axis in 'conv' mode)\n",
    "            if config.mode == 'conv':\n",
    "                x = x.reshape(1, x.shape[0], x.shape[1], 1)\n",
    "            elif config.mode == 'time':\n",
    "                x = np.expand_dims(x, axis=0)\n",
    "            y_hat = model.predict(x)\n",
    "            y_prob.append(y_hat)\n",
    "            y_pred.append(np.argmax(y_hat))\n",
    "            y_true.append(c)\n",
    "\n",
    "        if y_prob:\n",
    "            fn_prob[fn] = np.mean(y_prob, axis=0).flatten()\n",
    "        else:\n",
    "            # File shorter than one window: nothing was predicted. Store NaN for every\n",
    "            # class rather than averaging an empty list.\n",
    "            fn_prob[fn] = np.full(len(classes), np.nan)\n",
    "\n",
    "    return y_true, y_pred, fn_prob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting features from audio\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 300/300 [01:08<00:00, 4.36it/s]\n"
     ]
    }
   ],
   "source": [
    "df = pd.read_csv('data/instruments.csv')\n",
    "classes = list(np.unique(df.label))\n",
    "fn2class = dict(zip(df.fname, df.label))\n",
    "p_path = os.path.join('pickles', 'conv.p')\n",
    "\n",
    "# NOTE: pickle.load can execute arbitrary code - only load pickle files you trust.\n",
    "with open(p_path, 'rb') as handle:\n",
    "    config = pickle.load(handle)\n",
    "\n",
    "model = load_model(config.model_path)\n",
    "\n",
    "y_true, y_pred, fn_prob = build_predictions('data/clean')\n",
    "\n",
    "# y_true / y_pred hold one entry per audio window, so this is window-level accuracy\n",
    "acc_score = accuracy_score(y_true=y_true, y_pred=y_pred)\n",
    "print('Window-level accuracy: {:.4f}'.format(acc_score))\n",
    "\n",
    "# Mean class probabilities for each sample, in the same order as the dataframe rows\n",
    "y_probs = [fn_prob[fname] for fname in df.fname]\n",
    "\n",
    "# Add one probability column per class to the dataframe\n",
    "for j, c in enumerate(classes):\n",
    "    df[c] = [y_prob[j] for y_prob in y_probs]\n",
    "\n",
    "# From here on y_pred holds one predicted label per file (not per window)\n",
    "y_pred = [classes[np.argmax(y)] for y in y_probs]\n",
    "df['y_pred'] = y_pred\n",
    "\n",
    "# Save as csv file which now contains all samples along with true label, predicted label, and probabilities for each class\n",
    "df.to_csv('predictions.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}