import utils
import IPython.display as ipd
import sklearn.decomposition as decomp
from scipy.io.wavfile import write as saveaudio
Eigensounds
Demonstration
The section below demonstrates eigensounds, so the audio is loaded at a higher quality and sample size than the data used for classification later in the notebook.
We keep 2 principal components: PCA is fit on the transposed clip matrix, so each component is itself a waveform; the first component is played as the left audio channel and the second as the right.
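As a quick, hypothetical sanity check on shapes (the clip matrix here is random noise, and clips-as-rows is an assumption about what audiocrunch returns):
import numpy as np
import sklearn.decomposition as decomp
clips = np.random.rand(60, 5 * 4500)         # 60 fake 5-second clips at 4500 Hz, one clip per row
eigs = decomp.PCA(2).fit_transform(clips.T)  # shape (22500, 2): two "eigensound" waveforms
stereo = eigs.T                              # shape (2, 22500): rows become left/right channels
ipd.Audio accepts such a (channels, samples) array, which is why the eigensound cells below pass the transposed PCA output.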
# Loading the audiofiles with higher sample rate for eigensound creation
jazz_hq = utils.audiocrunch("data/jazz.mp3")
adele_hq = utils.audiocrunch("data/adele.mp3")
liszt_hq = utils.audiocrunch("data/liszt.mp3")
chopin_hq = utils.audiocrunch("data/chopin.mp3")
mozart_hq = utils.audiocrunch("data/mozart.mp3")
topbil_hq = utils.audiocrunch("data/popbil.mp3")
Making sure the loading process worked properly:
ipd.Audio(jazz_hq[45], rate=4500)
Eigensound of Jazz.
eigs_jazz = decomp.PCA(2).fit_transform(jazz_hq.T)
saveaudio('eigensounds_DNS/eigs_jazz.wav', 4500, eigs_jazz)
ipd.Audio(eigs_jazz.T, rate=4500)
Eigensound of Billboard Top 30 Single Songs Dec'21.
eigs_topbil = decomp.PCA(2).fit_transform(topbil_hq.T)
saveaudio('eigensounds_DNS/eigs_topbil.wav', 4500, eigs_topbil)
ipd.Audio(eigs_topbil.T, rate=4500)
Eigensound of Adele.
eigs_adele = decomp.PCA(2).fit_transform(adele_hq.T)
saveaudio('eigensounds_DNS/eigs_adele.wav', 4500, eigs_adele)
ipd.Audio(eigs_adele.T, rate=4500)
Eigensound of Liszt.
eigs_liszt = decomp.PCA(2).fit_transform(liszt_hq.T)
saveaudio('eigensounds_DNS/eigs_liszt.wav', 4500, eigs_liszt)
ipd.Audio(eigs_liszt.T, rate=4500)
Eigensound of Mozart.
eigs_mozart = decomp.PCA(2).fit_transform(mozart_hq.T)
saveaudio('eigensounds_DNS/eigs_mozart.wav', 4500, eigs_mozart)
ipd.Audio(eigs_mozart.T, rate=4500)
Eigensound of Chopin.
eigs_chopin = decomp.PCA(2).fit_transform(chopin_hq.T)
saveaudio('eigensounds_DNS/eigs_chopin.wav', 4500, eigs_chopin)
ipd.Audio(eigs_chopin.T, rate=4500)
import numpy as np
import matplotlib.pyplot as plt
# Preprocessing
import utils
import sklearn.preprocessing as pp
import sklearn.decomposition as decomp
# Classification Models
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Cross-Validation
from sklearn.metrics import (
classification_report,
confusion_matrix,
ConfusionMatrixDisplay,
)
from sklearn.model_selection import train_test_split
# Defaults
seed = 42
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
Below we load the audio files for each group (see the comments on audiocrunch in utils.py for more detail about the downrating, scaling, and sampling process).
Things to note: the audio files were preprocessed separately with the Audacity software. After the individual files were collected, their beginnings and endings were trimmed and they were joined into a single `.mp3` per group.
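For orientation, here is a minimal, hypothetical sketch of what audiocrunch could look like; the authoritative version is in utils.py. It assumes librosa is available, clips are returned as rows, and drate acts as a decimation factor (the real semantics may differ).
import librosa
def audiocrunch_sketch(path, drate=10, ssize=5, orig_sr=44100):
    # Load mono audio at a downrated sampling rate (assumed here: orig_sr / drate)
    target_sr = orig_sr // drate
    sig, sr = librosa.load(path, sr=target_sr, mono=True)
    # Scale the signal to [0, 1]
    sig = (sig - sig.min()) / (sig.max() - sig.min())
    # Chop into non-overlapping clips of ssize seconds, one clip per row
    n = sr * ssize
    n_clips = len(sig) // n
    return sig[: n_clips * n].reshape(n_clips, n)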
# Loading the audiofiles:
downrate = 50 # Down from 44,000 (a compression of 99.89%)!
samplesize = 5 # seconds
chopin = utils.audiocrunch("data/chopin.mp3", drate=downrate, ssize=samplesize)
mozart = utils.audiocrunch("data/mozart.mp3", drate=downrate, ssize=samplesize)
liszt = utils.audiocrunch("data/liszt.mp3", drate=downrate, ssize=samplesize)
jazz = utils.audiocrunch("data/jazz.mp3", drate=downrate, ssize=samplesize)
topbil = utils.audiocrunch("data/popbil.mp3", drate=downrate, ssize=samplesize)
adele = utils.audiocrunch("data/adele.mp3", drate=downrate, ssize=samplesize)
Here we aggregate the audio sets into X and produce the labels array y.
sig_all = np.vstack([chopin, mozart, liszt, jazz, topbil, adele])
X = sig_all
y = np.r_[
    np.full(chopin.shape[0], 0),
    np.full(mozart.shape[0], 1),
    np.full(liszt.shape[0], 2),
    np.full(jazz.shape[0], 3),
    np.full(topbil.shape[0], 4),
    np.full(adele.shape[0], 5),
]
The data is shuffled and split 70/30 into stratified train/test sets (using the seed defined above so the split is reproducible):
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, stratify=y, random_state=seed
)
We perform repeated k-fold cross-validation for several classification models on our data. This step is run without PCA, for comparison purposes.
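utils.clf_xval is a small helper defined in utils.py; a minimal, hypothetical sketch of it, assuming it wraps sklearn's RepeatedStratifiedKFold and cross_validate with an optional StandardScaler + PCA step (mirroring the explicit loop at the end of this section), might look like:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as skPCA
from sklearn.model_selection import RepeatedStratifiedKFold, cross_validate
def clf_xval_sketch(X, y, names, clfs, repeats=2, folds=5, PCA=None):
    # Repeated stratified k-fold CV; report mean accuracy, its spread, and mean fit time
    rkcv = RepeatedStratifiedKFold(n_splits=folds, n_repeats=repeats)
    print(f"Averages of {repeats} Repetitions\n- {folds}-folds Cross-Validated -")
    for name, clf in zip(names, clfs):
        steps = []
        if PCA is not None:
            # Optional preprocessing: standardize, then reduce to `PCA` components
            steps += [("scale", StandardScaler()), ("pca", skPCA(n_components=PCA))]
        steps.append(("clf", clf))
        cvs = cross_validate(Pipeline(steps), X, y, cv=rkcv)
        print("-- " + name + "--")
        print(" Accuracy: %.2f%% (±%.2f%%)"
              % (cvs["test_score"].mean() * 100, cvs["test_score"].std() * 100))
        print(" Time: %.2fms" % (cvs["fit_time"].mean() * 1e3))
        print("---------------------------")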
cnames = [
"NB -------------------",
"SVM ------------------",
"𝑘-NN -----------------",
"RF -------------------",
"LDA ------------------",
"MLP ------------------",
]
classifiers = [
GaussianNB(),
SVC(kernel="linear"),
KNeighborsClassifier(n_neighbors=4),
RandomForestClassifier(max_depth=3),
LinearDiscriminantAnalysis(),
MLPClassifier(hidden_layer_sizes=(24,12,6), max_iter=1000),
]
utils.clf_xval(X_train, y_train, cnames, classifiers, repeats=2)
Averages of 2 Repetitions
- 5-folds Cross-Validated -
-- NB ---------------------
 Accuracy: 83.64% (±1.37%)
 Time: 3.93ms
---------------------------
-- SVM --------------------
 Accuracy: 86.99% (±0.68%)
 Time: 160.65ms
---------------------------
-- 𝑘-NN -------------------
 Accuracy: 63.04% (±0.63%)
 Time: 1.43ms
---------------------------
-- RF ---------------------
 Accuracy: 83.52% (±0.39%)
 Time: 600.53ms
---------------------------
-- LDA --------------------
 Accuracy: 66.12% (±1.47%)
 Time: 66.72ms
---------------------------
-- MLP --------------------
 Accuracy: 75.56% (±12.41%)
 Time: 3191.66ms
---------------------------
Without any preprocessing (except for the [0, 1] scaling done during loading), the SVM classifier performed best, with an average cross-validated accuracy of 86.99% (roughly 13% error).
Now we apply the same classifiers to the data after a 2-component PCA:
utils.clf_xval(X_train, y_train, cnames, classifiers, repeats=10, PCA=2)
Averages of 10 Repetitions
- 5-folds Cross-Validated -
-- NB ---------------------
 Accuracy: 96.42% (±0.67%)
 Time: 11.67ms
---------------------------
-- SVM --------------------
 Accuracy: 88.04% (±0.61%)
 Time: 184.92ms
---------------------------
-- 𝑘-NN -------------------
 Accuracy: 96.40% (±0.55%)
 Time: 11.02ms
---------------------------
-- RF ---------------------
 Accuracy: 77.95% (±2.20%)
 Time: 151.62ms
---------------------------
-- LDA --------------------
 Accuracy: 70.37% (±1.73%)
 Time: 11.56ms
---------------------------
-- MLP --------------------
 Accuracy: 94.73% (±5.73%)
 Time: 1629.26ms
---------------------------
After PCA, Naive Bayes and k-NN reach about 96% cross-validated accuracy with fit times around 11 ms (including the scaling and PCA processing time).
Now we prepare the train and test data sets and fit the NB, k-NN, and MLP classifiers.
scale = pp.StandardScaler().fit(X_train)
X_train_scale = scale.transform(X_train)
X_test_scale = scale.transform(X_test)
PCA_t = decomp.PCA(2).fit(X_train_scale)
X_train_pca = PCA_t.transform(X_train_scale)
X_test_pca = PCA_t.transform(X_test_scale)
groups = ["Chopin","Mozart","Liszt","Jazz","Pop(ish)","Adele"]
clf_NB = GaussianNB()
clf_NB.fit(X_train_pca, y_train)
y_test_pred = clf_NB.predict(X_test_pca)
print(classification_report(y_test, y_test_pred, target_names=groups))
NB_cm = confusion_matrix(y_test, y_test_pred)
# Normalize each row by that class's true-sample count (per-class recall on the diagonal)
disp = ConfusionMatrixDisplay(confusion_matrix=NB_cm / NB_cm.sum(axis=1)[:, None],
                              display_labels=groups)
disp.plot(cmap='magma_r')
              precision    recall  f1-score   support

      Chopin       0.91      0.88      0.90       225
      Mozart       0.98      0.98      0.98       224
       Liszt       0.92      0.98      0.95       224
        Jazz       1.00      1.00      1.00       216
    Pop(ish)       0.99      0.98      0.99       354
       Adele       0.99      0.98      0.99       166

    accuracy                           0.97      1409
   macro avg       0.97      0.97      0.97      1409
weighted avg       0.97      0.97      0.97      1409
clf_KNN = KNeighborsClassifier(n_neighbors=4)
clf_KNN.fit(X_train_pca, y_train)
y_test_pred = clf_KNN.predict(X_test_pca)
print(classification_report(y_test, y_test_pred, target_names=groups))
KNN_cm = confusion_matrix(y_test, y_test_pred)
# Normalize each row by that class's true-sample count (per-class recall on the diagonal)
ConfusionMatrixDisplay(confusion_matrix=KNN_cm / KNN_cm.sum(axis=1)[:, None],
                       display_labels=groups).plot(cmap='magma_r')
              precision    recall  f1-score   support

      Chopin       0.90      0.89      0.90       225
      Mozart       0.98      0.97      0.98       224
       Liszt       0.95      0.93      0.94       224
        Jazz       1.00      1.00      1.00       216
    Pop(ish)       0.99      0.99      0.99       354
       Adele       0.99      1.00      0.99       166

    accuracy                           0.97      1409
   macro avg       0.97      0.97      0.97      1409
weighted avg       0.97      0.97      0.97      1409
clf_MLP = MLPClassifier(hidden_layer_sizes=(216, 72, 36, 6), max_iter=1000)
clf_MLP.fit(X_train_pca, y_train)
y_test_pred = clf_MLP.predict(X_test_pca)
print(classification_report(y_test, y_test_pred, target_names=groups))
MLP_cm = confusion_matrix(y_test, y_test_pred)
# Normalize each row by that class's true-sample count (per-class recall on the diagonal)
ConfusionMatrixDisplay(confusion_matrix=MLP_cm / MLP_cm.sum(axis=1)[:, None],
                       display_labels=groups).plot(cmap='magma_r')
              precision    recall  f1-score   support

      Chopin       0.98      0.79      0.87       225
      Mozart       0.98      1.00      0.99       224
       Liszt       0.90      0.99      0.94       224
        Jazz       1.00      1.00      1.00       216
    Pop(ish)       0.96      1.00      0.98       354
       Adele       0.99      1.00      0.99       166

    accuracy                           0.96      1409
   macro avg       0.97      0.96      0.96      1409
weighted avg       0.97      0.96      0.96      1409
%%timeit
clf_NB = GaussianNB()
clf_NB.fit(X_train_pca, y_train)
y_test_pred = clf_NB.predict(X_test_pca)
1.33 ms ± 11.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%%timeit
clf_KNN = KNeighborsClassifier(n_neighbors=4)
clf_KNN.fit(X_train_pca, y_train)
y_test_pred = clf_KNN.predict(X_test_pca)
26.4 ms ± 296 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
%%timeit
clf_MLP = MLPClassifier(hidden_layer_sizes=(216, 72, 36, 6), max_iter=1000)
clf_MLP.fit(X_train_pca, y_train)
y_test_pred = clf_MLP.predict(X_test_pca)
1.94 s ± 562 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
The following code investigates how the number of principal components affects the accuracy of our classification.
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline
cnames = [
"NB w 30 PCA ----------",
"NB w 25 PCA ----------",
"NB w 15 PCA ----------",
"NB w 7 PCA -----------",
"NB w 5 PCA -----------",
"NB w 3 PCA -----------",
"NB w 2 PCA -----------",
"NB w 1 PCA -----------",
]
PCAvals = [30, 25, 15, 7, 5, 3, 2, 1]
rkcv = RepeatedStratifiedKFold(n_splits=5,
n_repeats=5)
print("Averages of " + str(5) + " Repetitions" + "\n- " +
str(5) + "-folds Cross-Validated -")
for name, pca_ in zip(cnames, PCAvals):
pipe = Pipeline(steps=[
("scale", pp.StandardScaler()),
("PCA", decomp.PCA(n_components=pca_)),
(name, GaussianNB()),
])
cvs = cross_validate(pipe, X_train, y_train, cv=rkcv)
print("-- " + name + "--")
print(
" Accuracy: %.2f%% (±%.2f%%)" %
(np.mean(cvs["test_score"]) * 100, np.std(cvs["test_score"]) * 100))
print(" Time: %.2fms" % (np.mean(cvs["fit_time"]) * 1e3))
print("---------------------------")
Averages of 5 Repetitions
- 5-folds Cross-Validated -
-- NB w 30 PCA ------------
 Accuracy: 90.57% (±1.38%)
 Time: 13.93ms
---------------------------
-- NB w 25 PCA ------------
 Accuracy: 91.43% (±1.22%)
 Time: 13.78ms
---------------------------
-- NB w 15 PCA ------------
 Accuracy: 94.24% (±1.07%)
 Time: 13.17ms
---------------------------
-- NB w 7 PCA -------------
 Accuracy: 94.86% (±0.87%)
 Time: 13.11ms
---------------------------
-- NB w 5 PCA -------------
 Accuracy: 95.24% (±0.68%)
 Time: 11.79ms
---------------------------
-- NB w 3 PCA -------------
 Accuracy: 96.14% (±0.72%)
 Time: 11.86ms
---------------------------
-- NB w 2 PCA -------------
 Accuracy: 96.41% (±0.63%)
 Time: 10.98ms
---------------------------
-- NB w 1 PCA -------------
 Accuracy: 96.75% (±0.65%)
 Time: 11.44ms
---------------------------