File size: 2,583 Bytes
4355910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
055425e
4355910
055425e
8448657
 
4355910
 
 
2de8fd8
2c87e7e
4355910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7118df6
4355910
 
 
 
 
7118df6
4355910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219c0b5
ce81ba6
 
 
4355910
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
from keras.models import model_from_json
import matplotlib.pyplot as plt
import keras 
import pickle
import pandas as pd
import numpy as np
import librosa
import librosa.display

def transform_data(audio):
    """Load an audio file and convert it into the MFCC tensor the model expects.

    Parameters
    ----------
    audio : str
        Path to the audio file (Gradio passes a filepath).

    Returns
    -------
    np.ndarray
        MFCC features with a trailing channel axis, ready for the Conv2D net.
    """
    # 2.5 s window at 44.1 kHz, skipping the first 0.5 s — presumably matches
    # the preprocessing used at training time (TODO confirm).
    signal, sr = librosa.load(
        audio,
        res_type='kaiser_fast',
        duration=2.5,
        sr=44100,
        offset=0.5,
    )

    sr = np.array(sr)
    # 30 MFCC coefficients per frame; add a channel dimension for Conv2D input.
    features = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=30)
    return np.expand_dims(features, axis=-1)

def predict(newdf, loaded_model):
    """Run the model on a single feature tensor.

    Prepends a batch axis of size one so the model always receives a batch,
    then returns the raw prediction array.
    """
    batch = np.expand_dims(newdf, axis=0)
    return loaded_model.predict(batch, batch_size=16, verbose=1)

    
def get_label(newpred):
    """Decode the model's probability output into human-readable label(s).

    Parameters
    ----------
    newpred : np.ndarray
        Model output of shape (batch, n_classes).

    Returns
    -------
    np.ndarray
        Label names, one per row of ``newpred``.
    """
    # Label encoder pickled alongside the model at training time.
    # NOTE(review): pickle.load is only safe because this file ships with the
    # app — never point this at untrusted data.
    filename = 'models/labels'
    # Fix: use a context manager so the file handle is closed even if
    # pickle.load raises (original used manual open/close).
    with open(filename, 'rb') as infile:
        lb = pickle.load(infile)

    # argmax over the class axis -> integer class ids -> original label names.
    final = newpred.argmax(axis=1)
    final = final.astype(int).flatten()
    final = (lb.inverse_transform((final)))
    return final

def load_model():
    """Rebuild the Conv2D emotion model from its JSON architecture + H5 weights.

    Returns
    -------
    keras.Model
        A compiled model ready for inference.
    """
    # Architecture and weights are stored separately (Keras json/h5 pair).
    # Fix: context manager guarantees the file closes even if read() raises
    # (original used manual open/close).
    with open('models/model_json_conv2D.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model.load_weights("models/Emotion_Model_conv2D.h5")
    print("Loaded model from disk")

    # NOTE(review): `lr` and `decay` are deprecated/removed in newer Keras
    # releases (use `learning_rate`); kept as-is for compatibility with the
    # Keras version this app was built against — confirm before upgrading.
    opt = keras.optimizers.RMSprop(lr=0.00001, decay=1e-6)
    # Compilation is only needed so predict() runs with a configured model;
    # the optimizer/loss are irrelevant for pure inference.
    loaded_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return loaded_model


def main(audio):
    """Gradio entry point: audio file path in, classification string out.

    Parameters
    ----------
    audio : str
        Filepath of the recorded audio (``type="filepath"`` on the input).

    Returns
    -------
    str
        Human-readable classification result.
    """
    # NOTE(review): the model is re-loaded from disk on every request;
    # consider loading once at module level if latency matters.
    newdf = transform_data(audio)
    loaded_model = load_model()
    newpred = predict(newdf, loaded_model)
    final = get_label(newpred)
    # Fix: get_label returns an ndarray of labels. The original
    # `"Classification: " + final` broadcasts str + ndarray into an array of
    # strings instead of a single string; take the one predicted label and
    # build a plain str for the text output component.
    return "Classification: " + str(final[0])

# Wire the classifier into a Gradio UI: microphone recording in, text out.
demo = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="🎙️ Audio Gender/Emotion Analysis 🎙️",
    description=(
        "<h3>A Neural Network to classify the gender of the voice "
        "(male/female) and the emotion, such as: happy, angry, sad, etc. "
        "</h3> <br> <b>Record your voice</b>"
    ),
    allow_flagging="never",
)


if __name__ == "__main__":
    # show_api=False hides the auto-generated API docs page.
    demo.launch(show_api=False)