avsv committed on
Commit
db7a8ec
·
1 Parent(s): 936f253

✅ Fix: use correct extractor for superb/wav2vec2-base-superb-er

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -21,37 +21,38 @@ def convert_to_wav(uploaded_file):
21
 
22
  def get_emotion_label(logits):
23
  emotions = ["angry", "happy", "neutral", "sad"]
24
- scores = torch.softmax(torch.tensor(logits), dim=0).tolist()
25
  top_idx = scores.index(max(scores))
26
  return emotions[top_idx], scores
27
 
28
  def analyze_emotion(audio_path):
29
  extractor, model = load_model()
30
  waveform, sr = torchaudio.load(audio_path)
 
31
  if sr != 16000:
32
- waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(waveform)
33
 
34
- inputs = extractor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt")
35
  with torch.no_grad():
36
  logits = model(**inputs).logits[0]
37
 
38
  emotion, scores = get_emotion_label(logits)
39
  return emotion.capitalize(), scores
40
 
41
- # Streamlit UI
42
  st.set_page_config(page_title="🎧 Audio Emotion Detector", layout="centered")
43
  st.title("🎧 Audio Emotion Analysis (Wav2Vec2)")
44
 
45
  uploaded_file = st.file_uploader("Upload an MP3 or WAV audio file", type=["mp3", "wav"])
46
 
47
  if uploaded_file:
48
- st.audio(uploaded_file, format='audio/wav')
49
  with st.spinner("Analyzing emotion..."):
50
  wav_path = convert_to_wav(uploaded_file)
51
  emotion, scores = analyze_emotion(wav_path)
52
 
53
- st.subheader("Emotion Analysis Result:")
54
- st.markdown(f"🧠 **Detected Emotion:** `{emotion}`")
55
 
56
  st.subheader("Confidence Scores:")
57
  emotions = ["angry", "happy", "neutral", "sad"]
 
21
 
22
def get_emotion_label(logits):
    """Convert raw model logits into a predicted emotion and its score list.

    Args:
        logits: 1-D torch tensor of class scores, ordered to match the
            four-class label list below.

    Returns:
        (label, scores): the winning label string and the full softmax
        probability list (plain floats summing to 1).
    """
    labels = ["angry", "happy", "neutral", "sad"]
    # Softmax over the single class dimension; .tolist() yields plain floats.
    probs = torch.softmax(logits, dim=0).tolist()
    # First index of the maximum probability (ties resolve to the earliest
    # label, matching list.index semantics).
    winner = max(range(len(probs)), key=probs.__getitem__)
    return labels[winner], probs
27
 
28
def analyze_emotion(audio_path):
    """Run emotion classification on a speech recording.

    Args:
        audio_path: filesystem path to a WAV file.

    Returns:
        (emotion, scores): capitalized predicted label plus the softmax
        probability list produced by get_emotion_label.
    """
    extractor, model = load_model()

    signal, rate = torchaudio.load(audio_path)

    # The checkpoint expects 16 kHz audio; resample anything else first.
    if rate != 16000:
        signal = torchaudio.transforms.Resample(rate, 16000)(signal)

    # Feed the first channel only — the extractor wants a 1-D array.
    features = extractor(signal[0].numpy(), sampling_rate=16000, return_tensors="pt")

    # Inference only; skip gradient tracking.
    with torch.no_grad():
        logits = model(**features).logits[0]

    label, scores = get_emotion_label(logits)
    return label.capitalize(), scores
41
 
42
+ # UI
43
  st.set_page_config(page_title="🎧 Audio Emotion Detector", layout="centered")
44
  st.title("🎧 Audio Emotion Analysis (Wav2Vec2)")
45
 
46
  uploaded_file = st.file_uploader("Upload an MP3 or WAV audio file", type=["mp3", "wav"])
47
 
48
  if uploaded_file:
49
+ st.audio(uploaded_file)
50
  with st.spinner("Analyzing emotion..."):
51
  wav_path = convert_to_wav(uploaded_file)
52
  emotion, scores = analyze_emotion(wav_path)
53
 
54
+ st.subheader("Detected Emotion:")
55
+ st.markdown(f"🧠 **{emotion}**")
56
 
57
  st.subheader("Confidence Scores:")
58
  emotions = ["angry", "happy", "neutral", "sad"]