⚡ Trim audio to 30s for stable analysis on Hugging Face
app.py CHANGED
```diff
@@ -29,32 +29,41 @@ def analyze_emotion(audio_path):
     extractor, model = load_model()
     waveform, sr = torchaudio.load(audio_path)
 
-
-
+    # 💡 Trim audio to 30 seconds max to avoid slowdowns
+    max_duration_sec = 30
+    max_samples = sr * max_duration_sec
+    if waveform.size(1) > max_samples:
+        waveform = waveform[:, :max_samples]
 
+    duration_sec = waveform.size(1) / sr
+
+    # Run model
     inputs = extractor(waveform[0].numpy(), sampling_rate=16000, return_tensors="pt")
     with torch.no_grad():
         logits = model(**inputs).logits[0]
 
     emotion, scores = get_emotion_label(logits)
-    return emotion.capitalize(), scores
+    return emotion.capitalize(), scores, duration_sec
 
-# UI
+# Streamlit UI
 st.set_page_config(page_title="🎧 Audio Emotion Detector", layout="centered")
 st.title("🎧 Audio Emotion Analysis (Wav2Vec2)")
 
 uploaded_file = st.file_uploader("Upload an MP3 or WAV audio file", type=["mp3", "wav"])
 
 if uploaded_file:
-    st.audio(uploaded_file)
+    st.audio(uploaded_file, format='audio/wav')
     with st.spinner("Analyzing emotion..."):
         wav_path = convert_to_wav(uploaded_file)
-        emotion, scores = analyze_emotion(wav_path)
+        emotion, scores, duration_sec = analyze_emotion(wav_path)
+
+        st.subheader("⏱ Audio Info:")
+        st.write(f"Duration analyzed: **{duration_sec:.2f} seconds**")
 
-        st.subheader("Detected Emotion:")
-        st.markdown(f"
+        st.subheader("🧠 Detected Emotion:")
+        st.markdown(f"**{emotion}**")
 
-        st.subheader("Confidence Scores:")
+        st.subheader("🎯 Confidence Scores:")
         emotions = ["angry", "happy", "neutral", "sad"]
         for i, label in enumerate(emotions):
             st.write(f"- **{label.capitalize()}**: {scores[i]*100:.2f}%")
```
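For context, the trimming step added in this commit can be exercised on its own. Below is a minimal sketch of the same slicing logic, assuming a mono 16 kHz tensor built with `torch.zeros` as stand-in input; the `trim_waveform` helper name is illustrative and not part of the commit:

```python
import torch

def trim_waveform(waveform: torch.Tensor, sr: int, max_duration_sec: int = 30):
    """Clip a (channels, samples) waveform to at most max_duration_sec seconds,
    mirroring the trim added in analyze_emotion."""
    max_samples = sr * max_duration_sec
    if waveform.size(1) > max_samples:
        waveform = waveform[:, :max_samples]
    return waveform, waveform.size(1) / sr

# Stand-in input: 45 s of silence, one channel, 16 kHz (illustrative only).
sr = 16000
waveform = torch.zeros(1, 45 * sr)
trimmed, duration_sec = trim_waveform(waveform, sr)
print(trimmed.shape, duration_sec)  # torch.Size([1, 480000]) 30.0
```

Because the trim is a tensor slice on the already-loaded waveform, it adds no re-encoding or file I/O cost, which is presumably why it stabilizes analysis on the Space.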