Kevin King committed
Commit 4b260d9 · 1 Parent(s): 0e9383a

Refactor Streamlit app for audio transcription and remove facial emotion recognition logic

requirements.txt CHANGED
@@ -1,6 +1,12 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+
+# Core app library
 streamlit==1.35.0
-Pillow==10.3.0
-numpy==1.26.4
-deepface==0.0.94
-tensorflow-cpu==2.16.1
-tf-keras==2.16.0
+
+# AI libraries for this test
+openai-whisper==20231117
+torch==2.7.0
+torchaudio==2.7.0
+
+# Utility for handling audio files
+soundfile
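
The switch to CPU-only PyTorch wheels via the extra index URL is easy to get wrong, so a quick import-and-load check after `pip install -r requirements.txt` can save a failed deploy. A minimal sketch, not part of this commit — the script name and prints are illustrative; the "base" model and /tmp cache path mirror the new app code below:

# verify_stack.py -- sanity check for the new CPU-only requirements (illustrative)
import torch
import whisper

# The CPU wheel from the extra index should report no CUDA support
print(f"torch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")

# First run downloads the ~140 MB "base" checkpoint into the writable cache
model = whisper.load_model("base", download_root="/tmp/whisper_cache")
print("Whisper 'base' model loaded OK")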
src/streamlit_app.py CHANGED
@@ -1,56 +1,60 @@
 import os
 import streamlit as st
-
-# Point the cache directory to the guaranteed writable /tmp folder
-os.environ['DEEPFACE_HOME'] = '/tmp/.deepface'
-
-from PIL import Image
-import numpy as np
-from deepface import DeepFace
+import whisper
+import tempfile
 import logging
-import cv2
+
+# --- Set cache directory for models ---
+# Point the cache to a writable directory in the cloud environment
+os.environ['HF_HOME'] = '/tmp/huggingface'
 
 # --- Page Configuration ---
 st.set_page_config(
-    page_title="FER Test",
-    page_icon="😀",
+    page_title="Audio Transcription Test",
+    page_icon="🎀",
     layout="centered"
 )
 
-st.title("Step 1: Facial Emotion Recognition (FER) Test")
-st.write("Upload an image with a face to test the DeepFace library.")
+st.title("Step 2: Audio Transcription (Whisper) Test")
+st.write("Upload a short audio file (.wav, .mp3, .m4a) to test the Whisper model.")
 
 # --- Logger Configuration ---
 logging.basicConfig(level=logging.INFO)
-logging.getLogger('deepface').setLevel(logging.ERROR)
+logging.getLogger('huggingface_hub').setLevel(logging.WARNING)
+
+# --- Model Loading ---
+@st.cache_resource
+def load_whisper_model():
+    with st.spinner("Loading Whisper model... This may take a moment."):
+        # Load the base model and specify a writable cache directory
+        model = whisper.load_model("base", download_root="/tmp/whisper_cache")
+    return model
+
+whisper_model = load_whisper_model()
 
 
 # --- UI and Processing Logic ---
-uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "m4a"])
 
 if uploaded_file is not None:
-    pil_image = Image.open(uploaded_file)
-    numpy_image = np.array(pil_image)
-    image_bgr = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR)
+    # Save the uploaded file to a temporary location
+    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tfile:
+        tfile.write(uploaded_file.read())
+        temp_audio_path = tfile.name
 
-    st.image(pil_image, caption="Image Uploaded", use_column_width=True)
+    st.audio(temp_audio_path)
 
-    with st.spinner("Analyzing image for emotion..."):
+    with st.spinner("Transcribing audio..."):
         try:
-            # This will now download its models to /tmp/.deepface
-            analysis = DeepFace.analyze(
-                img_path=image_bgr,
-                actions=['emotion'],
-                enforce_detection=False,
-                silent=True
-            )
-
-            if isinstance(analysis, list) and len(analysis) > 0:
-                dominant_emotion = analysis[0]['dominant_emotion']
-                st.success(f"Dominant Emotion Detected: **{dominant_emotion.capitalize()}**")
-                st.write(analysis[0]['emotion'])
-            else:
-                st.warning("No face detected in the image.")
+            # Transcribe the audio file using Whisper
+            result = whisper_model.transcribe(temp_audio_path, fp16=False)
+            transcribed_text = result.get("text", "No text transcribed.")
+
+            st.success("Transcription Complete!")
+            st.markdown(f"> {transcribed_text}")
 
         except Exception as e:
-            st.error(f"An error occurred during analysis: {e}")
+            st.error(f"An error occurred during transcription: {e}")
+        finally:
+            # Clean up the temporary file
+            os.unlink(temp_audio_path)
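
The same transcription path can be exercised outside Streamlit, which is handy for debugging the container. A minimal sketch under stated assumptions: "sample.wav" is a hypothetical local file, and ffmpeg must be on PATH since openai-whisper shells out to it to decode audio. The fp16=False flag mirrors the committed code; it avoids the "FP16 is not supported on CPU" warning on CPU-only torch.

# transcribe_smoke_test.py -- standalone check of the app's Whisper flow (illustrative)
import whisper

# Same model and writable cache directory as the app
model = whisper.load_model("base", download_root="/tmp/whisper_cache")

# fp16=False because the CPU-only wheels have no half-precision support
result = model.transcribe("sample.wav", fp16=False)
print(result.get("text", "No text transcribed."))

Locally, the app itself runs with `streamlit run src/streamlit_app.py`.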
src/streamlit_app_imageFER.py ADDED
@@ -0,0 +1,56 @@
+import os
+import streamlit as st
+
+# Point the cache directory to the guaranteed writable /tmp folder
+os.environ['DEEPFACE_HOME'] = '/tmp/.deepface'
+
+from PIL import Image
+import numpy as np
+from deepface import DeepFace
+import logging
+import cv2
+
+# --- Page Configuration ---
+st.set_page_config(
+    page_title="FER Test",
+    page_icon="😀",
+    layout="centered"
+)
+
+st.title("Step 1: Facial Emotion Recognition (FER) Test")
+st.write("Upload an image with a face to test the DeepFace library.")
+
+# --- Logger Configuration ---
+logging.basicConfig(level=logging.INFO)
+logging.getLogger('deepface').setLevel(logging.ERROR)
+
+
+# --- UI and Processing Logic ---
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+
+if uploaded_file is not None:
+    pil_image = Image.open(uploaded_file)
+    numpy_image = np.array(pil_image)
+    image_bgr = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2BGR)
+
+    st.image(pil_image, caption="Image Uploaded", use_column_width=True)
+
+    with st.spinner("Analyzing image for emotion..."):
+        try:
+            # This will now download its models to /tmp/.deepface
+            analysis = DeepFace.analyze(
+                img_path=image_bgr,
+                actions=['emotion'],
+                enforce_detection=False,
+                silent=True
+            )
+
+            if isinstance(analysis, list) and len(analysis) > 0:
+                dominant_emotion = analysis[0]['dominant_emotion']
+                st.success(f"Dominant Emotion Detected: **{dominant_emotion.capitalize()}**")
+                st.write(analysis[0]['emotion'])
+            else:
+                st.warning("No face detected in the image.")
+
+        except Exception as e:
+            st.error(f"An error occurred during analysis: {e}")
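
Note that this archived script still imports deepface, cv2, PIL, and numpy, while the commit removes deepface, Pillow, numpy, and tensorflow-cpu from requirements.txt, so it only runs where those packages are reinstalled. For reference, a minimal standalone sketch of the same DeepFace call; "face.jpg" is a hypothetical local image, and passing a file path directly skips the PIL/NumPy/BGR conversion the Streamlit version needs for in-memory uploads:

# fer_smoke_test.py -- standalone version of the archived DeepFace flow (illustrative)
import os
os.environ['DEEPFACE_HOME'] = '/tmp/.deepface'  # same writable-cache trick as the app

from deepface import DeepFace

# DeepFace.analyze accepts a file path as well as a BGR array
analysis = DeepFace.analyze(
    img_path="face.jpg",
    actions=['emotion'],
    enforce_detection=False,
    silent=True,
)
print(analysis[0]['dominant_emotion'])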