Tiberiw committed on
Commit
9c92b55
·
1 Parent(s): 363a45c

Add application file

Files changed (3)
  1. Dockerfile +31 -0
  2. app.py +136 -0
  3. requirements.txt +74 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
+ # Hugging Face Spaces Dockerfile for GPU deployment
+ FROM python:3.10-slim
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     ffmpeg \
+     libsndfile1 \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements and install Python dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install torch==2.1.2+cu118 torchaudio==2.1.2+cu118 torchvision==0.16.2+cu118 --index-url https://download.pytorch.org/whl/cu118 && \
+     pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . .
+
+ # Expose port (Hugging Face Spaces uses port 7860 by default)
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
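
Note: the image pins CUDA 11.8 wheels for torch/torchaudio/torchvision on top of python:3.10-slim, so whether the GPU is actually usable depends on the Space's hardware tier. A minimal sketch to confirm which build ended up in the container (assumes you can open a shell in the running container; nothing here is part of the committed files):

# sanity check: confirm the +cu118 wheel was installed and whether a GPU is visible
import torch

print("torch:", torch.__version__)             # expected to end in "+cu118"
print("cuda available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))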
app.py ADDED
@@ -0,0 +1,136 @@
+ import os
+ import tempfile
+ import torch
+ from contextlib import asynccontextmanager
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
+ from peft import PeftModel
+ import librosa
+ from pydub import AudioSegment
+ from dotenv import load_dotenv
+
+ transcriber = None
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     global transcriber
+     device = "cuda:0" if torch.cuda.is_available() else "cpu"
+     torch_dtype = torch.float16 if device == "cuda:0" else torch.float32
+     load_dotenv(override=True)  # Load environment variables from .env file
+     print("After load_dotenv, HF_TOKEN:", os.getenv("HF_TOKEN"))
+
+     hf_token = os.getenv("HF_TOKEN")
+
+     # Add a check to ensure the token is provided
+     if hf_token is None:
+         raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
+
+     BASE_MODEL_PATH = "openai/whisper-base"
+     # BASE_MODEL_PATH = "openai/whisper-large-v3-turbo"
+     ADAPTER_AND_PROCESSOR_PATH = "Tiberiw/whisper-base-lora-finetuned-custom-v1"
+     # ADAPTER_AND_PROCESSOR_PATH = "Tiberiw/whisper-large-turbo-lora-finetuned-v3"
+     processor = WhisperProcessor.from_pretrained(ADAPTER_AND_PROCESSOR_PATH, token=hf_token)
+     base_model = WhisperForConditionalGeneration.from_pretrained(BASE_MODEL_PATH, torch_dtype=torch_dtype)
+     final_model = PeftModel.from_pretrained(base_model, ADAPTER_AND_PROCESSOR_PATH, token=hf_token)
+     transcriber = pipeline(
+         "automatic-speech-recognition",
+         model=final_model,
+         torch_dtype=torch_dtype,
+         device=device,
+         tokenizer=processor.tokenizer,
+         feature_extractor=processor.feature_extractor,
+     )
+     print("Model loaded successfully!")
+     yield
+
+ app = FastAPI(lifespan=lifespan)
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ def load_audio(path: str):
+     try:
+         audio_array, _ = librosa.load(path, sr=16000, mono=True)
+         return audio_array
+     except Exception as e:
+         import traceback
+         msg = f"Error processing audio (failed to load): {str(e)}\n{traceback.format_exc()}"
+         if any(err in str(e) for err in ["NoBackendError", "SoundFileNotOpen", "Unsupported format", "AudioreadError"]):
+             raise HTTPException(status_code=415, detail=msg + "\nSupported formats: (WEBM, WAV, MP3, FLAC)")
+         raise HTTPException(status_code=500, detail=msg)
+
+
+
+
+ @app.post("/api/transcription")
+ async def transcribe_pipeline(file: UploadFile = File(...)):
+     if not file.content_type or not file.content_type.startswith("audio/"):
+         raise HTTPException(status_code=400, detail="Invalid file content type.")
+
+     print(f"Received file: {file.filename}, Content-Type: {file.content_type}")
+
+     original_temp_path = None  # Path to the originally uploaded file
+     input_for_librosa_path = None  # Path to the file librosa will load (either original or converted)
+
+
+     try:
+         # 1. Save the uploaded file to a temporary location first.
+         # This gives us a file path to work with, which is often easier for external tools like FFmpeg via pydub.
+         file_suffix = ".unknown"
+         if file.filename:
+             _, ext = os.path.splitext(file.filename)
+             if ext:
+                 file_suffix = ext
+         print(f"Saving uploaded file to temporary location with suffix '{file_suffix}'")
+         with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as temp_orig_file:
+             content = await file.read()
+             temp_orig_file.write(content)
+             original_temp_path = temp_orig_file.name
+
+         # It's good practice to close the UploadFile object after reading its content
+         await file.close()
+
+         if file.content_type.startswith("audio/webm"):
+             print(f"Conversion needed for '{original_temp_path}' (ContentType: {file.content_type}) to MP3.")
+             # Define a path for the converted MP3 file
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_conv_file:
+                 input_for_librosa_path = temp_conv_file.name
+
+             try:
+                 # Load the WebM audio from the original temporary file using pydub
+                 # pydub's from_file can often infer the format, or you can specify format="webm" or format="opus"
+                 audio = AudioSegment.from_file(original_temp_path)  # pydub will use FFmpeg here
+
+                 # Export as MP3 to the new temporary file path
+                 audio.export(input_for_librosa_path, format="mp3")
+                 print(f"Successfully converted '{original_temp_path}' to MP3: '{input_for_librosa_path}'")
+             except Exception as e:
+                 import traceback
+                 err_msg = f"Audio conversion failed: {str(e)}\n{traceback.format_exc()}"
+                 if "ffmpeg" in str(e).lower():
+                     err_msg += "\nEnsure FFmpeg is installed and in PATH."
+                 raise HTTPException(status_code=500, detail=err_msg)
+         else:
+             input_for_librosa_path = original_temp_path
+             original_temp_path = None
+
+
+         audio_array = load_audio(input_for_librosa_path)
+         result = transcriber(audio_array.copy(), return_timestamps=True)
+         return {"transcription": result["text"]}
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         import traceback
+         raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}\n{traceback.format_exc()}")
+     finally:
+         for f in (original_temp_path, input_for_librosa_path):
+             if f and os.path.exists(f):
+                 os.unlink(f)
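
Note: for reference, a minimal client sketch for the endpoint above, assuming the server is reachable at http://localhost:7860 and a local sample.wav exists (both are placeholder assumptions, not part of the commit); it uses the requests package, which is already pinned in requirements.txt:

# post an audio file to the /api/transcription endpoint (URL and filename are assumptions)
import requests

with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/api/transcription",
        files={"file": ("sample.wav", f, "audio/wav")},  # content type must start with "audio/"
    )
resp.raise_for_status()
print(resp.json()["transcription"])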
requirements.txt ADDED
@@ -0,0 +1,74 @@
+ accelerate==1.7.0
+ annotated-types==0.7.0
+ anyio==4.9.0
+ audioread==3.0.1
+ certifi==2025.6.15
+ cffi==1.17.1
+ charset-normalizer==3.4.2
+ click==8.2.1
+ colorama==0.4.6
+ decorator==5.2.1
+ dnspython==2.7.0
+ email_validator==2.2.0
+ exceptiongroup==1.3.0
+ fastapi==0.115.12
+ fastapi-cli==0.0.7
+ filelock==3.18.0
+ fsspec==2025.5.1
+ h11==0.16.0
+ httpcore==1.0.9
+ httptools==0.6.4
+ httpx==0.28.1
+ huggingface-hub==0.33.0
+ idna==3.10
+ Jinja2==3.1.6
+ joblib==1.5.1
+ lazy_loader==0.4
+ librosa==0.11.0
+ llvmlite==0.44.0
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ mdurl==0.1.2
+ mpmath==1.3.0
+ msgpack==1.1.1
+ networkx==3.4.2
+ numba==0.61.2
+ numpy<2
+ packaging==25.0
+ peft==0.15.2
+ pillow==11.0.0
+ platformdirs==4.3.8
+ pooch==1.8.2
+ psutil==7.0.0
+ pycparser==2.22
+ pydantic==2.11.7
+ pydantic_core==2.33.2
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dotenv==1.1.0
+ python-multipart==0.0.20
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.4
+ rich==14.0.0
+ rich-toolkit==0.14.7
+ safetensors==0.5.3
+ scikit-learn==1.7.0
+ scipy==1.15.3
+ shellingham==1.5.4
+ sniffio==1.3.1
+ soundfile==0.13.1
+ soxr==0.5.0.post1
+ starlette==0.46.2
+ sympy==1.14.0
+ threadpoolctl==3.6.0
+ tokenizers==0.21.1
+ tqdm==4.67.1
+ transformers==4.52.4
+ typer==0.16.0
+ typing-inspection==0.4.1
+ typing_extensions==4.14.0
+ urllib3==2.4.0
+ uvicorn==0.34.3
+ watchfiles==1.0.5
+ websockets==15.0.1