# ASL_Sign_Lang / app.py
# Haryiank's picture
# Create app.py
# 4acc044 verified
from collections import Counter

import cv2
import gradio as gr
import numpy as np
import torch
from transformers import AutoImageProcessor, SiglipForImageClassification
# Load the image processor and classifier identified by `model_name` once, at
# import time, so every prediction call reuses the same objects.
# `processor` turns a raw RGB frame into model inputs; `model` returns the
# logits that predict_from_video maps to a label via `model.config.id2label`.
model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
processor = AutoImageProcessor.from_pretrained(model_name)
model = SiglipForImageClassification.from_pretrained(model_name)
def predict_from_video(video_path):
cap = cv2.VideoCapture(video_path)
predictions = []
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
# Convert frame to RGB
img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Run model
inputs = processor(images=img, return_tensors="pt")
outputs = model(**inputs)
probs = outputs.logits.softmax(dim=-1)[0]
idx = int(probs.argmax())
label = model.config.id2label[idx]
predictions.append(label)
cap.release()
# Majority vote
if predictions:
most_common = Counter(predictions).most_common(1)[0]
return f"Predicted Letter: {most_common[0]} (appeared {most_common[1]} times)"
else:
return "No frames processed."
# Gradio UI: a single video input wired to predict_from_video, with the
# returned message shown as plain text.
iface = gr.Interface(
    fn=predict_from_video,
    inputs=gr.Video(),  # no 'type' argument in Gradio 5.x
    outputs="text",
    title="ASL Alphabet Recognition from Video",
    description="Upload a short video of your ASL sign (A–Z). The system will analyze frames and predict the most likely letter.",
)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse `iface` or `predict_from_video`) does not launch a web server.
if __name__ == "__main__":
    iface.launch()