# Spaces: Sleeping  (page-status text captured along with the source; not part of the program)
from collections import Counter

import cv2
import gradio as gr
import numpy as np
import torch
from transformers import AutoImageProcessor, SiglipForImageClassification
# Load the image processor and the classification model once, at import time,
# so that every call to predict_from_video() reuses the same instances.
model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
processor = AutoImageProcessor.from_pretrained(model_name)
model = SiglipForImageClassification.from_pretrained(model_name)
def predict_from_video(video_path):
    """Predict the ASL letter shown in a video by majority vote over its frames.

    Each frame that OpenCV manages to read is classified with the module-level
    ``processor`` / ``model`` pair; the label predicted for the largest number
    of frames is reported.

    Args:
        video_path: Path of the video file to analyse. ``None`` or an empty
            string (no video supplied) is treated like a video with no frames.

    Returns:
        ``"Predicted Letter: <label> (appeared <n> times)"`` for the most
        frequent per-frame label, or ``"No frames processed."`` when no frame
        could be read.
    """
    if not video_path:
        # No file was supplied, so there is nothing to open or classify.
        return "No frames processed."

    votes = Counter()
    cap = cv2.VideoCapture(video_path)
    try:
        # Pure inference: do not record autograd state for every frame.
        with torch.no_grad():
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV yields BGR frames; convert to RGB before preprocessing.
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                inputs = processor(images=img, return_tensors="pt")
                logits = model(**inputs).logits
                # softmax is monotonic, so the argmax of the raw logits is the
                # same class the softmax probabilities would select.
                idx = int(logits[0].argmax())
                votes[model.config.id2label[idx]] += 1
    finally:
        # Release the capture even if preprocessing or inference fails.
        cap.release()

    if not votes:
        return "No frames processed."

    # Majority vote across all classified frames.
    label, count = votes.most_common(1)[0]
    return f"Predicted Letter: {label} (appeared {count} times)"
# Web UI: one video input wired to predict_from_video, one plain-text output.
iface = gr.Interface(
    fn=predict_from_video,
    inputs=gr.Video(),  # no 'type' argument is passed (per the original note on Gradio 5.x)
    outputs="text",
    title="ASL Alphabet Recognition from Video",
    description="Upload a short video of your ASL sign (A–Z). The system will analyze frames and predict the most likely letter.",
)

# Start serving the interface as soon as the script is executed.
iface.launch()