import os
import sys

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"

# Import libraries
import whisper
import gradio as gr
import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from app.questions import get_question

# patch(niranjan): work around a TypeError raised while Gradio builds its API schema
try:
    # Keep a reference to the original method
    original_method = gr.Blocks.get_api_info

    # Create a safer version of the method that catches the specific error
    def safe_get_api_info(self):
        try:
            return original_method(self)
        except TypeError as e:
            print(f"API info generation error suppressed: {str(e)}", file=sys.stderr)
            return {}  # Return an empty API schema instead of crashing

    gr.Blocks.get_api_info = safe_get_api_info
    print("Applied API info generation patch", file=sys.stderr)
except Exception as e:
    print(f"Failed to apply patch: {str(e)}", file=sys.stderr)

# Load models
whisper_model = whisper.load_model("small")
confidence_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/final_confidence')
confidence_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/final_confidence')
context_model = BertForSequenceClassification.from_pretrained('RiteshAkhade/context_model')
context_tokenizer = BertTokenizer.from_pretrained('RiteshAkhade/context_model')
emotion_pipe = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=1)

# Emotion map with labels and emojis
interview_emotion_map = {
    "joy": ("Confident", "🙂"),
    "fear": ("Nervous", "😨"),
    "sadness": ("Uncertain", "🙁"),
    "anger": ("Frustrated", "😠"),
    "surprise": ("Curious", "😮"),
    "neutral": ("Calm", "😐"),
    "disgust": ("Disengaged", "😒"),
}

# Static question sets
technical_questions = [get_question(i) for i in range(6)]
non_technical_questions = [
    "Tell me about yourself.",
    "What are your strengths and weaknesses?",
    "Where do you see yourself in 5 years?",
    "How do you handle stress or pressure?",
    "Describe a time you faced a conflict and how you resolved it.",
    "What motivates you to do your best?"
]

# Index trackers
current_tech_index = 0
current_non_tech_index = 0

# Relevance prediction: classify the (question, answer) pair with the context model
def predict_relevance(question, answer):
    if not answer.strip():
        return "Irrelevant"
    inputs = context_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    context_model.eval()
    with torch.no_grad():
        outputs = context_model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=-1)
    return "Relevant" if probabilities[0, 1] > 0.5 else "Irrelevant"

# Confidence prediction: the positive-class probability must exceed the threshold
def predict_confidence(question, answer, threshold=0.4):
    if not isinstance(answer, str) or not answer.strip():
        return "Not Confident"
    inputs = confidence_tokenizer(question, answer, return_tensors="pt", padding=True, truncation=True)
    confidence_model.eval()
    with torch.no_grad():
        outputs = confidence_model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=-1)
    return "Confident" if probabilities[0, 1].item() > threshold else "Not Confident"

# Emotion detection: map the top emotion label to an interview-friendly label and emoji
def detect_emotion(answer):
    if not answer.strip():
        return "No Answer", ""
    result = emotion_pipe(answer)
    label = result[0][0]["label"].lower()
    emotion_text, emoji = interview_emotion_map.get(label, ("Unknown", "❓"))
    return emotion_text, emoji

# Question navigation (non-tech)
def show_non_tech_question():
    global current_non_tech_index
    return non_technical_questions[current_non_tech_index]

def next_non_tech_question():
    global current_non_tech_index
    current_non_tech_index = (current_non_tech_index + 1) % len(non_technical_questions)
    return non_technical_questions[current_non_tech_index], None, "", ""

# Question navigation (tech)
def show_tech_question():
    global current_tech_index
    return technical_questions[current_tech_index]

def next_tech_question():
    global current_tech_index
    current_tech_index = (current_tech_index + 1) % len(technical_questions)
    return technical_questions[current_tech_index], None, "", "", ""

# Transcribe + analyze (non-technical): speech-to-text, then emotion detection
def transcribe_and_analyze_non_tech(audio, question):
    try:
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
        result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
        transcribed_text = result.text
        emotion_text, emoji = detect_emotion(transcribed_text)
        return transcribed_text, f"{emotion_text} {emoji}"
    except Exception as e:
        return f"Error: {str(e)}", "❓"

# Transcribe + analyze (technical): speech-to-text, then relevance and confidence checks
def transcribe_and_analyze_tech(audio, question):
    try:
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
        result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
        transcribed_text = result.text
        context_result = predict_relevance(question, transcribed_text)
        confidence_result = predict_confidence(question, transcribed_text)
        return transcribed_text, context_result, confidence_result
    except Exception as e:
        return f"Error: {str(e)}", "", ""

# UI layout
with gr.Blocks(css="textarea, .gr-box { font-size: 18px !important; }") as demo:
    gr.HTML("