"""Gradio app that transcribes English speech and reports grammar, fluency and delivery metrics."""

import os
import re
import time

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from matplotlib.figure import Figure
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Download necessary NLTK data
try:
    # Make the download more reliable by specifying download directory
    nltk_data_dir = '/home/user/nltk_data'
    os.makedirs(nltk_data_dir, exist_ok=True)
    # Download all required resources
    nltk.download('punkt', download_dir=nltk_data_dir)
    nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
    # Set the data path to include our custom directory
    nltk.data.path.insert(0, nltk_data_dir)
except Exception as e:
    print(f"NLTK download issue: {e}")
    # Fallback simple approach if the directory approach fails
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')

# Add error handling around model loading
try:
    # Load Whisper for ASR
    asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")

    # Load Grammar Scoring Model (CoLA)
    cola_model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")
    cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
    grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=cola_tokenizer)

    # Load Grammar Correction Model (T5)
    correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")

    # Add sentiment analysis
    sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

    # Add fluency analysis (using BERT)
    fluency_pipeline = pipeline("text-classification", model="textattack/bert-base-uncased-CoLA")

    # Set variables to track loaded models
    MODELS_LOADED = True
except Exception as e:
    print(f"Error loading models: {e}")
    # Set variable to track failed model loading
    MODELS_LOADED = False

# Common English filler words to detect
FILLER_WORDS = ["um", "uh", "like", "you know", "actually", "basically", "literally",
                "sort of", "kind of", "i mean", "so", "well", "right", "okay", "yeah"]

# Classifier labels that mean "grammatically acceptable".
# NOTE(review): the textattack CoLA checkpoints are expected to report the generic
# names LABEL_0 / LABEL_1 (1 = acceptable) rather than "acceptable" - confirm
# against the model configs. Both spellings are accepted here.
_ACCEPTABLE_LABELS = frozenset({"acceptable", "label_1"})

# Prefix the grammar-correction T5 checkpoint is prompted with.
# NOTE(review): taken from the vennify/t5-base-grammar-correction model card - confirm.
_CORRECTION_PREFIX = "grammar: "


def _is_acceptable_label(label):
    """Return True when a CoLA-style classifier label means "acceptable".

    The comparison is an exact match, so "unacceptable" is not mistaken for
    "acceptable" the way a substring test would.
    """
    return str(label).strip().lower() in _ACCEPTABLE_LABELS


def count_filler_words(text):
    """Count filler words in the text.

    Returns:
        (count, ratio): number of whole-word filler matches, and that number
        divided by the whitespace-separated word count (at least 1).
    """
    text = text.lower()
    count = sum(
        len(re.findall(r'\b' + re.escape(word) + r'\b', text))
        for word in FILLER_WORDS
    )
    return count, count / max(len(text.split()), 1)  # Count and ratio


def calculate_speaking_rate(text, duration):
    """Calculate words per minute; returns 0 when duration (seconds) is not positive."""
    if duration <= 0:
        return 0
    words = len(text.split())
    return (words / duration) * 60  # Words per minute


def analyze_vocabulary_richness(text):
    """Analyze vocabulary richness.

    Returns:
        (richness, pos_counts): unique-token / total-token ratio, and a dict
        of POS tag -> count. Empty input yields (0, {}).
    """
    try:
        # Try using word_tokenize first
        words = word_tokenize(text.lower())
    except LookupError:
        # Fallback to simple regex-based tokenization if the NLTK data is missing
        words = re.findall(r'\b\w+\b', text.lower())

    if not words:
        return 0, {}

    # Vocabulary richness (unique words / total words)
    unique_words = set(words)
    richness = len(unique_words) / len(words)

    # Use simple POS tagging or skip it if NLTK fails
    try:
        pos_tags = nltk.pos_tag(words)
        pos_counts = {}
        for _, tag in pos_tags:
            pos_counts[tag] = pos_counts.get(tag, 0) + 1
    except Exception:
        # Return simplified count if POS tagging fails
        pos_counts = {"WORD": len(words), "UNIQUE": len(unique_words)}

    return richness, pos_counts


def analyze_sentence_complexity(text):
    """Analyze sentence complexity with error handling.

    Returns:
        (avg_words, variation): mean words per sentence and the standard
        deviation of sentence lengths (0 for a single sentence). Text with
        no sentences yields (0, 0).
    """
    try:
        # Simple sentence splitting by punctuation
        sentences = re.split(r'[.!?]+', text)
        sentences = [s.strip() for s in sentences if s.strip()]

        if not sentences:
            return 0, 0

        # Average words per sentence
        words_per_sentence = [len(s.split()) for s in sentences]
        avg_words = sum(words_per_sentence) / len(sentences)

        # Sentence length variation (standard deviation)
        sentence_length_variation = np.std(words_per_sentence) if len(sentences) > 1 else 0

        return avg_words, sentence_length_variation
    except Exception:
        # In case of any error, estimate from the raw terminator count
        # (treated as one sentence when no terminator is present).
        word_count = len(text.split())
        terminators = text.count('.') + text.count('!') + text.count('?')
        return word_count / max(1, terminators), 0


def create_detailed_feedback(transcription, grammar_score, corrected_text, sentiment,
                             fluency, filler_ratio, speaking_rate, vocabulary_richness,
                             avg_words_per_sentence):
    """Create detailed feedback based on all metrics.

    Args:
        transcription: recognized text (not used for the verdicts).
        grammar_score: display string of the form "<label> (<confidence>)".
        corrected_text: grammar-corrected text (not used for the verdicts).
        sentiment: sentiment label; "POSITIVE" is treated as positive tone.
        fluency: fluency score in [0, 1].
        filler_ratio: filler words as a fraction of all words.
        speaking_rate: words per minute.
        vocabulary_richness: unique/total word ratio.
        avg_words_per_sentence: mean sentence length in words.

    Returns:
        One feedback line per metric, joined with newlines.
    """
    feedback = []

    # Grammar feedback: compare the label part exactly; a substring test would
    # treat "unacceptable" as acceptable.
    grammar_label = grammar_score.split("(", 1)[0]
    if _is_acceptable_label(grammar_label):
        feedback.append("✅ Your grammar is good!")
    else:
        feedback.append("❗ Your grammar needs improvement. Check the corrections provided.")

    # Fluency feedback
    if fluency > 0.7:
        feedback.append("✅ Your speech flows naturally.")
    else:
        feedback.append("❗ Work on making your speech more fluid and natural.")

    # Filler words feedback
    if filler_ratio > 0.1:
        feedback.append(f"❗ You used too many filler words ({filler_ratio:.1%} of your words).")
    else:
        feedback.append("✅ Good job minimizing filler words!")

    # Speaking rate feedback
    if 120 <= speaking_rate <= 160:
        feedback.append(f"✅ Your speaking pace is good ({speaking_rate:.0f} words/min).")
    elif speaking_rate < 120:
        feedback.append(f"❗ Try speaking a bit faster ({speaking_rate:.0f} words/min is slower than ideal).")
    else:
        feedback.append(f"❗ Try speaking a bit slower ({speaking_rate:.0f} words/min is faster than ideal).")

    # Vocabulary feedback
    if vocabulary_richness > 0.6:
        feedback.append("✅ Excellent vocabulary diversity!")
    elif vocabulary_richness > 0.4:
        feedback.append("✅ Good vocabulary usage.")
    else:
        feedback.append("❗ Try using more varied vocabulary.")

    # Sentence complexity feedback
    if 10 <= avg_words_per_sentence <= 20:
        feedback.append("✅ Good sentence structure and length.")
    elif avg_words_per_sentence < 10:
        feedback.append("❗ Try using more complex sentences occasionally.")
    else:
        feedback.append("❗ Your sentences are quite long. Consider varying your sentence length.")

    # Overall sentiment feedback
    if sentiment == "POSITIVE":
        feedback.append("✅ Your tone is positive and engaging.")
    else:
        feedback.append("ℹ️ Your tone is neutral/negative. Consider if this matches your intent.")

    return "\n".join(feedback)


def _estimate_duration(audio):
    """Best-effort audio duration in seconds for a file path or an in-memory tuple."""
    if isinstance(audio, str):  # This is a file path
        try:
            import librosa
            samples, rate = librosa.load(audio, sr=None)
            return librosa.get_duration(y=samples, sr=rate)
        except Exception as e:
            print(f"Error getting duration: {e}")
        # Estimate duration based on file size
        try:
            # Rough estimate: 16kHz, 16-bit audio is about 32KB per second
            return os.path.getsize(audio) / 32000
        except OSError:
            return 10  # Default to 10 seconds if we can't determine

    try:
        samples, sample_rate = audio[0], audio[1]
        # Accept either ordering of (samples, sample_rate): the scalar element
        # is the rate. NOTE(review): Gradio's numpy audio is presumably
        # (sample_rate, data) - confirm.
        if np.isscalar(samples) and not np.isscalar(sample_rate):
            samples, sample_rate = sample_rate, samples
        return len(samples) / sample_rate if sample_rate > 0 else 0
    except Exception:
        return 10  # Default duration


def _build_metrics_figure(grammar_norm, fluency_score, vocab_richness, speaking_rate, filler_ratio):
    """Build the radar chart of the five normalized (0..1) performance metrics."""
    categories = ['Grammar', 'Fluency', 'Vocabulary', 'Speaking Rate', 'Clarity']
    speaking_rate_norm = max(0, min(1, 1 - abs((speaking_rate - 140) / 100)))  # Optimal around 140 wpm
    values = [
        grammar_norm,
        fluency_score,
        vocab_richness,
        speaking_rate_norm,
        max(0, 1 - filler_ratio),  # Lower filler ratio is better
    ]

    # One angle per category, computed BEFORE closing the loop so the angle
    # and value sequences keep the same length.
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    closed_values = values + values[:1]

    # A bare Figure is not registered with pyplot's global figure manager, so
    # it can be garbage-collected after the request instead of piling up.
    fig = Figure(figsize=(10, 6))
    ax = fig.add_subplot(111, projection="polar")
    ax.plot(closed_angles, closed_values, linewidth=2, linestyle='solid')
    ax.fill(closed_angles, closed_values, alpha=0.25)
    ax.set_ylim(0, 1)
    ax.set_yticklabels([])
    ax.set_xticks(angles)
    ax.set_xticklabels(categories)
    ax.grid(True)
    ax.set_title('Speaking Performance Metrics', size=15, color='navy', y=1.1)
    return fig


def _build_error_figure():
    """Build a placeholder figure shown when the metrics chart cannot be drawn."""
    fig = Figure(figsize=(6, 3))
    ax = fig.add_subplot(111)
    ax.text(0.5, 0.5, "Error creating visualization",
            horizontalalignment='center', verticalalignment='center')
    ax.axis('off')
    return fig


def process_audio(audio):
    """Run the full assessment for one recording.

    Args:
        audio: file path of the recording (what the UI passes), a
            two-element samples/sample-rate tuple, or None.

    Returns:
        A 6-tuple matching the UI outputs: (transcription, grammar score text,
        corrected text, feedback text, matplotlib figure or None, detailed
        analysis Markdown). Failures are reported inside the tuple rather
        than raised.
    """
    if audio is None:
        return "No audio provided.", "", "", "", None, ""

    start_time = time.time()

    # Check if models loaded properly
    if not MODELS_LOADED:
        return ("Models failed to load. Please check the logs for details.",
                "Error",
                "Error",
                "Unable to process audio due to model loading issues.",
                None,
                "## Error\nThe required models couldn't be loaded. Please check the system configuration.")

    try:
        duration = _estimate_duration(audio)

        # Step 1: Transcription
        try:
            transcription_result = asr_pipeline(audio)
            transcription = transcription_result["text"]
        except Exception as e:
            print(f"Transcription error: {e}")
            return ("Error in speech recognition. Please try again.",
                    "Error",
                    "Error",
                    "There was an error processing your audio.",
                    None,
                    f"## Error\nError in speech recognition: {str(e)[:100]}...")

        if not transcription or transcription.strip() == "":
            return ("No speech detected. Please speak louder or check your microphone.",
                    "N/A",
                    "N/A",
                    "No speech detected in the audio.",
                    None,
                    "## No Speech Detected\nPlease try recording again with clearer speech.")

        # Step 2: Grammar Scoring
        try:
            score_output = grammar_pipeline(transcription)[0]
            # Normalize the raw classifier label to a readable verdict so the
            # comparisons below (and the displayed text) are meaningful.
            label = "acceptable" if _is_acceptable_label(score_output["label"]) else "unacceptable"
            confidence = score_output["score"]
            grammar_score = f"{label} ({confidence:.2f})"
        except Exception as e:
            print(f"Grammar scoring error: {e}")
            label = "UNKNOWN"
            confidence = 0.5
            grammar_score = "Could not analyze grammar"

        # Step 3: Grammar Correction
        try:
            corrected = correction_pipeline(_CORRECTION_PREFIX + transcription,
                                            max_length=128)[0]["generated_text"]
        except Exception as e:
            print(f"Grammar correction error: {e}")
            corrected = transcription

        # Step 4: Sentiment Analysis
        try:
            sentiment_result = sentiment_pipeline(transcription)[0]
            sentiment = sentiment_result["label"]
            sentiment_score = sentiment_result["score"]
        except Exception as e:
            print(f"Sentiment analysis error: {e}")
            sentiment = "NEUTRAL"
            sentiment_score = 0.5

        # Step 5: Fluency Analysis
        try:
            fluency_result = fluency_pipeline(transcription)[0]
            if _is_acceptable_label(fluency_result["label"]):
                fluency_score = fluency_result["score"]
            else:
                fluency_score = 1 - fluency_result["score"]
        except Exception as e:
            print(f"Fluency analysis error: {e}")
            fluency_score = 0.5

        # Step 6: Filler Words Analysis
        try:
            filler_count, filler_ratio = count_filler_words(transcription)
        except Exception as e:
            print(f"Filler word analysis error: {e}")
            filler_count, filler_ratio = 0, 0

        # Step 7: Speaking Rate
        try:
            speaking_rate = calculate_speaking_rate(transcription, duration)
        except Exception as e:
            print(f"Speaking rate calculation error: {e}")
            speaking_rate = 0

        # Step 8: Vocabulary Richness
        try:
            vocab_richness, pos_counts = analyze_vocabulary_richness(transcription)
        except Exception as e:
            print(f"Vocabulary analysis error: {e}")
            vocab_richness, pos_counts = 0.5, {"N/A": 1}

        # Step 9: Sentence Complexity
        try:
            avg_words, sentence_variation = analyze_sentence_complexity(transcription)
        except Exception as e:
            print(f"Sentence complexity analysis error: {e}")
            avg_words, sentence_variation = 0, 0

        # Create feedback
        try:
            feedback = create_detailed_feedback(
                transcription, grammar_score, corrected, sentiment,
                fluency_score, filler_ratio, speaking_rate, vocab_richness, avg_words
            )
        except Exception as e:
            print(f"Feedback creation error: {e}")
            feedback = "Error generating detailed feedback."

        # Create metrics visualization
        try:
            # Normalize grammar to "probability of being acceptable"
            grammar_norm = confidence if label == "acceptable" else 1 - confidence
            fig = _build_metrics_figure(grammar_norm, fluency_score, vocab_richness,
                                        speaking_rate, filler_ratio)
        except Exception as e:
            print(f"Visualization error: {e}")
            # Create a simple error figure
            fig = _build_error_figure()

        # Create detailed analysis text
        processing_time = time.time() - start_time

        try:
            top_tags = sorted(pos_counts.items(), key=lambda x: x[1], reverse=True)[:5]
            pos_counts_str = ', '.join(f"{k}: {v}" for k, v in top_tags)
        except Exception:
            pos_counts_str = "N/A"

        # Built line by line so the Markdown headings and bullets each start
        # at the beginning of their own line.
        detailed_analysis = "\n".join([
            "",
            "## Detailed Speech Analysis",
            "",
            f"**Processing Time:** {processing_time:.2f} seconds",
            "",
            f"**Audio Duration:** {duration:.2f} seconds",
            "",
            "### Metrics:",
            f"- **Grammar Score:** {confidence:.2f} ({label})",
            f"- **Fluency Score:** {fluency_score:.2f}",
            f"- **Speaking Rate:** {speaking_rate:.1f} words per minute",
            f"- **Vocabulary Richness:** {vocab_richness:.2f} (higher is better)",
            f"- **Filler Words:** {filler_count} occurrences ({filler_ratio:.1%} of speech)",
            f"- **Avg Words Per Sentence:** {avg_words:.1f}",
            f"- **Sentiment:** {sentiment} ({sentiment_score:.2f})",
            "",
            "### Word Types Used:",
            pos_counts_str,
            "",
        ])

        return transcription, grammar_score, corrected, feedback, fig, detailed_analysis

    except Exception as e:
        print(f"Unexpected error in process_audio: {e}")
        return ("An unexpected error occurred during processing.",
                "Error",
                "Error",
                "There was an unexpected error processing your audio.",
                None,
                f"## Unexpected Error\n\nAn error occurred: {str(e)[:200]}...")


# Create theme
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    button_primary_text_color="white",
    block_title_text_weight="600",
    block_border_width="2px",
    block_shadow="0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)",
)

CUSTOM_CSS = """
.container { max-width: 1000px; margin: auto; }
.header { text-align: center; margin-bottom: 20px; }
.header h1 { color: #1e40af; font-size: 2.5rem; }
.header p { color: #6b7280; font-size: 1.1rem; }
.footer { text-align: center; margin-top: 30px; color: #6b7280; }
.tips-box { background-color: #f0f9ff; border-radius: 10px; padding: 15px; margin: 10px 0; }
.score-card { border: 2px solid #dbeafe; border-radius: 10px; padding: 10px; }
"""

# NOTE(review): the gr.HTML strings below contain no tags although the CSS
# defines .header / .tips-box / .footer rules - the markup looks like it was
# stripped somewhere upstream; restore it from the original source if available.
with gr.Blocks(theme=theme, css=CUSTOM_CSS) as demo:
    gr.HTML("""

🎙️ Advanced ENGLISH Speaking Assessment

Record or upload your speech to receive comprehensive feedback on your English speaking skills

""")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="🎤 Speak or Upload Audio"
            )

            with gr.Accordion("Speaking Tips", open=False):
                gr.HTML("""

Tips for Better Results:

""")

            submit_btn = gr.Button("Analyze Speech", variant="primary")

    with gr.Row():
        with gr.Column():
            transcription_output = gr.Textbox(label="📝 Transcription", lines=3)
            corrected_output = gr.Textbox(label="✍️ Grammar Correction", lines=3)
            grammar_score_output = gr.Textbox(label="✅ Grammar Score")

    with gr.Row():
        with gr.Column():
            metrics_chart = gr.Plot(label="Performance Metrics")
        with gr.Column():
            feedback_output = gr.Textbox(label="💬 Feedback", lines=8)

    with gr.Accordion("Detailed Analysis", open=False):
        detailed_analysis = gr.Markdown()

    gr.HTML(""" """)

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[
            transcription_output,
            grammar_score_output,
            corrected_output,
            feedback_output,
            metrics_chart,
            detailed_analysis
        ]
    )

if __name__ == "__main__":
    demo.launch()