Spaces:

adnaan05
/

TruthCheck

Running

App Files Files Community

KhaqanNasir committed on 10 days ago

Commit

5edf4ff

verified ·

1 Parent(s): 9deb27b

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +209 -147

src/app.py CHANGED Viewed

@@ -4,84 +4,72 @@ import pandas as pd
 import numpy as np
 from pathlib import Path
 import sys
-import plotly.express as px
 import plotly.graph_objects as go
 from transformers import BertTokenizer
 import nltk
 # Download required NLTK data
-try:
-    nltk.data.find('tokenizers/punkt')
-except LookupError:
-    nltk.download('punkt')
-try:
-    nltk.data.find('corpora/stopwords')
-except LookupError:
-    nltk.download('stopwords')
-try:
-    nltk.data.find('tokenizers/punkt_tab')
-except LookupError:
-    nltk.download('punkt_tab')
-try:
-    nltk.data.find('corpora/wordnet')
-except LookupError:
-    nltk.download('wordnet')
 # Add project root to Python path
 project_root = Path(__file__).parent.parent
 sys.path.append(str(project_root))
 from src.models.hybrid_model import HybridFakeNewsDetector
-from src.config.config import *
 from src.data.preprocessor import TextPreprocessor
-# Custom CSS with Poppins font and increased font sizes
 st.markdown("""
 <style>
-    /* Import Google Fonts */
     @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@200;300;400;500;600;700&display=swap');
-    /* Global Styles */
     * {
-        margin: 0;
-        padding: 0;
         box-sizing: border-box;
     }
     .stApp {
-        font-family: 'Poppins', sans-serif;
-        background: #f8fafc;
         min-height: 100vh;
-        color: #1a202c;
     }
-    /* Ensure sidebar is visible */
-    #MainMenu {visibility: visible;}
     footer {visibility: hidden;}
     .stDeployButton {display: none;}
     header {visibility: hidden;}
     .stApp > header {visibility: hidden;}
-    /* Container */
-    .container {
         max-width: 1200px;
         margin: 0 auto;
-        padding: 1.5rem;
     }
-    /* Header */
-    .header {
-        padding: 1.5rem 0;
         text-align: center;
     }
     .header-title {
-        font-size: 2.5rem;
         font-weight: 700;
-        color: #1a202c;
-        display: inline-flex;
-        align-items: center;
-        gap: 0.5rem;
     }
     /* Hero Section */
@@ -90,6 +78,7 @@ st.markdown("""
         align-items: center;
         gap: 2rem;
         margin-bottom: 2rem;
     }
     .hero-left {
@@ -112,16 +101,16 @@ st.markdown("""
     }
     .hero-title {
-        font-size: 3rem;
         font-weight: 700;
-        color: #1a202c;
         margin-bottom: 0.5rem;
     }
     .hero-text {
-        font-size: 1.2rem;
-        color: #4a5568;
-        line-height: 1.5;
         max-width: 450px;
     }
@@ -129,19 +118,20 @@ st.markdown("""
     .about-section {
         margin-bottom: 2rem;
         text-align: center;
     }
     .about-title {
-        font-size: 2.2rem;
         font-weight: 600;
-        color: #1a202c;
         margin-bottom: 0.5rem;
     }
     .about-text {
-        font-size: 1.1rem;
-        color: #4a5568;
-        line-height: 1.5;
         max-width: 600px;
         margin: 0 auto;
     }
@@ -156,8 +146,7 @@ st.markdown("""
         border-radius: 8px !important;
         border: 1px solid #d1d5db !important;
         padding: 1rem !important;
-        font-size: 1.1rem !important;
-        font-family: 'Poppins', sans-serif !important;
         background: #ffffff !important;
         min-height: 150px !important;
         transition: all 0.2s ease !important;
@@ -179,12 +168,12 @@ st.markdown("""
         color: white !important;
         border-radius: 8px !important;
         padding: 0.75rem 2rem !important;
-        font-size: 1.1rem !important;
         font-weight: 600 !important;
-        font-family: 'Poppins', sans-serif !important;
         transition: all 0.2s ease !important;
         border: none !important;
         width: 100% !important;
     }
     .stButton > button:hover {
@@ -197,6 +186,9 @@ st.markdown("""
         margin-top: 1rem;
         padding: 1rem;
         border-radius: 8px;
     }
     .result-card {
@@ -218,7 +210,7 @@ st.markdown("""
     .prediction-badge {
         font-weight: 600;
-        font-size: 1.1rem;
         margin-bottom: 0.5rem;
         display: flex;
         align-items: center;
@@ -228,7 +220,7 @@ st.markdown("""
     .confidence-score {
         font-weight: 600;
         margin-left: auto;
-        font-size: 1.1rem;
     }
     /* Chart Containers */
@@ -236,78 +228,149 @@ st.markdown("""
         padding: 1rem;
         border-radius: 8px;
         margin: 1rem 0;
     }
-    /* Sidebar Styling */
-    .stSidebar {
-        background: #ffffff;
-        border-right: 1px solid #e5e7eb;
     }
-    .stSidebar .sidebar-content {
-        padding: 1rem;
     }
 </style>
 """, unsafe_allow_html=True)
 @st.cache_resource
-def load_model_and_tokenizer():
     """Load the model and tokenizer (cached)."""
-    model = HybridFakeNewsDetector(
-        bert_model_name=BERT_MODEL_NAME,
-        lstm_hidden_size=LSTM_HIDDEN_SIZE,
-        lstm_num_layers=LSTM_NUM_LAYERS,
-        dropout_rate=DROPOUT_RATE
-    )
-    state_dict = torch.load(SAVED_MODELS_DIR / "final_model.pt", map_location=torch.device('cpu'))
-    model_state_dict = model.state_dict()
-    filtered_state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict}
-    model.load_state_dict(filtered_state_dict, strict=False)
-    model.eval()
-    tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
-    return model, tokenizer
 @st.cache_resource
-def get_preprocessor():
     """Get the text preprocessor (cached)."""
-    return TextPreprocessor()
-def predict_news(text):
     """Predict if the given news is fake or real."""
     model, tokenizer = load_model_and_tokenizer()
     preprocessor = get_preprocessor()
-    processed_text = preprocessor.preprocess_text(text)
-    encoding = tokenizer.encode_plus(
-        processed_text,
-        add_special_tokens=True,
-        max_length=MAX_SEQUENCE_LENGTH,
-        padding='max_length',
-        truncation=True,
-        return_attention_mask=True,
-        return_tensors='pt'
-    )
-    with torch.no_grad():
-        outputs = model(
-            encoding['input_ids'],
-            encoding['attention_mask']
         )
-        probabilities = torch.softmax(outputs['logits'], dim=1)
-        prediction = torch.argmax(outputs['logits'], dim=1)
-        attention_weights = outputs['attention_weights']
-    attention_weights_np = attention_weights[0].cpu().numpy()
-    return {
-        'prediction': prediction.item(),
-        'label': 'FAKE' if prediction.item() == 1 else 'REAL',
-        'confidence': torch.max(probabilities, dim=1)[0].item(),
-        'probabilities': {
-            'REAL': probabilities[0][0].item(),
-            'FAKE': probabilities[0][1].item()
-        },
-        'attention_weights': attention_weights_np
-    }
-def plot_confidence(probabilities):
     """Plot prediction confidence with simplified styling."""
     fig = go.Figure(data=[
         go.Bar(
             x=list(probabilities.keys()),
@@ -330,8 +393,10 @@ def plot_confidence(probabilities):
     )
     return fig
-def plot_attention(text, attention_weights):
     """Plot attention weights with simplified styling."""
     tokens = text.split()[:20]
     attention_weights = attention_weights[:len(tokens)]
     if isinstance(attention_weights, (list, np.ndarray)):
@@ -358,47 +423,43 @@ def plot_attention(text, attention_weights):
     return fig
 def main():
-    # Header
     st.markdown("""
-    <div class="header">
-        <div class="container">
-            <h1 class="header-title">🛡️ TruthCheck</h1>
-        </div>
     </div>
     """, unsafe_allow_html=True)
     # Hero Section
     st.markdown("""
-    <div class="container">
-        <div class="hero">
-            <div class="hero-left">
-                <h2 class="hero-title">Instant Fake News Detection</h2>
-                <p class="hero-text">
-                    Verify news articles with our AI-powered tool, driven by BERT and BiLSTM for fast and accurate authenticity analysis.
-                </p>
-            </div>
-            <div class="hero-right">
-                <img src="https://images.unsplash.com/photo-1593642532973-d31b97d0fad2?ixlib=rb-4.0.3&auto=format&fit=crop&w=500&q=80" alt="Fake News Detector" onerror="this.src='https://via.placeholder.com/500x300.png?text=Fake+News+Detector'">
-            </div>
         </div>
     </div>
     """, unsafe_allow_html=True)
     # About Section
     st.markdown("""
-    <div class="container">
-        <div class="about-section">
-            <h2 class="about-title">About TruthCheck</h2>
-            <p class="about-text">
-                TruthCheck uses a hybrid BERT-BiLSTM model to detect fake news with high accuracy. Paste an article below for instant analysis.
-            </p>
-        </div>
     </div>
     """, unsafe_allow_html=True)
     # Input Section
-    st.markdown('<div class="container"><div class="input-container">', unsafe_allow_html=True)
     news_text = st.text_area(
         "Analyze a News Article",
         height=150,
@@ -408,18 +469,16 @@ def main():
     st.markdown('</div>', unsafe_allow_html=True)
     # Analyze Button
-    st.markdown('<div class="container">', unsafe_allow_html=True)
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
         analyze_button = st.button("🔍 Analyze Now", key="analyze_button")
-    st.markdown('</div>', unsafe_allow_html=True)
     if analyze_button:
         if news_text and len(news_text.strip()) > 10:
             with st.spinner("Analyzing article..."):
-                try:
-                    result = predict_news(news_text)
-                    st.markdown('<div class="container"><div class="results-container">', unsafe_allow_html=True)
                     # Prediction Result
                     col1, col2 = st.columns([1, 1], gap="medium")
@@ -428,14 +487,14 @@ def main():
                             st.markdown(f'''
                             <div class="result-card fake-news">
                                 <div class="prediction-badge">🚨 Fake News Detected <span class="confidence-score">{result["confidence"]:.1%}</span></div>
-                                <p>Our AI has identified this content as likely misinformation based on linguistic patterns and content analysis.</p>
                             </div>
                             ''', unsafe_allow_html=True)
                         else:
                             st.markdown(f'''
                             <div class="result-card real-news">
                                 <div class="prediction-badge">✅ Authentic News <span class="confidence-score">{result["confidence"]:.1%}</span></div>
-                                <p>This content appears to be legitimate based on professional writing style and factual consistency.</p>
                             </div>
                             ''', unsafe_allow_html=True)
@@ -447,15 +506,18 @@ def main():
                     # Attention Analysis
                     st.markdown('<div class="chart-container">', unsafe_allow_html=True)
                     st.plotly_chart(plot_attention(news_text, result['attention_weights']), use_container_width=True)
-                    st.markdown('</div></div></div>', unsafe_allow_html=True)
-                except Exception as e:
-                    st.markdown('<div class="container">', unsafe_allow_html=True)
-                    st.error(f"Error: {str(e)}. Please try again or contact support.")
-                    st.markdown('</div>', unsafe_allow_html=True)
         else:
-            st.markdown('<div class="container">', unsafe_allow_html=True)
             st.error("Please enter a news article (at least 10 words) for analysis.")
-            st.markdown('</div>', unsafe_allow_html=True)
 if __name__ == "__main__":
     main()

 import numpy as np
 from pathlib import Path
 import sys
 import plotly.graph_objects as go
 from transformers import BertTokenizer
 import nltk
 # Download required NLTK data
# For the four resources used here, the package name handed to nltk.download
# is the last component of the path that nltk.data.find looks up.
nltk_data = {
    path: path.rsplit('/', 1)[-1]
    for path in (
        'tokenizers/punkt',
        'corpora/stopwords',
        'tokenizers/punkt_tab',
        'corpora/wordnet',
    )
}
for resource, package in nltk_data.items():
    try:
        nltk.data.find(resource)
    except LookupError:
        # Resource is not available locally yet, so fetch its package.
        nltk.download(package)
 # Add project root to Python path
 project_root = Path(__file__).parent.parent
 sys.path.append(str(project_root))
 from src.models.hybrid_model import HybridFakeNewsDetector
+from src.config.config import BERT_MODEL_NAME, LSTM_HIDDEN_SIZE, LSTM_NUM_LAYERS, DROPOUT_RATE, SAVED_MODELS_DIR, MAX_SEQUENCE_LENGTH
 from src.data.preprocessor import TextPreprocessor
+# Custom CSS with Poppins font
 st.markdown("""
 <style>
     @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@200;300;400;500;600;700&display=swap');
     * {
+        font-family: 'Poppins', sans-serif !important;
         box-sizing: border-box;
     }
     .stApp {
+        background: #ffffff;
         min-height: 100vh;
+        color: #1f2a44;
     }
+    #MainMenu {visibility: hidden;}
     footer {visibility: hidden;}
     .stDeployButton {display: none;}
     header {visibility: hidden;}
     .stApp > header {visibility: hidden;}
+    /* Main Container */
+    .main-container {
         max-width: 1200px;
         margin: 0 auto;
+        padding: 1rem 2rem;
     }
+    /* Header Section */
+    .header-section {
         text-align: center;
+        margin-bottom: 2.5rem;
+        padding: 1.5rem 0;
     }
     .header-title {
+        font-size: 2.25rem;
         font-weight: 700;
+        color: #1f2a44;
+        margin: 0;
     }
     /* Hero Section */
         align-items: center;
         gap: 2rem;
         margin-bottom: 2rem;
+        padding: 0 1rem;
     }
     .hero-left {
     }
     .hero-title {
+        font-size: 2.5rem;
         font-weight: 700;
+        color: #1f2a44;
         margin-bottom: 0.5rem;
     }
     .hero-text {
+        font-size: 1rem;
+        color: #6b7280;
+        line-height: 1.6;
         max-width: 450px;
     }
     .about-section {
         margin-bottom: 2rem;
         text-align: center;
+        padding: 0 1rem;
     }
     .about-title {
+        font-size: 1.75rem;
         font-weight: 600;
+        color: #1f2a44;
         margin-bottom: 0.5rem;
     }
     .about-text {
+        font-size: 0.95rem;
+        color: #6b7280;
+        line-height: 1.6;
         max-width: 600px;
         margin: 0 auto;
     }
         border-radius: 8px !important;
         border: 1px solid #d1d5db !important;
         padding: 1rem !important;
+        font-size: 1rem !important;
         background: #ffffff !important;
         min-height: 150px !important;
         transition: all 0.2s ease !important;
         color: white !important;
         border-radius: 8px !important;
         padding: 0.75rem 2rem !important;
+        font-size: 1rem !important;
         font-weight: 600 !important;
         transition: all 0.2s ease !important;
         border: none !important;
         width: 100% !important;
+        max-width: 300px;
     }
     .stButton > button:hover {
         margin-top: 1rem;
         padding: 1rem;
         border-radius: 8px;
+        max-width: 1200px;
+        margin-left: auto;
+        margin-right: auto;
     }
     .result-card {
     .prediction-badge {
         font-weight: 600;
+        font-size: 1rem;
         margin-bottom: 0.5rem;
         display: flex;
         align-items: center;
     .confidence-score {
         font-weight: 600;
         margin-left: auto;
+        font-size: 1rem;
     }
     /* Chart Containers */
         padding: 1rem;
         border-radius: 8px;
         margin: 1rem 0;
+        max-width: 1200px;
+        margin-left: auto;
+        margin-right: auto;
     }
+    /* Footer */
+    .footer {
+        border-top: 1px solid #e5e7eb;
+        padding: 1.5rem 0;
+        text-align: center;
+        max-width: 1200px;
+        margin: 2rem auto 0;
     }
+    /* Responsive Design */
+    @media (max-width: 1024px) {
+        .hero {
+            flex-direction: column;
+            text-align: center;
+        }
+        .hero-right img {
+            max-width: 80%;
+        }
+    }
+    @media (max-width: 768px) {
+        .header-title {
+            font-size: 1.75rem;
+        }
+        .hero-title {
+            font-size: 2rem;
+        }
+        .hero-text {
+            font-size: 0.9rem;
+        }
+        .about-title {
+            font-size: 1.5rem;
+        }
+        .about-text {
+            font-size: 0.9rem;
+        }
+    }
+    @media (max-width: 480px) {
+        .header-title {
+            font-size: 1.5rem;
+        }
+        .hero-title {
+            font-size: 1.75rem;
+        }
+        .hero-text {
+            font-size: 0.85rem;
+        }
+        .about-title {
+            font-size: 1.25rem;
+        }
+        .about-text {
+            font-size: 0.85rem;
+        }
     }
 </style>
 """, unsafe_allow_html=True)
 @st.cache_resource
def load_model_and_tokenizer() -> tuple[HybridFakeNewsDetector, BertTokenizer] | tuple[None, None]:
    """Load the trained hybrid model and its BERT tokenizer.

    The checkpoint ``final_model.pt`` is read from ``SAVED_MODELS_DIR`` onto
    the CPU. Only checkpoint entries whose keys also exist in the freshly
    built model are applied (non-strict load), and the model is then put in
    eval mode.

    Returns:
        ``(model, tokenizer)`` on success, or ``(None, None)`` when the
        checkpoint file is missing or any step raises. Both failure cases
        are reported through ``st.error``.
    """
    # NOTE(review): this function is decorated with @st.cache_resource in the
    # file; that presumably caches the (None, None) failure result too, so a
    # transient failure may persist until the cache is cleared - confirm.
    try:
        # Look for the checkpoint first so that a missing file is reported
        # without paying for model construction.
        model_path = SAVED_MODELS_DIR / "final_model.pt"
        if not model_path.exists():
            st.error("Model file not found. Please ensure 'final_model.pt' is in the models/saved directory.")
            return None, None
        model = HybridFakeNewsDetector(
            bert_model_name=BERT_MODEL_NAME,
            lstm_hidden_size=LSTM_HIDDEN_SIZE,
            lstm_num_layers=LSTM_NUM_LAYERS,
            dropout_rate=DROPOUT_RATE
        )
        # NOTE(review): torch.load unpickles the file, so only checkpoints
        # from a trusted source should be loaded here (consider
        # weights_only=True if the installed torch version supports it).
        state_dict = torch.load(model_path, map_location=torch.device('cpu'))
        # Drop checkpoint entries the current architecture does not define;
        # strict=False additionally tolerates keys missing from the file.
        model_state_dict = model.state_dict()
        filtered_state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict}
        model.load_state_dict(filtered_state_dict, strict=False)
        model.eval()
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
        return model, tokenizer
    except Exception as e:
        # UI boundary: show the problem instead of crashing the app.
        st.error(f"Error loading model or tokenizer: {str(e)}")
        return None, None
 @st.cache_resource
def get_preprocessor() -> TextPreprocessor | None:
    """Create the text preprocessor used before tokenization.

    Returns:
        A ``TextPreprocessor`` instance, or ``None`` when its constructor
        raises; the failure is then reported through ``st.error``.
    """
    try:
        preprocessor = TextPreprocessor()
    except Exception as e:
        st.error(f"Error initializing preprocessor: {str(e)}")
        preprocessor = None
    return preprocessor
def predict_news(text: str) -> dict | None:
    """Classify a news text as fake or real with the hybrid model.

    Args:
        text: Raw article text. It is run through
            ``preprocessor.preprocess_text`` and then tokenized to
            ``MAX_SEQUENCE_LENGTH`` tokens (padded / truncated).

    Returns:
        ``None`` when the model, tokenizer or preprocessor is unavailable,
        or when any step raises (the error is shown via ``st.error``).
        Otherwise a dict with the keys:

        * ``prediction``: predicted class index.
        * ``label``: ``'FAKE'`` for index 1, ``'REAL'`` otherwise.
        * ``confidence``: the highest softmax probability.
        * ``probabilities``: ``{'REAL': p(index 0), 'FAKE': p(index 1)}``.
        * ``attention_weights``: NumPy array taken from the first batch row
          of the model's ``attention_weights`` output, or zeros when the
          model does not provide one.
    """
    model, tokenizer = load_model_and_tokenizer()
    if model is None or tokenizer is None:
        return None
    preprocessor = get_preprocessor()
    if preprocessor is None:
        return None
    try:
        processed_text = preprocessor.preprocess_text(text)
        encoding = tokenizer.encode_plus(
            processed_text,
            add_special_tokens=True,
            max_length=MAX_SEQUENCE_LENGTH,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        with torch.no_grad():
            outputs = model(
                encoding['input_ids'],
                encoding['attention_mask']
            )
            probabilities = torch.softmax(outputs['logits'], dim=1)
            prediction = torch.argmax(outputs['logits'], dim=1)
            attention_weights = outputs.get('attention_weights')
            if attention_weights is None:
                # The fallback needs a batch dimension: after the [0] below
                # it must still be a 1-D array, because plot_attention slices
                # it with [:len(tokens)] and a 0-d array cannot be sliced.
                # Assumes input_ids is (1, seq_len) for a single text - TODO confirm.
                attention_weights = torch.zeros_like(
                    encoding['input_ids'], dtype=torch.float
                )
        attention_weights_np = attention_weights[0].cpu().numpy()
        predicted_class = prediction.item()
        return {
            'prediction': predicted_class,
            'label': 'FAKE' if predicted_class == 1 else 'REAL',
            'confidence': torch.max(probabilities, dim=1)[0].item(),
            'probabilities': {
                'REAL': probabilities[0][0].item(),
                'FAKE': probabilities[0][1].item()
            },
            'attention_weights': attention_weights_np
        }
    except Exception as e:
        # UI boundary: report the failure and let the caller skip rendering.
        st.error(f"Prediction error: {str(e)}")
        return None
+def plot_confidence(probabilities: dict) -> go.Figure:
     """Plot prediction confidence with simplified styling."""
+    if not probabilities or not isinstance(probabilities, dict):
+        return go.Figure()
     fig = go.Figure(data=[
         go.Bar(
             x=list(probabilities.keys()),
     )
     return fig
+def plot_attention(text: str, attention_weights: np.ndarray) -> go.Figure:
     """Plot attention weights with simplified styling."""
+    if not text or not attention_weights.size:
+        return go.Figure()
     tokens = text.split()[:20]
     attention_weights = attention_weights[:len(tokens)]
     if isinstance(attention_weights, (list, np.ndarray)):
     return fig
 def main():
+    # Main Container
+    st.markdown('<div class="main-container">', unsafe_allow_html=True)
+    # Header Section
     st.markdown("""
+    <div class="header-section">
+        <h1 class="header-title">🛡️ TruthCheck - Advanced Fake News Detector</h1>
     </div>
     """, unsafe_allow_html=True)
     # Hero Section
     st.markdown("""
+    <div class="hero">
+        <div class="hero-left">
+            <h2 class="hero-title">Instant Fake News Detection</h2>
+            <p class="hero-text">
+                Verify news articles with our AI-powered tool, driven by advanced BERT and BiLSTM models for accurate authenticity analysis.
+            </p>
+        </div>
+        <div class="hero-right">
+            <img src="https://images.pexels.com/photos/267350/pexels-photo-267350.jpeg?auto=compress&cs=tinysrgb&w=500" alt="Fake News Illustration" onerror="this.src='https://via.placeholder.com/500x300.png?text=Fake+News+Illustration'">
         </div>
     </div>
     """, unsafe_allow_html=True)
     # About Section
     st.markdown("""
+    <div class="about-section">
+        <h2 class="about-title">About TruthCheck</h2>
+        <p class="about-text">
+            TruthCheck harnesses a hybrid BERT-BiLSTM model to detect fake news with high precision. Simply paste an article below to analyze its authenticity instantly.
+        </p>
     </div>
     """, unsafe_allow_html=True)
     # Input Section
+    st.markdown('<div class="input-container">', unsafe_allow_html=True)
     news_text = st.text_area(
         "Analyze a News Article",
         height=150,
     st.markdown('</div>', unsafe_allow_html=True)
     # Analyze Button
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
         analyze_button = st.button("🔍 Analyze Now", key="analyze_button")
     if analyze_button:
         if news_text and len(news_text.strip()) > 10:
             with st.spinner("Analyzing article..."):
+                result = predict_news(news_text)
+                if result:
+                    st.markdown('<div class="results-container">', unsafe_allow_html=True)
                     # Prediction Result
                     col1, col2 = st.columns([1, 1], gap="medium")
                             st.markdown(f'''
                             <div class="result-card fake-news">
                                 <div class="prediction-badge">🚨 Fake News Detected <span class="confidence-score">{result["confidence"]:.1%}</span></div>
+                                <p>Our AI has identified this content as likely misinformation based on linguistic patterns and context.</p>
                             </div>
                             ''', unsafe_allow_html=True)
                         else:
                             st.markdown(f'''
                             <div class="result-card real-news">
                                 <div class="prediction-badge">✅ Authentic News <span class="confidence-score">{result["confidence"]:.1%}</span></div>
+                                <p>This content appears legitimate based on professional writing style and factual consistency.</p>
                             </div>
                             ''', unsafe_allow_html=True)
                     # Attention Analysis
                     st.markdown('<div class="chart-container">', unsafe_allow_html=True)
                     st.plotly_chart(plot_attention(news_text, result['attention_weights']), use_container_width=True)
+                    st.markdown('</div></div>', unsafe_allow_html=True)
         else:
             st.error("Please enter a news article (at least 10 words) for analysis.")
+    # Footer
+    st.markdown("---")
+    st.markdown(
+        '<p style="text-align: center; font-weight: 600; font-size: 16px;">💻 Developed with ❤️ using Streamlit | © 2025</p>',
+        unsafe_allow_html=True
+    )
+    st.markdown('</div>', unsafe_allow_html=True)  # Close main-container
 if __name__ == "__main__":
     main()