# NewsWaves -- Streamlit news-aggregation app: fetches Google News RSS,
# summarizes articles, predicts sentiment, translates, and generates audio.
# Standard library
import io
from urllib.parse import quote_plus
from urllib.request import urlopen

# Third-party
import joblib
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import spacy
import streamlit as st
import tensorflow as tf
import yake
from bs4 import BeautifulSoup as soup
from googletrans import Translator
from gtts import gTTS
from newspaper import Article
from PIL import Image
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding, SpatialDropout1D
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from wordcloud import WordCloud

nlp = spacy.load("en_core_web_sm")

# Define a custom object scope to register the custom layer
# Load the saved RoBERTa model with the custom object scope
# Now, you can use the loaded_model for inference or further training
# Tokenizer data required by newspaper3k's Article.nlp() summarizer.
nltk.download('punkt')
# Load the sentiment training corpus; it is used here only to fit the
# Tokenizer whose vocabulary the pre-trained model expects at inference.
df = pd.read_csv("dataset/train.csv",delimiter=',', encoding='ISO-8859-1')
tweet_df = df[['text','sentiment']]
# Keep only positive/negative rows: the task is binary classification.
tweet_df = tweet_df[tweet_df['sentiment'] != 'neutral']
# factorize() returns (codes, uniques); uniques (index 1) maps a predicted
# code back to its label string in predict_sentiment().
sentiment_label = tweet_df.sentiment.factorize()
tweet = tweet_df.text.values
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(tweet)
vocab_size = len(tokenizer.word_index) + 1
encoded_docs = tokenizer.texts_to_sequences(tweet)
padded_sequence = pad_sequences(encoded_docs, maxlen=200)
embedding_vector_length = 32
# NOTE(review): the Sequential model built and compiled below is dead code;
# it is immediately discarded by the load_model() call that follows.
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_length, input_length=200) )
model.add(SpatialDropout1D(0.25))
model.add(LSTM(50, dropout=0.5, recurrent_dropout=0.5))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
# Load the pre-trained LSTM weights; this rebinding replaces the model above.
model=load_model('models/new.h5')
# history = model.fit(padded_sequence,sentiment_label[0],validation_split=0.2, epochs=5, batch_size=32)
# NOTE(review): the imports below duplicate the top-of-file imports and
# could be removed once confirmed redundant.
import spacy
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from preprocessor import preprocesser
# Re-creates the spaCy pipeline already loaded above (same model name).
nlp = spacy.load("en_core_web_sm")
# Wrap the project's text preprocessor so it can slot into an sklearn pipeline.
text_processer = FunctionTransformer(preprocesser)
import joblib
# Save the trained model to a file
# To load the model back in the future
#define a function for filter stop words and punctuations and extract lemma from the txts
# NOTE(review): this rebinds `pd` -- previously pandas -- to a callable from
# model.py; display_news_stories() calls pd(summary) as a news-category
# classifier. Presumably intentional, but the shadowing is fragile; confirm.
from model import pd
def predict_sentiment(text):
    """Classify *text* with the pre-trained LSTM and return its label string.

    Uses the module-level `tokenizer`, `model`, and `sentiment_label`
    (the factorized label table built at import time).
    """
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=200)
    label_index = int(model.predict(padded).round().item())
    return sentiment_label[1][label_index]
# Define the mapping of numerical labels to category names | |
# Assuming you have already loaded your model as 'loaded_model' | |
# Load your model here or replace 'loaded_model' with your actual model loading code | |
# Set Streamlit theme and layout | |
# Attempt to attach an external stylesheet to the page.
# Fix: the literal had an `f` prefix with no placeholders (useless f-string).
# NOTE(review): Streamlit does not serve arbitrary local files, so this
# relative <link href="styles.css"> likely has no effect -- confirm, or
# inline the CSS via st.markdown as run() does.
st.markdown(
    """
    <link rel="stylesheet" href="styles.css">
    """,
    unsafe_allow_html=True,
)
def fetch_news_search_topic(topic):
    """Search Google News RSS for *topic* and return the matching <item> tags.

    Fix: the user-supplied topic is now URL-encoded (quote_plus), so queries
    containing spaces or special characters produce a valid request URL
    instead of a malformed one.

    Returns a list of BeautifulSoup <item> tags (may be empty).
    """
    site = 'https://news.google.com/rss/search?q={}'.format(quote_plus(topic))
    with urlopen(site) as response:  # context manager guarantees close
        raw_xml = response.read()
    feed = soup(raw_xml, 'xml')
    return feed.find_all('item')
def fetch_top_news():
    """Fetch the Google News top-stories RSS feed and return its <item> tags."""
    feed_url = 'https://news.google.com/news/rss'
    with urlopen(feed_url) as response:  # closed automatically
        raw_xml = response.read()
    feed = soup(raw_xml, 'xml')
    return feed.find_all('item')
def analyze_sentiment_with_model(text):
    """Vectorize *text* and classify it with the loaded model.

    NOTE(review): `vectorizer` is never defined anywhere in this file, so
    calling this raises NameError. The app's UI path uses predict_sentiment()
    instead, which suggests this helper is leftover dead code from an earlier
    sklearn-based model -- confirm and remove, or restore the vectorizer.
    """
    # Preprocess the text using the loaded vectorizer
    text_vectorized = vectorizer.transform([text])
    # Predict sentiment using the model
    sentiment = model.predict(text_vectorized)
    return sentiment[0]
def fetch_category_news(topic):
    """Fetch the Google News RSS feed for a headline *topic* (e.g. 'SPORTS').

    Returns a list of BeautifulSoup <item> tags (may be empty).
    """
    feed_url = 'https://news.google.com/news/rss/headlines/section/topic/{}'.format(topic)
    with urlopen(feed_url) as response:  # closed automatically
        raw_xml = response.read()
    feed = soup(raw_xml, 'xml')
    return feed.find_all('item')
def fetch_news_poster(poster_link):
    """Display the article's poster image, with a local placeholder fallback.

    Fixes: the bare `except:` (which also swallowed SystemExit and
    KeyboardInterrupt) is narrowed to `except Exception`, and the URL
    response is now closed via a context manager.

    Any fetch/decode failure (bad URL, network error, unreadable image
    bytes) falls back to the bundled placeholder instead of crashing.
    """
    try:
        with urlopen(poster_link) as response:
            raw_data = response.read()
        image = Image.open(io.BytesIO(raw_data))
    except Exception:
        image = Image.open('./picture/no_image.jpg')
    st.image(image, use_column_width=True)
def display_news_stories(news_list, quantity, target_language=None, enable_audio=False):
    """Render up to *quantity* articles from *news_list* in the Streamlit page.

    Args:
        news_list: iterable of RSS <item> tags from the fetch_* helpers.
        quantity: maximum number of articles to render.
        target_language: optional language code; when truthy, the title and
            summary are also shown translated via translate_text().
        enable_audio: when True, offer a button that synthesizes an audio
            summary of the article with gTTS.

    Bug fix: the counter `c` was re-initialized to 0 inside the loop and
    never incremented, so every headline displayed "(0)" and the
    `c >= quantity` cap never fired -- the whole feed rendered regardless
    of the user's chosen count. The counter now lives outside the loop and
    is incremented per article.
    """
    c = 0  # articles rendered so far
    for news in news_list:
        c += 1  # number articles 1..quantity and enforce the cap below
        st.write('**<span style="color: #f0f0f0;">({}) {}</span>**'.format(c, news.title.text), unsafe_allow_html=True)
        news_data = Article(news.link.text)
        try:
            news_data.download()
            news_data.parse()
            news_data.nlp()
        except Exception as e:
            st.error(e)
        # Read-time estimate assuming an average speed of 200 words/minute.
        word_count = len(news_data.text.split())
        read_time_minutes = int(word_count / 200)
        fetch_news_poster(news_data.top_image)
        with st.expander(news.title.text):
            st.markdown(
                '''<h6 style='text-align: justify; color: #f0f0f0; font-weight: bold;'>{}</h6>'''.format(news_data.summary),
                unsafe_allow_html=True)
            st.markdown("[Read more at {}...]({})".format(news.source.text, news.link.text))
            st.markdown("<span style='color:#ffffff;'>Estimated Read Time: {} min</span>".format(read_time_minutes), unsafe_allow_html=True)
            predicted_sentiment = predict_sentiment(news_data.summary)
            sentiment_emoji = get_sentiment_emoji(predicted_sentiment)
            st.markdown("<span style='color: #ffffff;'>Predicted Sentiment: {} ({})</span>".format(sentiment_emoji, predicted_sentiment), unsafe_allow_html=True)
            # `pd` here is the category classifier from `from model import pd`
            # (it shadows pandas) -- see the note at that import.
            st.markdown("<span style='color: #ffffff;'>Category of news: {}</span>".format(pd(news_data.summary)), unsafe_allow_html=True)
            if target_language:
                translated_summary = translate_text(news_data.summary, target_language)
                st.markdown("<span style='color: #ffffff; font-weight: bold;'>Translated Summary ({})</span>:".format(target_language), unsafe_allow_html=True)
                news_title_translated = translate_text(news.title.text, target_language)
                st.markdown("<span style='color: #ffffff;'>{}</span>".format(news_title_translated), unsafe_allow_html=True)
                st.markdown("<span style='color: #ffffff;'>{}</span>".format(translated_summary), unsafe_allow_html=True)
            # Audio summaries (opt-in)
            if enable_audio:
                audio_summary_button = st.button("Generate Audio Summary")
                if audio_summary_button:
                    audio_path = generate_audio_summary(news_data.summary, lang=target_language)
                    if audio_path:
                        st.audio(audio_path, format='audio/mp3')
                    else:
                        st.warning("Unable to generate audio summary.")
        st.success("Published Date: " + news.pubDate.text)
        if c >= quantity:
            break
def generate_audio_summary(text, lang='en'):
    """Synthesize *text* to speech and save it as an MP3.

    Returns the saved file path, or None (after showing a Streamlit error)
    if synthesis fails.
    """
    audio_path = './audio_summary.mp3'
    try:
        gTTS(text=text, lang=lang).save(audio_path)
    except Exception as e:
        st.error(f"Error generating audio summary: {e}")
        return None
    return audio_path
def analyze_sentiment(text):
    """Return "positive"/"negative"/"neutral" from TextBlob polarity.

    NOTE(review): TextBlob is never imported in this file, so calling this
    raises NameError. It appears unused -- display_news_stories() relies on
    predict_sentiment() instead. Either add `from textblob import TextBlob`
    or delete this helper.
    """
    analysis = TextBlob(text)
    sentiment_score = analysis.sentiment.polarity
    if sentiment_score > 0:
        return "positive"
    elif sentiment_score < 0:
        return "negative"
    else:
        return "neutral"
def get_sentiment_emoji(sentiment):
    """Map a sentiment label to an emoji; unknown labels get the neutral face."""
    emoji_by_label = {
        "positive": "😃",
        "negative": "😞",
    }
    return emoji_by_label.get(sentiment, "😐")
def translate_text(text, target_language):
    """Translate *text* into *target_language* with googletrans.

    Returns the translated string, or "" (after showing a Streamlit error)
    if translation fails.
    """
    try:
        return Translator().translate(text, dest=target_language).text
    except Exception as e:
        st.error(f"Error translating text: {e}")
        return ""
def extract_keywords(text):
    """Return the top-20 single-word YAKE keywords extracted from *text*."""
    extractor = yake.KeywordExtractor(lan="en", n=1, dedupLim=0.9, dedupFunc='seqm', windowsSize=1, top=20)
    return [keyword for keyword, _score in extractor.extract_keywords(text)]
def run():
    """Render the NewsWaves Streamlit page.

    Injects page-wide CSS, draws the header, then dispatches on the chosen
    category: latest news, favourite topic, or free-text search. Each path
    collects a news count and optional translation language and hands the
    fetched RSS items to display_news_stories(). Finishes with a fixed
    footer bar.
    """
    # Page-wide background image with a dark fallback colour.
    custom_css ="""
    <style>
    .stApp {
        background-image: url('https://img.freepik.com/free-vector/global-technology-earth-news-bulletin-background_1017-33687.jpg?w=1380&t=st=1697978148~exp=1697978748~hmac=4943a05997b7d4461e9e581e177b3a5dcca3df44d6fa519f830ebe1b922fcfa0'); /* Replace with your image file name */
        background-color: #333; /* Fallback color if the image is unavailable */
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center center;
        opacity: 0.9;
    }
    </style>
    """
    # Display the custom CSS using st.markdown
    st.markdown(custom_css, unsafe_allow_html=True)
    # Styles for the centered header and sub-header.
    # NOTE(review): `height: 20;` below is unitless and thus invalid CSS --
    # browsers will ignore it; probably meant `20px`.
    custom_css = """
    <style>
    .custom-header {
        display: flex;
        align-items: center;
        justify-content: center;
        height: 20;
        text-align: center;
        color: #002366;
        background: rgba(245, 245, 245, 0.7); /* Transparent whitish background */
        border: 2px solid #0074D9; /* Stylish border color */
        border-radius: 15px; /* Circular border radius for a stylish look */
        font-family: 'Bebas Neue', sans-serif;
        font-size: 60px;
        text-transform: uppercase;
        box-shadow: 0 0 20px rgba(0, 0, 0, 0.3); /* Box shadow for depth and style */
    }
    .sub-header {
        font-size: 25px;
        color: #f0f0f0; /* Set font color to white */
        text-align: center; /* Center the text */
        margin-left: 20px;
    }
    </style>
    """
    st.markdown(custom_css, unsafe_allow_html=True)
    # Centered header and tagline.
    st.markdown("<div class='custom-header'>NewsWaves</div>", unsafe_allow_html=True)
    st.markdown("<div class='sub-header'>A platform to get daily latest news updates of your favorite category.</div>", unsafe_allow_html=True)
    # Three-column layout; the side columns are empty spacers.
    col1, col2, col3 = st.columns([3, 5, 3])
    with col1:
        st.write("")
    with col3:
        st.write("")
    category = ['Select any category', 'Latest News', 'Favourite News', 'Search Any News']
    cat_op = st.selectbox('Select your Category', category)
    if cat_op == category[0]:
        # Placeholder entry selected -- prompt the user.
        st.warning('Please select a category!')
    elif cat_op == category[1]:
        # Latest News: top-stories feed.
        st.markdown("<h3 style='color: #ffffff; font-weight: bold;'>Latest News for you</h3>", unsafe_allow_html=True)
        st.markdown("<span style='color: #ffffff;'>Number of News:</span>", unsafe_allow_html=True)
        # Number of articles to show (5..25).
        no_of_news = st.number_input('', min_value=5, max_value=25, step=1, value=10, format="%d", key="no_of_news")
        st.markdown("<style>div[data-baseweb='input'] input { color: #000000 !important; }</style>", unsafe_allow_html=True)
        st.markdown("<span style='color: #ffffff;'>Translate to Language (optional):</span>", unsafe_allow_html=True)
        # Optional target language code for translation.
        target_language = st.text_input('', key="target_language")
        st.markdown("<style>div[data-baseweb='input'] input { margin-top: 0; color: #ffffff; }</style>", unsafe_allow_html=True)
        news_list = fetch_top_news()
        display_news_stories(news_list, no_of_news, target_language)
    elif cat_op == category[2]:
        # Favourite News: fixed Google News headline sections.
        av_topics = ['Choose Topic', 'WORLD', 'NATION', 'BUSINESS', 'TECHNOLOGY', 'ENTERTAINMENT', 'SPORTS', 'SCIENCE', 'HEALTH']
        st.subheader("Choose your favorite Topic")
        chosen_topic = st.selectbox("Choose your favorite Topic", av_topics)
        if chosen_topic == av_topics[0]:
            st.warning("Please choose a topic")
        else:
            no_of_news = st.number_input('Number of News:', min_value=5, max_value=25, step=1, value=10)
            target_language = st.text_input('Translate to Language (optional):')
            news_list = fetch_category_news(chosen_topic)
            if news_list:
                st.subheader(f"✅ Here are some {chosen_topic} News for you")
                display_news_stories(news_list, no_of_news,target_language)
            else:
                st.error(f"No News found for {chosen_topic}")
    elif cat_op == category[3]:
        # Search Any News: free-text query against the RSS search endpoint.
        user_topic = st.text_input("Enter your Topic🔍")
        no_of_news = st.number_input('Number of News:', min_value=5, max_value=15, step=1, value=10)
        target_language = st.text_input('Translate to Language (optional):')
        if st.button("Search", key="search_button") and user_topic:
            # Strip spaces before building the query URL.
            user_topic_pr = user_topic.replace(' ', '')
            news_list = fetch_news_search_topic(topic=user_topic_pr)
            if news_list:
                st.subheader(f"✅ Here are some {user_topic.capitalize()} News for you")
                display_news_stories(news_list, no_of_news,target_language)
            else:
                st.error(f"No News found for {user_topic}")
    # Fixed footer bar styling.
    st.markdown(
        """
        <style>
        .footer {
            position: fixed;
            bottom: 0;
            left: 0;
            width: 100%;
            background-color: #333;
            color: white;
            padding: 10px;
            text-align: center;
            font-size: 14px;
        }
        </style>
        """,
        unsafe_allow_html=True
    )
    # Display your contact information in the footer bar
    st.markdown(
        """
        <div class="footer">
        Developed by:-Md Shoaib Shahriar Ibrahim | [email protected] | [GitHub Profile](https://github.com/Shoaib-33)
        </div>
        """,
        unsafe_allow_html=True
    )
run()  # entry point: the module is executed top-to-bottom by `streamlit run`