Spaces:
Sleeping
Sleeping
Update app (3).py
Browse files- app (3).py +43 -36
app (3).py
CHANGED
|
@@ -6,6 +6,7 @@ import plotly
|
|
| 6 |
import plotly.express as px
|
| 7 |
import json # for graph plotting in website
|
| 8 |
# NLTK VADER for sentiment analysis
|
|
|
|
| 9 |
import nltk
|
| 10 |
nltk.downloader.download('vader_lexicon')
|
| 11 |
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
|
@@ -15,7 +16,7 @@ import os
|
|
| 15 |
|
| 16 |
import datetime
|
| 17 |
|
| 18 |
-
st.set_page_config(page_title = "
|
| 19 |
|
| 20 |
|
| 21 |
def get_news(ticker):
|
|
@@ -29,40 +30,41 @@ def get_news(ticker):
|
|
| 29 |
return news_table
|
| 30 |
|
| 31 |
# parse news into dataframe
|
|
|
|
|
|
|
| 32 |
def parse_news(news_table):
|
| 33 |
parsed_news = []
|
| 34 |
-
today_string = datetime.datetime.today().strftime('%Y-%m-%d')
|
| 35 |
|
| 36 |
for x in news_table.findAll('tr'):
|
| 37 |
try:
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
if len(date_scrape) == 1:
|
|
|
|
| 46 |
time = date_scrape[0]
|
| 47 |
-
|
| 48 |
-
# else load 'date' as the 1st element and 'time' as the second
|
| 49 |
else:
|
| 50 |
date = date_scrape[0]
|
| 51 |
time = date_scrape[1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
|
| 62 |
-
# Create a pandas datetime object from the strings in 'date' and 'time' column
|
| 63 |
-
parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)
|
| 64 |
-
parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
|
| 65 |
-
|
| 66 |
return parsed_news_df
|
| 67 |
|
| 68 |
|
|
@@ -80,35 +82,40 @@ def score_news(parsed_news_df):
|
|
| 80 |
# Join the DataFrames of the news and the list of dicts
|
| 81 |
parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
|
| 82 |
parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
|
| 83 |
-
parsed_and_scored_news = parsed_and_scored_news.drop(['date', 'time'], 1)
|
| 84 |
parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
|
| 85 |
|
| 86 |
return parsed_and_scored_news
|
| 87 |
|
| 88 |
|
|
|
|
| 89 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
|
| 90 |
-
|
|
|
|
|
|
|
| 91 |
# Group by date and ticker columns from scored_news and calculate the mean
|
| 92 |
-
mean_scores =
|
| 93 |
|
| 94 |
-
# Plot a bar chart with
|
| 95 |
-
fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title
|
| 96 |
-
return fig
|
| 97 |
|
| 98 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
# Group by date and ticker columns from scored_news and calculate the mean
|
| 101 |
-
mean_scores =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
# Plot a bar chart with plotly
|
| 104 |
-
fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
|
| 105 |
-
return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later
|
| 106 |
|
| 107 |
# for extracting data from finviz
|
| 108 |
finviz_url = 'https://finviz.com/quote.ashx?t='
|
| 109 |
|
| 110 |
|
| 111 |
-
st.header("
|
| 112 |
|
| 113 |
ticker = st.text_input('Enter Stock Ticker', '').upper()
|
| 114 |
|
|
|
|
| 6 |
import plotly.express as px
|
| 7 |
import json # for graph plotting in website
|
| 8 |
# NLTK VADER for sentiment analysis
|
| 9 |
+
from dateutil import parser
|
| 10 |
import nltk
|
| 11 |
nltk.downloader.download('vader_lexicon')
|
| 12 |
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
|
|
|
| 16 |
|
| 17 |
import datetime
|
| 18 |
|
| 19 |
+
st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")
|
| 20 |
|
| 21 |
|
| 22 |
def get_news(ticker):
|
|
|
|
| 30 |
return news_table
|
| 31 |
|
| 32 |
# parse news into dataframe
|
| 33 |
+
|
| 34 |
+
|
| 35 |
def parse_news(news_table):
    """Convert the scraped news table into a DataFrame of dated headlines.

    Every ``<tr>`` row is read as follows: the headline is the text of the
    row's first ``<a>`` tag and the timestamp is the whitespace-split text of
    its first ``<td>`` tag. A timestamp is either ``"<date> <time>"`` or just
    ``"<time>"``.

    A time-only row reuses the date of the most recent successfully parsed
    row instead of being stamped with today's date, so headlines listed under
    an older date keep that date. The literal date ``"Today"`` is replaced by
    the current local date before parsing. If the very first row has no date,
    the current local date is used.

    Rows that cannot be read or parsed are reported on stdout and skipped.

    Args:
        news_table: Object exposing ``findAll('tr')`` whose rows provide
            ``.a.get_text()`` and ``.td.text`` (presumably a BeautifulSoup
            tag returned by ``get_news`` -- confirm against the caller).

    Returns:
        pandas.DataFrame with the columns ``'datetime'`` (parsed timestamp)
        and ``'headline'`` (headline text), one row per parsed table row.
    """
    # The file imports this at module level as well; importing it here keeps
    # the function usable on its own.
    from dateutil import parser

    parsed_news = []
    today_string = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date applied to time-only rows; updated after each successful parse.
    current_date = today_string

    for row in news_table.findAll('tr'):
        try:
            # Get the headline text
            text = row.a.get_text()
            # Get the date and time from the first <td> tag
            date_scrape = row.td.text.strip().split()

            if len(date_scrape) == 1:
                # Only a time is present: it belongs to the last seen date.
                date_str = current_date
                time_str = date_scrape[0]
            else:
                date_str = date_scrape[0]
                time_str = date_scrape[1]
                # "Today" is not a parseable date; substitute the real one.
                if date_str.lower() == 'today':
                    date_str = today_string

            datetime_parsed = parser.parse(f"{date_str} {time_str}")
        except (AttributeError, IndexError, TypeError, ValueError,
                OverflowError) as e:
            # Best effort: rows without a link/cell or with an unreadable
            # timestamp are skipped rather than aborting the whole parse.
            print("Error parsing news:", e)
        else:
            # Remember the date only once it is known to be parseable, so a
            # malformed row cannot poison the rows that follow it.
            current_date = date_str
            parsed_news.append([datetime_parsed, text])

    # Convert the list to a DataFrame
    columns = ['datetime', 'headline']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
|
| 69 |
|
| 70 |
|
|
|
|
| 82 |
# Join the DataFrames of the news and the list of dicts
|
| 83 |
parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
|
| 84 |
parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
|
|
|
|
| 85 |
parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
|
| 86 |
|
| 87 |
return parsed_and_scored_news
|
| 88 |
|
| 89 |
|
| 90 |
+
|
| 91 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: DataFrame containing a ``'sentiment_score'``
            column. ``resample`` is called on it directly, so it needs a
            datetime-like index.
        ticker: Ticker symbol, used as the prefix of the chart title.

    Returns:
        The Plotly Express bar figure; it is returned rather than shown so
        the caller can render it in the Streamlit app.
    """
    # Keep only float64/int64 columns so the mean is taken over numbers alone.
    scores_only = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])

    # Average every numeric column within each hourly bucket.
    hourly_means = scores_only.resample('h').mean()

    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(
        hourly_means,
        x=hourly_means.index,
        y='sentiment_score',
        title=chart_title,
    )
|
| 101 |
|
| 102 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per calendar day.

    Args:
        parsed_and_scored_news: DataFrame containing a ``'sentiment_score'``
            column. ``resample`` is called on it directly, so it needs a
            datetime-like index.
        ticker: Ticker symbol, used as the prefix of the chart title.

    Returns:
        The Plotly Express bar figure; it is returned rather than shown so
        the caller can render it in the Streamlit app.
    """
    # Restrict to float64/int64 columns, then average them per day.
    daily_means = (
        parsed_and_scored_news
        .select_dtypes(include=['float64', 'int64'])
        .resample('D')
        .mean()
    )

    figure = px.bar(
        daily_means,
        x=daily_means.index,
        y='sentiment_score',
        title=ticker + ' Daily Sentiment Scores',
    )
    return figure
|
| 112 |
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
# for extracting data from finviz
|
| 115 |
finviz_url = 'https://finviz.com/quote.ashx?t='
|
| 116 |
|
| 117 |
|
| 118 |
+
st.header("Stock News Sentiment Analyzer")
|
| 119 |
|
| 120 |
ticker = st.text_input('Enter Stock Ticker', '').upper()
|
| 121 |
|