Spaces:

Akshayram1
/

Stock

Sleeping

App Files Files Community

Akshayram1 committed on Oct 13, 2024

Commit

10bfc4e

verified ·

1 Parent(s): 073f2cc

Update app (3).py

Browse files

Files changed (1) hide show

app (3).py +43 -36

app (3).py CHANGED Viewed

@@ -6,6 +6,7 @@ import plotly
 import plotly.express as px
 import json # for graph plotting in website
 # NLTK VADER for sentiment analysis
 import nltk
 nltk.downloader.download('vader_lexicon')
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
@@ -15,7 +16,7 @@ import os
 import datetime
-st.set_page_config(page_title = "Akshay's Stock News Sentiment Analyzer", layout = "wide")
 def get_news(ticker):
@@ -29,40 +30,41 @@ def get_news(ticker):
     return news_table
 # parse news into dataframe
 def parse_news(news_table):
     parsed_news = []
-    today_string = datetime.datetime.today().strftime('%Y-%m-%d')
     for x in news_table.findAll('tr'):
         try:
-            # read the text from each tr tag into text
-            # get text from a only
-            text = x.a.get_text()
-            # splite text in the td tag into a list
-            date_scrape = x.td.text.split()
-            # if the length of 'date_scrape' is 1, load 'time' as the only element
             if len(date_scrape) == 1:
                 time = date_scrape[0]
-            # else load 'date' as the 1st element and 'time' as the second
             else:
                 date = date_scrape[0]
                 time = date_scrape[1]
-            # Append ticker, date, time and headline as a list to the 'parsed_news' list
-            parsed_news.append([date, time, text])
-        except:
-            pass
-        # Set column names
-        columns = ['date', 'time', 'headline']
-        # Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
-        parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
-        # Create a pandas datetime object from the strings in 'date' and 'time' column
-        parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)
-        parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
     return parsed_news_df
@@ -80,35 +82,40 @@ def score_news(parsed_news_df):
     # Join the DataFrames of the news and the list of dicts
     parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
     parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
-    parsed_and_scored_news = parsed_and_scored_news.drop(['date', 'time'], 1)
     parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
     return parsed_and_scored_news
 def plot_hourly_sentiment(parsed_and_scored_news, ticker):
     # Group by date and ticker columns from scored_news and calculate the mean
-    mean_scores = parsed_and_scored_news.resample('H').mean()
-    # Plot a bar chart with plotly
-    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Hourly Sentiment Scores')
-    return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later
 def plot_daily_sentiment(parsed_and_scored_news, ticker):
     # Group by date and ticker columns from scored_news and calculate the mean
-    mean_scores = parsed_and_scored_news.resample('D').mean()
-    # Plot a bar chart with plotly
-    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
-    return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later
 # for extracting data from finviz
 finviz_url = 'https://finviz.com/quote.ashx?t='
-st.header("Bohmian's Stock News Sentiment Analyzer")
 ticker = st.text_input('Enter Stock Ticker', '').upper()

 import plotly.express as px
 import json # for graph plotting in website
 # NLTK VADER for sentiment analysis
+from dateutil import parser
 import nltk
 nltk.downloader.download('vader_lexicon')
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
 import datetime
+st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")
 def get_news(ticker):
     return news_table
 # parse news into dataframe
 def parse_news(news_table):
     """Convert a scraped news table into a DataFrame of timestamped headlines.

     Each ``<tr>`` row is expected to hold a date/time cell (its first
     ``<td>``) and a headline link (its first ``<a>``). The cell contains
     either ``"<date> <time>"`` or just ``"<time>"``; a time-only row is
     treated as belonging to the same day as the closest preceding row that
     carried a date, and the literal date ``Today`` is mapped to the current
     local date.

     Args:
         news_table: BeautifulSoup-style table element exposing
             ``findAll('tr')`` (presumably the value returned by
             ``get_news`` -- confirm against the caller).

     Returns:
         pandas.DataFrame with the columns ``datetime`` and ``headline``,
         one row per table row that could be parsed. Rows that fail to
         parse are reported on stdout and skipped.
     """
     parsed_news = []
     today_string = datetime.datetime.today().strftime('%Y-%m-%d')
     # Date inherited by time-only rows. Starts as today so a table whose
     # first row has no date keeps the previous fallback behaviour.
     current_date = today_string
     for x in news_table.findAll('tr'):
         try:
             # Headline text comes from the row's link
             text = x.a.get_text()
             # Date and time come from the first <td> tag
             date_scrape = x.td.text.strip().split()
             if len(date_scrape) == 1:
                 # Only a time is present: reuse the last date seen
                 row_date = current_date
                 row_time = date_scrape[0]
             else:
                 row_date = date_scrape[0]
                 row_time = date_scrape[1]
                 # dateutil cannot parse the word "Today"
                 if row_date.lower() == 'today':
                     row_date = today_string
             datetime_parsed = parser.parse(f"{row_date} {row_time}")
             # Commit the date only after it parsed, so one malformed row
             # cannot poison the rows that follow it
             current_date = row_date
             parsed_news.append([datetime_parsed, text])
         except Exception as e:
             # Best effort: rows without a link or with an unreadable
             # timestamp are logged and skipped
             print("Error parsing news:", e)
             continue
     # Convert the list to a DataFrame
     columns = ['datetime', 'headline']
     parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
     return parsed_news_df
     # Join the DataFrames of the news and the list of dicts
     parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
     parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
     parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
     return parsed_and_scored_news
 def plot_hourly_sentiment(parsed_and_scored_news, ticker):
     """Build a bar chart of the mean sentiment score per hour.

     Args:
         parsed_and_scored_news: DataFrame of scored headlines; it must
             contain a ``sentiment_score`` column and support ``resample``
             (assumes a datetime index -- confirm against ``score_news``).
         ticker: Ticker symbol string, used as the chart title prefix.

     Returns:
         The Plotly figure produced by ``px.bar``.
     """
     # Text columns such as the headline cannot be averaged, so only the
     # float64/int64 columns are kept before resampling into hourly bins.
     hourly_means = (
         parsed_and_scored_news
         .select_dtypes(include=['float64', 'int64'])
         .resample('h')
         .mean()
     )
     chart_title = ticker + ' Hourly Sentiment Scores'
     # The figure is returned rather than shown so the caller can render it
     return px.bar(hourly_means, x=hourly_means.index, y='sentiment_score', title=chart_title)
 def plot_daily_sentiment(parsed_and_scored_news, ticker):
     """Build a bar chart of the mean sentiment score per day.

     Args:
         parsed_and_scored_news: DataFrame of scored headlines; it must
             contain a ``sentiment_score`` column and support ``resample``
             (assumes a datetime index -- confirm against ``score_news``).
         ticker: Ticker symbol string, used as the chart title prefix.

     Returns:
         The Plotly figure produced by ``px.bar``.
     """
     # Text columns such as the headline cannot be averaged, so only the
     # float64/int64 columns are kept before resampling into daily bins.
     daily_means = (
         parsed_and_scored_news
         .select_dtypes(include=['float64', 'int64'])
         .resample('D')
         .mean()
     )
     chart_title = ticker + ' Daily Sentiment Scores'
     # The figure is returned rather than shown so the caller can render it
     return px.bar(daily_means, x=daily_means.index, y='sentiment_score', title=chart_title)
 # for extracting data from finviz
 finviz_url = 'https://finviz.com/quote.ashx?t='
+st.header("Stock News Sentiment Analyzer")
 ticker = st.text_input('Enter Stock Ticker', '').upper()