import os from git import Repo import streamlit as st import time from PIL import Image import base64 from transformers import pipeline import spacy import numpy as np from sentence_transformers import SentenceTransformer from matplotlib import colormaps from matplotlib.colors import ListedColormap GITHUB_PAT = os.environ['GITHUB'] SENTIMENT = os.environ['SENTIMENT'] EMBEDDING = os.environ['EMBEDDING'] if not os.path.exists('repo_directory'): try: Repo.clone_from(f'https://marcus-t-s:{GITHUB_PAT}@github.com/marcus-t-s/yt-comment-analyser.git', 'repo_directory' ) except: st.error("Error: Oops there's an issue on our end, please wait a moment and try again.") st.stop() from repo_directory.utils.chart_utils import * from repo_directory.youtube_comment_class import * # Streamlit configuration st.set_page_config( page_title="ViewerVoice | YouTube Comment Analyser", layout="wide", page_icon=Image.open('images/page_icon.png') ) # Define and load cached resources @st.cache_resource def load_models(): sentiment_pipeline = pipeline("sentiment-analysis", model=r"cardiffnlp/twitter-roberta-base-sentiment") embedding_model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6') spacy_nlp = spacy.load("en_core_web_sm") add_custom_stopwords(spacy_nlp, {"bring", "know", "come"}) return sentiment_pipeline, embedding_model, spacy_nlp @st.cache_resource def load_colors_image(): mask = np.array(Image.open('images/youtube_icon.jpg')) Reds = colormaps['Reds'] colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256))) with open("images/viewervoice_logo_crop.png", "rb") as img_file: logo_image = base64.b64encode(img_file.read()).decode("utf-8") return mask, colors, logo_image sentiment_pipeline, embedding_model, spacy_nlp = load_models() mask, colors, logo_image = load_colors_image() # Hide line at the top and "made with streamlit" text hide_decoration_bar_style = """ """ st.markdown(hide_decoration_bar_style, unsafe_allow_html=True) if 'YouTubeParser' not in st.session_state: st.session_state['YouTubeParser'] = YoutubeCommentParser() if 'comment_fig' not in st.session_state: st.session_state["comment_fig"] = None st.session_state["wordcloud_fig"] = None st.session_state["topic_fig"] = None st.session_state["sentiment_fig"] = None if 'rerun_button' not in st.session_state: st.session_state['rerun_button'] = "INIT" if 'topic_filter' not in st.session_state: st.session_state['topic_filter'] = False if 'sentiment_filter' not in st.session_state: st.session_state['sentiment_filter'] = False if 'filter_state' not in st.session_state: st.session_state['filter_state'] = "INIT" if 'video_link' not in st.session_state: st.session_state["video_link"] = None if 'num_comments' not in st.session_state: st.session_state['num_comments'] = None # Set reference to YouTubeParser object for more concise code yt_parser = st.session_state['YouTubeParser'] main_page = st.container() def query_comments_button(): # Delete larger objects from session state to later replace del st.session_state["comment_fig"] del st.session_state["wordcloud_fig"] del st.session_state["topic_fig"] del st.session_state["sentiment_fig"] del st.session_state["YouTubeParser"] # Reset session state variables back to placeholder values st.session_state.rerun_button = "QUERYING" st.session_state['filter_state'] = "INIT" st.session_state["topic_filter"] = False st.session_state["sentiment_filter"] = False st.session_state["semantic_filter"] = False st.session_state["figures_built"] = False st.session_state["comment_fig"] = None st.session_state["wordcloud_fig"] = None st.session_state["topic_fig"] = None st.session_state["sentiment_fig"] = None st.session_state["YouTubeParser"] = YoutubeCommentParser() def filter_visuals_button(): st.session_state["filter_state"] = "FILTERING" with st.sidebar: st.session_state["video_link"] = st.text_input('YouTube Video URL', value="") st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query", min_value=100, max_value=2000, step=100) st.session_state["max_topics"] = st.slider(label="Maximum number of topics", min_value=5, max_value=20, step=1) st.button('Query comments :left_speech_bubble:', on_click=query_comments_button) with main_page: # Reduce space at the top reduce_header_height_style = """ """ st.markdown(reduce_header_height_style, unsafe_allow_html=True) # Title and intro section markdown_content = f"""
Made by Afiba Annor Marcus Singh
📝 Notes
", unsafe_allow_html=True) html_content = """Comments
""", unsafe_allow_html=True) st.plotly_chart(st.session_state["table_fig"], use_container_width=True) with word_cloud_col: st.markdown(f"""Word Cloud
""", unsafe_allow_html=True) st.pyplot(st.session_state["wordcloud_fig"], use_container_width=True) treemap_col, sentiment_donut_col = st.columns([0.55, 0.45]) with treemap_col: st.markdown(f"""Topic Proportions
""", unsafe_allow_html=True) st.plotly_chart(st.session_state["topic_fig"], use_container_width=True) with sentiment_donut_col: st.markdown(f"""Sentiment Distribution
""", unsafe_allow_html=True) st.plotly_chart(st.session_state["sentiment_fig"], use_container_width=True) # st.table(yt_parser.df_comments.head()) else: st.write("Unfortunately we couldn't find any comments for this set of filters, please try " "editing the filters and try again") with st.sidebar: # Define the HTML and CSS for the button-style container if st.session_state['num_comments'] is not None: num_comments = st.session_state['num_comments'] else: num_comments = 0 htmlstr = f"""{num_comments}
""" # Display the button-style container with number of comments st.subheader("Number of comments") st.markdown(htmlstr, unsafe_allow_html=True) # Filters section st.subheader("Filters") if yt_parser.df_comments is not None: st.session_state["topic_filter"] = st.multiselect("Topic", options=sorted(list(yt_parser.df_comments['Topic'].unique()))) st.session_state["sentiment_filter"] = st.multiselect("Sentiment", options=list(yt_parser.df_comments['Sentiment'].unique())) st.session_state["semantic_filter"] = st.text_input("Keyword search", max_chars=30) st.button('Filter visualisations :sleuth_or_spy:', on_click=filter_visuals_button) else: st.multiselect("Topic", options=["Please query comments from a video"], disabled=True) st.multiselect("Sentiment", options=["Please query comments from a video"], disabled=True) st.text_input("Keyword search", disabled=True) st.button('Please query comments before filtering', disabled=True)