Spaces:

si568project2
/

English_Music_Recommender

Sleeping

App Files Files Community

jeonghin committed on Apr 14, 2024

Commit

5738ae0

1 Parent(s): f521560

Deployable version

Browse files

Files changed (5) hide show

app.py +12 -0
app_function.py +258 -0
choosingdata.py +75 -0
data.json +0 -0
requirements.txt +111 -0

app.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from app_function import *
+def main():
+    init()
+    page_title()
+    chat_sidebar()
+    chat()
+if __name__ == "__main__":
+    main()

app_function.py ADDED Viewed

	@@ -0,0 +1,258 @@

+import streamlit as st
+from streamlit_js_eval import streamlit_js_eval
+import choosingdata as choice
+from dotenv import load_dotenv
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_openai import ChatOpenAI
+from langchain_openai import OpenAIEmbeddings
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from langchain_community.llms import HuggingFaceHub
+def get_text_chunks(text):
+    """
+    Splits the given text into chunks based on specified character settings.
+    Parameters:
+    - text (str): The text to be split into chunks.
+    Returns:
+    - list: A list of text chunks.
+    """
+    text_splitter = CharacterTextSplitter(
+        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+def get_vectorstore(text_chunks):
+    """
+    Generates a vector store from a list of text chunks using specified embeddings.
+    Parameters:
+    - text_chunks (list of str): Text segments to convert into vector embeddings.
+    Returns:
+    - FAISS: A FAISS vector store containing the embeddings of the text chunks.
+    """
+    embeddings = OpenAIEmbeddings(
+        openai_api_base="https://openai.vocareum.com/v1",
+    )
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vectorstore
+def get_conversation_chain(vectorstore):
+    """
+    Initializes a conversational retrieval chain that uses a large language model
+    for generating responses based on the provided vector store.
+    Parameters:
+    - vectorstore (FAISS): A vector store to be used for retrieving relevant content.
+    Returns:
+    - ConversationalRetrievalChain: An initialized conversational chain object.
+    """
+    llm = ChatOpenAI(
+        model_name="gpt-4-1106-preview",
+        openai_api_base="https://openai.vocareum.com/v1",
+    )
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm, retriever=vectorstore.as_retriever(), memory=memory
+    )
+    return conversation_chain
+def set_prompt(text_block):
+    """
+    Callback function that sets the chosen prompt in the session state.
+    Parameters:
+    - text_block (str): The prompt text selected by the user.
+    """
+    st.session_state["messages"].append({"role": "user", "content": text_block})
+    st.session_state["prompts"] = text_block
+def prompts():
+    """
+    Renders clickable buttons for predefined prompts in the Streamlit application,
+    allowing the user to select a prompt to send to the conversation chain.
+    """
+    potential_prompts = [
+        f"What is the meaning of the song {st.session_state['title']} by {st.session_state['artist']}?",
+        f"What is the most difficult English grammar point in the song {st.session_state['title']} by {st.session_state['artist']}? Can you explain it?",
+        f"What is the most common English word in the song {st.session_state['title']} by {st.session_state['artist']} (excluding stopwords)? Can you give some example sentences using that word?",
+        f"What is the most worth learning English phrase in the song {st.session_state['title']} by {st.session_state['artist']}? Can you explain it and provide practical example using the phrase?",
+    ]
+    chosen_prompt = None
+    for index, text_block in enumerate(potential_prompts):
+        st.button(
+            f"Prompt {index + 1}: {text_block}", on_click=set_prompt, args=(text_block,)
+        )
+def get_lyrics():
+    """
+    Retrieves the lyrics stored in the session state.
+    Returns:
+    - str: The lyrics of the currently selected song.
+    """
+    lyrics = st.session_state["lyrics"]
+    return lyrics
+def page_title():
+    """
+    Sets the title of the Streamlit page based on the selected song and artist.
+    """
+    if st.session_state["title"] and st.session_state["artist"]:
+        st.title(
+            f'🎵 English Music Recommender 💬  ({st.session_state["title"]} by {st.session_state["artist"]})'
+        )
+    else:
+        st.title("🎵 English Music Recommender 💬")
+def chat_sidebar():
+    """
+    Renders the sidebar in the Streamlit application for selecting music preferences
+    and handling song recommendations.
+    """
+    with st.sidebar:
+        st.title("💚 Music Preferences")
+        user_difficulty = st.sidebar.radio(
+            "Choose a difficulty level:", ("Easy", "Medium", "Hard")
+        )
+        user_danceability = st.sidebar.radio(
+            "How much do you want to dance?", ("Low", "Medium", "High")
+        )
+        user_valence = st.sidebar.radio(
+            "What energy are you feeling?", ("Negative", "Neutral", "Positive")
+        )
+        if not st.session_state["song_bool"]:
+            if st.sidebar.button("Submit"):
+                recommendations = choice.recommendation(
+                    choice.df,
+                    dance_choice=user_danceability,
+                    valence_choice=user_valence,
+                    difficulty_choice=user_difficulty,
+                )
+                st.session_state["title"] = recommendations["title"].values[0]
+                st.session_state["artist"] = recommendations["artist"].values[0]
+                st.session_state["lyrics"] = recommendations["lyrics"].values[0]
+                st.session_state["id"] = (
+                    f'https://open.spotify.com/track/{recommendations["id"].values[0]}'
+                )
+                st.session_state["song_bool"] = True
+                st.rerun()
+        else:
+            if st.session_state["song_bool"]:
+                st.write("### We would recommend you...")
+                st.write(f"## {st.session_state['title']}")
+                st.write(f" by {st.session_state['artist']}")
+                st.markdown(
+                    f'<a href="{st.session_state["id"]}"><img src="{st.session_state["icon"]}" alt="Clickable image" style="height:60px;"></a>',
+                    unsafe_allow_html=True,
+                )
+                st.write("Please refresh the page for a new recommendation.")
+                if st.button("Reload page"):
+                    streamlit_js_eval(js_expressions="parent.window.location.reload()")
+def chat():
+    """
+    Manages the chat interface in the Streamlit application, handling the conversation
+    flow and displaying the chat history.
+    """
+    if st.session_state["lyrics"]:
+        text_chunks = get_text_chunks(get_lyrics())
+        vectorstore = get_vectorstore(text_chunks)
+        st.session_state.conversation = get_conversation_chain(vectorstore)
+        if len(st.session_state.messages) == 1:
+            message = st.session_state.messages[0]
+            with st.chat_message(message["role"]):
+                st.write(message["content"])
+                prompts()
+        else:
+            for message in st.session_state.messages:
+                with st.chat_message(message["role"]):
+                    st.write(message["content"])
+        # User-provided prompt
+        if prompt := st.chat_input():
+            st.session_state.messages.append({"role": "user", "content": prompt})
+            st.session_state.prompts = prompt
+            with st.chat_message("user"):
+                st.write(prompt)
+        if st.session_state.messages[-1]["role"] != "system":
+            with st.chat_message("system"):
+                with st.spinner("Generating response..."):
+                    response = st.session_state.conversation.invoke(
+                        {"question": st.session_state.prompts}
+                    )
+                    st.session_state.chat_history = response["chat_history"]
+                    message = st.session_state.chat_history[-1]
+                    st.write(message.content)
+                    message = {"role": "system", "content": message.content}
+                    st.session_state.messages.append(message)
+    else:
+        st.write("You can chat with GPT once a song has been recommended to you!")
+def init():
+    """
+    Initializes the session state variables used in the Streamlit application and
+    loads environment variables.
+    """
+    load_dotenv()
+    if "title" not in st.session_state:
+        st.session_state["title"] = ""
+    if "artist" not in st.session_state:
+        st.session_state["artist"] = ""
+    if "icon" not in st.session_state:
+        st.session_state["icon"] = (
+            "https://thereceptionist.com/wp-content/uploads/2021/02/Podcast-Listen-On-Spotify-1.png"
+        )
+    if "id" not in st.session_state:
+        st.session_state["id"] = ""
+    if "song_bool" not in st.session_state:
+        st.session_state["song_bool"] = False
+    if "messages" not in st.session_state.keys():
+        st.session_state.messages = [
+            {
+                "role": "system",
+                "content": "What do you want to learn about? Here are some suggested prompts: ",
+            }
+        ]
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = None
+    if "lyrics" not in st.session_state:
+        st.session_state["lyrics"] = ""
+    if "prompts" not in st.session_state:
+        st.session_state["prompts"] = ""

choosingdata.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import pandas as pd
+df = pd.read_json('data.json')
+percentiles = df['danceability'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = ['Low', 'Medium', 'High']
+df['danceability_level'] = pd.cut(df['danceability'], bins=bins, labels=labels, include_lowest=True)
+percentiles = df['valence'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = ['Low', 'Medium', 'High']
+df['valence_level'] = pd.cut(df['valence'], bins=bins, labels=labels, include_lowest=True)
+percentiles = df['speechiness'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = [1, 2, 3]
+df['speechiness_level'] =  pd.cut(df['speechiness'], bins=bins, labels=labels, include_lowest=True).astype(int)
+percentiles = df['fres'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = [1, 2, 3]
+df['fres_level'] =  pd.cut(df['fres'], bins=bins, labels=labels, include_lowest=True).astype(int)
+percentiles = df['vocabComplex'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = [1, 2, 3]
+df['vocabComplex_level'] =  pd.cut(df['vocabComplex'], bins=bins, labels=labels, include_lowest=True).astype(int)
+percentiles = df['avgSyllable'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = [1, 2, 3]
+df['avgSyllable_level'] =  pd.cut(df['avgSyllable'], bins=bins, labels=labels, include_lowest=True).astype(int)
+df['difficulty'] = df['speechiness_level'] + df['fres_level'] + df['vocabComplex_level'] + df['avgSyllable_level']
+percentiles = df['difficulty'].quantile([0, 0.33, 0.66, 1])
+bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
+labels = ["Low", "Medium", "High"]
+df['difficulty_level'] =  pd.cut(df['difficulty'], bins=bins, labels=labels, include_lowest=True)
+# dance_choice = input("Which level do you want for danceability?")
+# valence_choice = input("Which level do you want for valence?")
+# difficulty_choice = input("Which level do you want for the difficulty?")
+def recommendation(df, dance_choice, valence_choice, difficulty_choice):
+    if dance_choice == "Low":
+        df = df[df['danceability_level'] == "Low"]
+    elif dance_choice == "Medium":
+        df = df[df['danceability_level'] == "Medium"]
+    elif dance_choice == "High":
+        df = df[df['danceability_level'] == "High"]
+    if valence_choice == "Negative":
+        df = df[df['valence_level'] == "Low"]
+    elif valence_choice == "Neutral":
+        df = df[df['valence_level'] == "Medium"]
+    elif valence_choice == "Positive":
+        df = df[df['valence_level'] == "High"]
+    if difficulty_choice == "Easy":
+        df = df[df['difficulty_level'] == "Low"]
+    elif difficulty_choice == "Medium":
+        df = df[df['difficulty_level'] == "Medium"]
+    elif difficulty_choice == "Hard":
+        df = df[df['difficulty_level'] == "High"]
+    chosen = df.sample() # random choose 1 song
+    return chosen

data.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,111 @@

+aiohttp==3.9.3
+aiosignal==1.3.1
+altair==4.0.0
+anyio==4.3.0
+appnope==0.1.4
+asttokens==2.4.1
+attrs==23.2.0
+beautifulsoup4==4.12.3
+blinker==1.7.0
+bs4==0.0.2
+cachetools==5.3.3
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+comm==0.2.2
+dataclasses-json==0.5.14
+debugpy==1.8.1
+decorator==5.1.1
+distro==1.9.0
+entrypoints==0.4
+executing==2.0.1
+faiss-cpu==1.7.4
+frozenlist==1.4.1
+gitdb==4.0.11
+GitPython==3.1.43
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+hugchat==0.4.1
+idna==3.6
+ipykernel==6.29.3
+ipython==8.22.2
+jedi==0.19.1
+Jinja2==3.1.3
+joblib==1.4.0
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+jupyter_client==8.6.1
+jupyter_core==5.7.2
+langchain==0.1.16
+langchain-community==0.0.32
+langchain-core==0.1.42
+langchain-openai==0.1.3
+langchain-text-splitters==0.0.1
+langdetect==1.0.9
+langsmith==0.1.47
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.1
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+nltk==3.8.1
+numexpr==2.10.0
+numpy==1.26.4
+openai==1.17.1
+openapi-schema-pydantic==1.2.4
+orjson==3.10.0
+packaging==23.2
+pandas==2.2.1
+parso==0.8.3
+pexpect==4.9.0
+pillow==10.3.0
+platformdirs==4.2.0
+prompt-toolkit==3.0.43
+protobuf==4.25.3
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==15.0.2
+pydantic==1.10.15
+pydeck==0.8.1b0
+Pygments==2.17.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.0
+pytz==2024.1
+PyYAML==6.0.1
+pyzmq==25.1.2
+referencing==0.34.0
+regex==2023.12.25
+requests==2.31.0
+requests-toolbelt==1.0.0
+rich==13.7.1
+rpds-py==0.18.0
+scipy==1.12.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.5
+SQLAlchemy==2.0.29
+stack-data==0.6.3
+streamlit==1.33.0
+streamlit-js-eval==0.1.7
+tenacity==8.2.3
+tiktoken==0.6.0
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4
+tqdm==4.66.2
+traitlets==5.14.2
+typing-inspect==0.9.0
+typing_extensions==4.11.0
+tzdata==2024.1
+urllib3==2.2.1
+watchdog==4.0.0
+wcwidth==0.2.13
+yarl==1.9.4