jeonghin committed on
Commit
5738ae0
·
1 Parent(s): f521560

Deployable version

Browse files
Files changed (5) hide show
  1. app.py +12 -0
  2. app_function.py +258 -0
  3. choosingdata.py +75 -0
  4. data.json +0 -0
  5. requirements.txt +111 -0
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app_function import *
2
+
3
+
4
def main():
    """Run the app's steps in order: state setup, title, sidebar, then chat."""
    # Order matters: init() must populate the session state the others read.
    for step in (init, page_title, chat_sidebar, chat):
        step()


if __name__ == "__main__":
    main()
app_function.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_js_eval import streamlit_js_eval
3
+ import choosingdata as choice
4
+ from dotenv import load_dotenv
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_openai import OpenAIEmbeddings
10
+ from langchain.memory import ConversationBufferMemory
11
+ from langchain.chains import ConversationalRetrievalChain
12
+ from langchain_community.llms import HuggingFaceHub
13
+
14
+
15
def get_text_chunks(text):
    """
    Break a text into overlapping chunks, splitting on newlines.

    Parameters:
    - text (str): The text to be split into chunks.

    Returns:
    - list: The chunks produced by the splitter.
    """
    # Chunk size / overlap are measured in characters because length_function is len.
    splitter_settings = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_settings).split_text(text)
30
+
31
+
32
def get_vectorstore(text_chunks):
    """
    Embed the given text chunks and index them in a FAISS vector store.

    Parameters:
    - text_chunks (list of str): Text segments to convert into vector embeddings.

    Returns:
    - FAISS: A vector store built from the embedded chunks.
    """
    # Embedding requests are sent to the Vocareum OpenAI-compatible endpoint.
    embedder = OpenAIEmbeddings(openai_api_base="https://openai.vocareum.com/v1")
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
47
+
48
+
49
def get_conversation_chain(vectorstore):
    """
    Build a conversational retrieval chain backed by the given vector store.

    Parameters:
    - vectorstore (FAISS): Store whose retriever supplies context to the chain.

    Returns:
    - ConversationalRetrievalChain: Chain wired to a chat model, the store's
      retriever, and a buffer memory kept under the "chat_history" key.
    """
    chat_model = ChatOpenAI(
        model_name="gpt-4-1106-preview",
        openai_api_base="https://openai.vocareum.com/v1",
    )
    # return_messages=True keeps the history as message objects (the chat UI
    # reads `.content` from the last one).
    history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )
70
+
71
+
72
def set_prompt(text_block):
    """
    Button callback: record the chosen prompt as the pending user message.

    Parameters:
    - text_block (str): The prompt text selected by the user.
    """
    state = st.session_state
    # Show the prompt in the transcript as a user turn...
    state["messages"].append(dict(role="user", content=text_block))
    # ...and remember it as the question to send to the conversation chain.
    state["prompts"] = text_block
81
+
82
+
83
def prompts():
    """
    Render one button per suggested prompt about the current song.

    The prompts are built from the "title" and "artist" values in the session
    state. Clicking a button calls set_prompt with that prompt's text.
    """
    title = st.session_state["title"]
    artist = st.session_state["artist"]
    potential_prompts = [
        f"What is the meaning of the song {title} by {artist}?",
        f"What is the most difficult English grammar point in the song {title} by {artist}? Can you explain it?",
        f"What is the most common English word in the song {title} by {artist} (excluding stopwords)? Can you give some example sentences using that word?",
        f"What is the most worth learning English phrase in the song {title} by {artist}? Can you explain it and provide practical example using the phrase?",
    ]
    for number, text_block in enumerate(potential_prompts, start=1):
        st.button(
            f"Prompt {number}: {text_block}", on_click=set_prompt, args=(text_block,)
        )
99
+
100
+
101
def get_lyrics():
    """
    Return the lyrics held in the session state under "lyrics".

    Returns:
    - str: The lyrics of the currently selected song.
    """
    return st.session_state["lyrics"]
110
+
111
+
112
def page_title():
    """
    Show the page title, adding "(title by artist)" once a song is selected.
    """
    state = st.session_state
    heading = "🎵 English Music Recommender 💬"
    # Only append the song when both pieces are set (they start as "").
    if state["title"] and state["artist"]:
        heading = f'{heading} ({state["title"]} by {state["artist"]})'
    st.title(heading)
122
+
123
+
124
def chat_sidebar():
    """
    Render the sidebar: preference pickers, then either the Submit button or
    the recommended song.

    Before a song is chosen ("song_bool" is False) a Submit button is shown;
    clicking it asks choosingdata for a recommendation, stores the song's
    title, artist, lyrics and Spotify link in the session state, and reruns
    the script. Afterwards the recommended song is displayed together with a
    button that reloads the page.

    NOTE(review): the block nesting was reconstructed from a diff view that had
    lost its indentation; everything is assumed to live inside the sidebar
    context. Confirm against the real file.
    """
    state = st.session_state
    with st.sidebar:
        st.title("💚 Music Preferences")

        user_difficulty = st.sidebar.radio(
            "Choose a difficulty level:", ("Easy", "Medium", "Hard")
        )
        user_danceability = st.sidebar.radio(
            "How much do you want to dance?", ("Low", "Medium", "High")
        )
        user_valence = st.sidebar.radio(
            "What energy are you feeling?", ("Negative", "Neutral", "Positive")
        )

        if state["song_bool"]:
            # A song has already been picked for this session: show it.
            link = state["id"]
            icon = state["icon"]
            st.write("### We would recommend you...")
            st.write(f"## {state['title']}")
            st.write(f" by {state['artist']}")
            st.markdown(
                f'<a href="{link}"><img src="{icon}" alt="Clickable image" style="height:60px;"></a>',
                unsafe_allow_html=True,
            )
            st.write("Please refresh the page for a new recommendation.")
            if st.button("Reload page"):
                streamlit_js_eval(js_expressions="parent.window.location.reload()")
        elif st.sidebar.button("Submit"):
            picked = choice.recommendation(
                choice.df,
                dance_choice=user_danceability,
                valence_choice=user_valence,
                difficulty_choice=user_difficulty,
            )
            for field in ("title", "artist", "lyrics"):
                state[field] = picked[field].values[0]
            state["id"] = f'https://open.spotify.com/track/{picked["id"].values[0]}'
            state["song_bool"] = True

            # Rerun so the title and chat pick up the newly stored song.
            st.rerun()
177
+
178
+
179
def chat():
    """
    Drive the chat area: replay the transcript, accept a question, and answer it.

    Nothing but a hint is shown until lyrics are present in the session state.
    Once they are, the retrieval chain over the lyrics is built a single time
    and kept in ``st.session_state.conversation``. Streamlit reruns this
    function on every interaction, so rebuilding the chain each time would
    re-embed the lyrics and start with an empty conversation memory on every
    turn.

    A question comes either from a suggested-prompt button (see set_prompt) or
    from the chat input; whenever the last transcript entry is not a "system"
    message, the pending question in ``st.session_state.prompts`` is sent to
    the chain and the reply is appended to the transcript.

    NOTE(review): nesting was reconstructed from a diff view without
    indentation; in particular, the suggested-prompt buttons are assumed to be
    rendered below (not inside) the greeting bubble. Confirm against the file.
    """
    if not st.session_state["lyrics"]:
        st.write("You can chat with GPT once a song has been recommended to you!")
        return

    # Build the chain once per session so its memory survives reruns.
    if st.session_state.get("conversation") is None:
        text_chunks = get_text_chunks(get_lyrics())
        vectorstore = get_vectorstore(text_chunks)
        st.session_state.conversation = get_conversation_chain(vectorstore)

    messages = st.session_state.messages
    for message in messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])
    # Only the greeting is present: offer the suggested prompts.
    if len(messages) == 1:
        prompts()

    # User-provided prompt
    if prompt := st.chat_input():
        messages.append({"role": "user", "content": prompt})
        st.session_state.prompts = prompt
        with st.chat_message("user"):
            st.write(prompt)

    # A trailing non-system message means a question is still unanswered.
    if messages[-1]["role"] != "system":
        with st.chat_message("system"):
            with st.spinner("Generating response..."):
                response = st.session_state.conversation.invoke(
                    {"question": st.session_state.prompts}
                )
            st.session_state.chat_history = response["chat_history"]
            # The newest history entry is the model's reply to this question.
            reply = st.session_state.chat_history[-1]
            st.write(reply.content)
        messages.append({"role": "system", "content": reply.content})
223
+
224
+
225
def init():
    """
    Load environment variables and seed any missing session-state entries.

    Keys that already exist in the session state are left untouched, so values
    set during earlier runs of the script are preserved.
    """
    load_dotenv()

    defaults = {
        "title": "",
        "artist": "",
        "icon": "https://thereceptionist.com/wp-content/uploads/2021/02/Podcast-Listen-On-Spotify-1.png",
        "id": "",
        "song_bool": False,
        # The transcript starts with a single greeting message.
        "messages": [
            {
                "role": "system",
                "content": "What do you want to learn about? Here are some suggested prompts: ",
            }
        ],
        "conversation": None,
        "chat_history": None,
        "lyrics": "",
        "prompts": "",
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
choosingdata.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def _tercile_levels(values, labels):
    """Bucket a numeric column into three bins cut at its 0/33/66/100% quantiles.

    Parameters:
    - values (pd.Series): Numeric column to bucket.
    - labels (list): Three labels, ordered from the lowest bin to the highest.

    Returns:
    - pd.Series: Categorical series holding the label of each row's bin.
    """
    edges = values.quantile([0, 0.33, 0.66, 1]).tolist()
    # include_lowest=True keeps the minimum value inside the first bin.
    return pd.cut(values, bins=edges, labels=labels, include_lowest=True)


# Song catalogue; the path is resolved relative to the working directory.
df = pd.read_json('data.json')

# Audio features exposed directly to the user as Low / Medium / High.
df['danceability_level'] = _tercile_levels(df['danceability'], ['Low', 'Medium', 'High'])
df['valence_level'] = _tercile_levels(df['valence'], ['Low', 'Medium', 'High'])

# Features that feed the difficulty score, each scored 1-3.
# NOTE(review): if 'fres' is a reading-ease score where higher means easier,
# adding it with the same sign as the other features may be unintended - confirm.
for feature in ('speechiness', 'fres', 'vocabComplex', 'avgSyllable'):
    df[f'{feature}_level'] = _tercile_levels(df[feature], [1, 2, 3]).astype(int)

df['difficulty'] = df['speechiness_level'] + df['fres_level'] + df['vocabComplex_level'] + df['avgSyllable_level']

# The summed score is bucketed again into the levels the recommender filters on.
df['difficulty_level'] = _tercile_levels(df['difficulty'], ["Low", "Medium", "High"])
41
+
42
+ # dance_choice = input("Which level do you want for danceability?")
43
+ # valence_choice = input("Which level do you want for valence?")
44
+ # difficulty_choice = input("Which level do you want for the difficulty?")
45
+
46
def recommendation(df, dance_choice, valence_choice, difficulty_choice):
    """Pick one random song that matches the requested levels.

    Parameters:
    - df (pd.DataFrame): Songs with 'danceability_level', 'valence_level' and
      'difficulty_level' columns holding "Low" / "Medium" / "High".
    - dance_choice (str): "Low", "Medium" or "High".
    - valence_choice (str): "Negative", "Neutral" or "Positive".
    - difficulty_choice (str): "Easy", "Medium" or "Hard".

    An unrecognised choice leaves that dimension unfiltered.

    Returns:
    - pd.DataFrame: A one-row frame with the chosen song.

    Raises:
    - ValueError: If no song satisfies all of the requested levels.
    """
    # UI wording -> label stored in the corresponding *_level column.
    level_lookup = {
        'danceability_level': ({"Low": "Low", "Medium": "Medium", "High": "High"}, dance_choice),
        'valence_level': ({"Negative": "Low", "Neutral": "Medium", "Positive": "High"}, valence_choice),
        'difficulty_level': ({"Easy": "Low", "Medium": "Medium", "Hard": "High"}, difficulty_choice),
    }

    for column, (levels, user_choice) in level_lookup.items():
        level = levels.get(user_choice)
        if level is not None:
            df = df[df[column] == level]

    if df.empty:
        # df.sample() would raise an opaque ValueError here; say what happened.
        raise ValueError(
            "No song matches "
            f"danceability={dance_choice!r}, valence={valence_choice!r}, "
            f"difficulty={difficulty_choice!r}"
        )

    return df.sample()  # one song chosen at random
data.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.3
2
+ aiosignal==1.3.1
3
+ altair==4.0.0
4
+ anyio==4.3.0
5
+ appnope==0.1.4
6
+ asttokens==2.4.1
7
+ attrs==23.2.0
8
+ beautifulsoup4==4.12.3
9
+ blinker==1.7.0
10
+ bs4==0.0.2
11
+ cachetools==5.3.3
12
+ certifi==2023.11.17
13
+ charset-normalizer==3.3.2
14
+ click==8.1.7
15
+ comm==0.2.2
16
+ dataclasses-json==0.5.14
17
+ debugpy==1.8.1
18
+ decorator==5.1.1
19
+ distro==1.9.0
20
+ entrypoints==0.4
21
+ executing==2.0.1
22
+ faiss-cpu==1.7.4
23
+ frozenlist==1.4.1
24
+ gitdb==4.0.11
25
+ GitPython==3.1.43
26
+ h11==0.14.0
27
+ httpcore==1.0.5
28
+ httpx==0.27.0
29
+ hugchat==0.4.1
30
+ idna==3.6
31
+ ipykernel==6.29.3
32
+ ipython==8.22.2
33
+ jedi==0.19.1
34
+ Jinja2==3.1.3
35
+ joblib==1.4.0
36
+ jsonpatch==1.33
37
+ jsonpointer==2.4
38
+ jsonschema==4.21.1
39
+ jsonschema-specifications==2023.12.1
40
+ jupyter_client==8.6.1
41
+ jupyter_core==5.7.2
42
+ langchain==0.1.16
43
+ langchain-community==0.0.32
44
+ langchain-core==0.1.42
45
+ langchain-openai==0.1.3
46
+ langchain-text-splitters==0.0.1
47
+ langdetect==1.0.9
48
+ langsmith==0.1.47
49
+ markdown-it-py==3.0.0
50
+ MarkupSafe==2.1.5
51
+ marshmallow==3.21.1
52
+ matplotlib-inline==0.1.6
53
+ mdurl==0.1.2
54
+ multidict==6.0.5
55
+ mypy-extensions==1.0.0
56
+ nest-asyncio==1.6.0
57
+ nltk==3.8.1
58
+ numexpr==2.10.0
59
+ numpy==1.26.4
60
+ openai==1.17.1
61
+ openapi-schema-pydantic==1.2.4
62
+ orjson==3.10.0
63
+ packaging==23.2
64
+ pandas==2.2.1
65
+ parso==0.8.3
66
+ pexpect==4.9.0
67
+ pillow==10.3.0
68
+ platformdirs==4.2.0
69
+ prompt-toolkit==3.0.43
70
+ protobuf==4.25.3
71
+ psutil==5.9.8
72
+ ptyprocess==0.7.0
73
+ pure-eval==0.2.2
74
+ pyarrow==15.0.2
75
+ pydantic==1.10.15
76
+ pydeck==0.8.1b0
77
+ Pygments==2.17.2
78
+ python-dateutil==2.9.0.post0
79
+ python-dotenv==1.0.0
80
+ pytz==2024.1
81
+ PyYAML==6.0.1
82
+ pyzmq==25.1.2
83
+ referencing==0.34.0
84
+ regex==2023.12.25
85
+ requests==2.31.0
86
+ requests-toolbelt==1.0.0
87
+ rich==13.7.1
88
+ rpds-py==0.18.0
89
+ scipy==1.12.0
90
+ six==1.16.0
91
+ smmap==5.0.1
92
+ sniffio==1.3.1
93
+ soupsieve==2.5
94
+ SQLAlchemy==2.0.29
95
+ stack-data==0.6.3
96
+ streamlit==1.33.0
97
+ streamlit-js-eval==0.1.7
98
+ tenacity==8.2.3
99
+ tiktoken==0.6.0
100
+ toml==0.10.2
101
+ toolz==0.12.1
102
+ tornado==6.4
103
+ tqdm==4.66.2
104
+ traitlets==5.14.2
105
+ typing-inspect==0.9.0
106
+ typing_extensions==4.11.0
107
+ tzdata==2024.1
108
+ urllib3==2.2.1
109
+ watchdog==4.0.0
110
+ wcwidth==0.2.13
111
+ yarl==1.9.4