Spaces:
Runtime error
Runtime error
from dotenv import load_dotenv | |
load_dotenv(dotenv_path="../env_vars.env") | |
import json | |
from pathlib import Path | |
import os | |
from langchain.chains import LLMChain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.prompts import PromptTemplate | |
from langchain.vectorstores import DeepLake | |
from utils import clean_emotions_json | |
""" | |
This function takes all the songs we have and use the lyrics to create a list of 8 emotions. | |
These 8 emotions will then be used for similarity matching with user prompt emotions, instead of using the entire lyrics. | |
""" | |
def generate_emotion_from_lyrics(input_file: str, prompt_path: str, output_file: str, clean_output=True) -> None:
    """Turn each song's lyrics into a short emotion summary and save the result as JSON.

    The prompt template stored at ``prompt_path`` is filled with every song's
    lyrics and run through an LLM chain. The text the chain returns is stored
    together with the song's name and Spotify iframe, so that later similarity
    matching can work on the emotions instead of the entire lyrics.

    Args:
        input_file: Path to a JSON file containing a list of objects with the
            keys ``song_name``, ``iframe`` and ``lyrics``.
        prompt_path: Path to a text file holding a prompt template with a
            ``song_lyrics`` input variable (the prompt is expected to ask for
            8 emotions -- the template itself is not visible here).
        output_file: Path the ``song_name`` / ``iframe`` / ``emotions`` records
            are written to as indented JSON.
        clean_output: When true, ``clean_emotions_json`` is called on
            ``output_file`` after it has been written.

    Raises:
        KeyError: If a song record lacks ``song_name``, ``iframe`` or ``lyrics``.
    """
    prompt = PromptTemplate(
        input_variables=["song_lyrics"],
        template=Path(prompt_path).read_text(),
    )
    llm = ChatOpenAI(temperature=0.8)
    chain = LLMChain(llm=llm, prompt=prompt)

    # Read file that has scraped lyrics.
    with open(input_file, "r") as f:
        lyrical_data = json.load(f)

    # Each 'song' in lyrical_data looks as follows:
    # {
    #     "song_name": "Cruel Summer",
    #     "iframe": "<iframe style=... title=\"Spotify Embed:...",
    #     "lyrics": "Fever dream high in the quiet of the night..."
    # }

    # Collect the emotions conveyed in each song using its lyrics.
    emotion_data = []
    for song in lyrical_data:
        # Progress output: one LLM call is made per song.
        print(f"{song['song_name']}")
        emotions = chain.run(song_lyrics=song["lyrics"])
        emotion_data.append(
            {
                "song_name": song["song_name"],
                "iframe": song["iframe"],
                "emotions": emotions,
            }
        )
        print(emotions)

    # Write to output file which will be used to store the song emotions as embeddings.
    with open(output_file, "w") as f:
        json.dump(emotion_data, f, indent=4)
    print(f"Spotify song, url and song emotions saved to {output_file}")

    # Clean the generated emotions. This must target the file written above:
    # a hard-coded path would clean the wrong file (or fail) whenever the
    # caller chooses a different output location.
    if clean_output:
        clean_emotions_json(output_file)
def create_db(dataset_path: str, input_file: str) -> DeepLake:
    """Create a DeepLake vector store from a JSON file of per-song emotions.

    The ``emotions`` string of every record is embedded as the text, while the
    song name and iframe travel along as metadata.

    Args:
        dataset_path: Location of the DeepLake dataset, passed through to
            ``DeepLake.from_texts``.
        input_file: Path to a JSON file containing a list of objects with the
            keys ``song_name``, ``iframe`` and ``emotions``.

    Returns:
        The store returned by ``DeepLake.from_texts``.

    Raises:
        KeyError: If the ``MODEL`` environment variable is unset or a record
            is missing one of the expected keys.
    """
    with open(input_file, "r") as handle:
        records = json.load(handle)

    # A record looks like:
    # {
    #     "song_name": "Mastermind",
    #     "iframe": "<iframe style=... title=\"Spotify Embed: ...",
    #     "emotions": "excitement, happiness, love, desire, confidence, ..."
    # }
    # Build (text, metadata) pairs in a single pass so the keys of each record
    # are read in the order emotions -> song_name -> iframe.
    pairs = [
        (
            record["emotions"],
            {
                "name": record["song_name"],
                "iframe": record["iframe"],
            },
        )
        for record in records
    ]
    emotion_texts = [text for text, _ in pairs]
    song_metadata = [meta for _, meta in pairs]

    # The embedding model name comes from the MODEL environment variable.
    embedder = OpenAIEmbeddings(model=os.environ['MODEL'])
    return DeepLake.from_texts(
        emotion_texts,
        embedder,
        metadatas=song_metadata,
        dataset_path=dataset_path,
    )
def create_emotion_embeddings(input_file: str) -> None:
    """Embed the emotions in ``input_file`` into the configured hub dataset.

    The dataset location is assembled as ``hub://<org>/<dataset>`` from the
    ``ACTIVELOOP_ORG_ID`` and ``DATASET`` environment variables and handed to
    ``create_db`` together with ``input_file``.

    Args:
        input_file: Path to the emotions JSON file forwarded to ``create_db``.

    Raises:
        KeyError: If ``ACTIVELOOP_ORG_ID`` or ``DATASET`` is not set.
    """
    org_id = os.environ['ACTIVELOOP_ORG_ID']
    dataset_name = os.environ['DATASET']
    hub_location = f"hub://{org_id}/{dataset_name}"
    create_db(hub_location, input_file)
if __name__ == "__main__":
    # Input/output locations, relative to the directory the script is run from.
    lyrics_json = "../data/spotify_song_url_lyrics.json"
    emotions_json = "../data/spotify_song_url_emotions.json"

    # Step 1: ask the chat model for the top emotions of every scraped song lyric.
    generate_emotion_from_lyrics(
        lyrics_json,
        "../prompts/get_song_emotions.prompt",
        emotions_json,
        clean_output=True,
    )

    # Step 2: convert the generated emotions to embeddings and save them in a
    # vector store (DeepLake); the embedding model is selected by the MODEL
    # environment variable inside create_db.
    create_emotion_embeddings(emotions_json)