Spaces:
Running
Running
HF Spaces Fix
Browse files- .gitattributes +4 -0
- README.md +17 -0
- app.py +66 -17
- artist_utils.py +8 -2
- recommendation.py +14 -12
- requirements.txt +5 -1
.gitattributes
CHANGED
@@ -34,3 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
datasets/*.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
datasets/*.csv filter=lfs diff=lfs merge=lfs -text
|
37 |
+
datasets filter=lfs diff=lfs merge=lfs -text
|
38 |
+
datasets/Global Music Artists.csv filter=lfs diff=lfs merge=lfs -text
|
39 |
+
datasets/Music.csv filter=lfs diff=lfs merge=lfs -text
|
40 |
+
datasets/data.csv filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -11,3 +11,20 @@ short_description: A youtube and spotify song and playlist downloader
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
# SongPorter
|
16 |
+
|
17 |
+
A music recommendation API that provides song recommendations and artist information.
|
18 |
+
|
19 |
+
## Endpoints
|
20 |
+
|
21 |
+
- `/` - API info and documentation
|
22 |
+
- `/recommendations/` - Get song recommendations (POST)
|
23 |
+
- `/artist-info/` - Get artist information (POST)
|
24 |
+
|
25 |
+
## Technologies
|
26 |
+
|
27 |
+
- FastAPI
|
28 |
+
- Pandas & NumPy
|
29 |
+
- Scikit-learn
|
30 |
+
- Content-based recommendation
|
app.py
CHANGED
@@ -1,34 +1,56 @@
|
|
1 |
import logging
|
2 |
-
from fastapi import FastAPI, HTTPException
|
3 |
from pydantic import BaseModel, Field
|
4 |
from typing import List, Dict, Optional
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
#
|
7 |
from recommendation import MusicRecommender, get_hardcoded_recommendations
|
8 |
-
# Import artist utils
|
9 |
from artist_utils import get_bulk_artist_info, load_artist_data
|
10 |
|
11 |
-
logging
|
|
|
|
|
|
|
|
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# --- Initialize Recommender ---
|
17 |
recommender = None
|
18 |
try:
|
|
|
19 |
recommender = MusicRecommender()
|
20 |
logger.info("Music Recommender loaded successfully.")
|
21 |
except Exception as e:
|
22 |
logger.error(f"Failed to load Music Recommender: {e}", exc_info=True)
|
23 |
|
24 |
# --- Initialize Artist Data ---
|
25 |
-
# Ensure artist data is loaded on startup
|
26 |
if not load_artist_data():
|
27 |
logger.error("CRITICAL: Failed to load artist data on startup.")
|
28 |
-
#
|
29 |
|
30 |
# --- API Input Models ---
|
31 |
-
class RecommendationRequestData(BaseModel):
|
32 |
recent_song_ids: List[str] = Field(..., example=["spotify_id_1", "song_name_2"])
|
33 |
top_genres: List[str] = Field(..., example=["Pop", "Rock"])
|
34 |
limit: int = 10
|
@@ -36,29 +58,37 @@ class RecommendationRequestData(BaseModel): # Renamed from UserData for clarity
|
|
36 |
class ArtistInfoRequestData(BaseModel):
|
37 |
artist_names: List[str] = Field(..., example=["Artist Name 1", "Another Artist"])
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# --- API Endpoints ---
|
40 |
|
41 |
@app.post("/recommendations/")
|
42 |
-
async def get_recommendations_endpoint(request_data: RecommendationRequestData):
|
43 |
if recommender is None:
|
44 |
logger.error("Recommender not available.")
|
45 |
-
return {"recommendations": get_hardcoded_recommendations(request_data.limit),
|
|
|
46 |
|
47 |
try:
|
48 |
logger.info(f"Received recommendation request: {request_data.dict()}")
|
49 |
-
#
|
50 |
-
# ... existing recommendation logic using request_data ...
|
51 |
-
|
52 |
all_recommendations = []
|
53 |
|
54 |
# 1. Content-based from recent songs
|
55 |
-
for song_id in request_data.recent_song_ids[:5]:
|
56 |
song_recommendations = recommender.find_similar_songs(song_id, n=20)
|
57 |
if song_recommendations:
|
58 |
all_recommendations.extend(song_recommendations)
|
59 |
|
60 |
# 2. Genre-based
|
61 |
-
for genre in request_data.top_genres[:3]:
|
62 |
if genre and genre != 'Unknown':
|
63 |
genre_recommendations = recommender.get_recommendations_by_genre(genre, n=10)
|
64 |
if genre_recommendations:
|
@@ -103,7 +133,7 @@ async def get_artist_info_endpoint(request_data: ArtistInfoRequestData) -> Dict[
|
|
103 |
try:
|
104 |
logger.info(f"Received artist info request for {len(request_data.artist_names)} artists.")
|
105 |
if not request_data.artist_names:
|
106 |
-
return {}
|
107 |
|
108 |
artist_info = get_bulk_artist_info(request_data.artist_names)
|
109 |
logger.info(f"Returning info for {len(artist_info)} artists.")
|
@@ -116,4 +146,23 @@ async def get_artist_info_endpoint(request_data: ArtistInfoRequestData) -> Dict[
|
|
116 |
|
117 |
@app.get("/")
|
118 |
async def root():
|
119 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import logging
|
2 |
+
from fastapi import FastAPI, HTTPException, Request
|
3 |
from pydantic import BaseModel, Field
|
4 |
from typing import List, Dict, Optional
|
5 |
+
import uvicorn
|
6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
7 |
+
from fastapi.staticfiles import StaticFiles
|
8 |
+
import os
|
9 |
+
import time
|
10 |
|
11 |
+
# Import recommender and artist utils
|
12 |
from recommendation import MusicRecommender, get_hardcoded_recommendations
|
|
|
13 |
from artist_utils import get_bulk_artist_info, load_artist_data
|
14 |
|
15 |
+
# Configure logging
|
16 |
+
logging.basicConfig(
|
17 |
+
level=logging.INFO,
|
18 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
19 |
+
)
|
20 |
logger = logging.getLogger(__name__)
|
21 |
|
22 |
+
# Initialize FastAPI app
|
23 |
+
app = FastAPI(
|
24 |
+
title="SongPorter API",
|
25 |
+
description="Music recommendation and artist info API",
|
26 |
+
version="1.0.0",
|
27 |
+
)
|
28 |
+
|
29 |
+
# Add CORS middleware
|
30 |
+
app.add_middleware(
|
31 |
+
CORSMiddleware,
|
32 |
+
allow_origins=["*"], # Allows all origins
|
33 |
+
allow_credentials=True,
|
34 |
+
allow_methods=["*"], # Allows all methods
|
35 |
+
allow_headers=["*"], # Allows all headers
|
36 |
+
)
|
37 |
|
38 |
# --- Initialize Recommender ---
|
39 |
recommender = None
|
40 |
try:
|
41 |
+
logger.info("Initializing Music Recommender...")
|
42 |
recommender = MusicRecommender()
|
43 |
logger.info("Music Recommender loaded successfully.")
|
44 |
except Exception as e:
|
45 |
logger.error(f"Failed to load Music Recommender: {e}", exc_info=True)
|
46 |
|
47 |
# --- Initialize Artist Data ---
|
|
|
48 |
if not load_artist_data():
|
49 |
logger.error("CRITICAL: Failed to load artist data on startup.")
|
50 |
+
# App will run without artist data, but log errors.
|
51 |
|
52 |
# --- API Input Models ---
|
53 |
+
class RecommendationRequestData(BaseModel):
|
54 |
recent_song_ids: List[str] = Field(..., example=["spotify_id_1", "song_name_2"])
|
55 |
top_genres: List[str] = Field(..., example=["Pop", "Rock"])
|
56 |
limit: int = 10
|
|
|
58 |
class ArtistInfoRequestData(BaseModel):
|
59 |
artist_names: List[str] = Field(..., example=["Artist Name 1", "Another Artist"])
|
60 |
|
61 |
+
# --- Request timing middleware ---
|
62 |
+
@app.middleware("http")
|
63 |
+
async def add_process_time_header(request: Request, call_next):
|
64 |
+
start_time = time.time()
|
65 |
+
response = await call_next(request)
|
66 |
+
process_time = time.time() - start_time
|
67 |
+
response.headers["X-Process-Time"] = str(process_time)
|
68 |
+
return response
|
69 |
+
|
70 |
# --- API Endpoints ---
|
71 |
|
72 |
@app.post("/recommendations/")
|
73 |
+
async def get_recommendations_endpoint(request_data: RecommendationRequestData):
|
74 |
if recommender is None:
|
75 |
logger.error("Recommender not available.")
|
76 |
+
return {"recommendations": get_hardcoded_recommendations(request_data.limit),
|
77 |
+
"message": "Recommender unavailable, returning popular songs."}
|
78 |
|
79 |
try:
|
80 |
logger.info(f"Received recommendation request: {request_data.dict()}")
|
81 |
+
# Process recommendation logic
|
|
|
|
|
82 |
all_recommendations = []
|
83 |
|
84 |
# 1. Content-based from recent songs
|
85 |
+
for song_id in request_data.recent_song_ids[:5]: # Limit seed songs
|
86 |
song_recommendations = recommender.find_similar_songs(song_id, n=20)
|
87 |
if song_recommendations:
|
88 |
all_recommendations.extend(song_recommendations)
|
89 |
|
90 |
# 2. Genre-based
|
91 |
+
for genre in request_data.top_genres[:3]: # Limit seed genres
|
92 |
if genre and genre != 'Unknown':
|
93 |
genre_recommendations = recommender.get_recommendations_by_genre(genre, n=10)
|
94 |
if genre_recommendations:
|
|
|
133 |
try:
|
134 |
logger.info(f"Received artist info request for {len(request_data.artist_names)} artists.")
|
135 |
if not request_data.artist_names:
|
136 |
+
return {} # Return empty dict if no names provided
|
137 |
|
138 |
artist_info = get_bulk_artist_info(request_data.artist_names)
|
139 |
logger.info(f"Returning info for {len(artist_info)} artists.")
|
|
|
146 |
|
147 |
@app.get("/")
|
148 |
async def root():
|
149 |
+
return {
|
150 |
+
"message": "SongPorter API",
|
151 |
+
"endpoints": [
|
152 |
+
{"path": "/", "method": "GET", "description": "This help message"},
|
153 |
+
{"path": "/recommendations/", "method": "POST", "description": "Get song recommendations"},
|
154 |
+
{"path": "/artist-info/", "method": "POST", "description": "Get artist information"}
|
155 |
+
],
|
156 |
+
"version": "1.0.0"
|
157 |
+
}
|
158 |
+
|
159 |
+
# --- Serve static files if they exist ---
|
160 |
+
static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
161 |
+
if os.path.exists(static_dir):
|
162 |
+
app.mount("/static", StaticFiles(directory=static_dir), name="static")
|
163 |
+
logger.info(f"Mounted static files from {static_dir}")
|
164 |
+
|
165 |
+
# --- For local development ---
|
166 |
+
if __name__ == "__main__":
|
167 |
+
port = int(os.environ.get("PORT", 8000))
|
168 |
+
uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)
|
artist_utils.py
CHANGED
@@ -17,8 +17,14 @@ def load_artist_data():
|
|
17 |
# Get the directory of the current script
|
18 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
19 |
datasets_path = os.path.join(current_dir, 'datasets')
|
20 |
-
# --- IMPORTANT:
|
21 |
-
csv_path = os.path.join(datasets_path, '
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# --- ---
|
23 |
|
24 |
if not os.path.exists(csv_path):
|
|
|
17 |
# Get the directory of the current script
|
18 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
19 |
datasets_path = os.path.join(current_dir, 'datasets')
|
20 |
+
# --- IMPORTANT: Check for both filename variants ---
|
21 |
+
csv_path = os.path.join(datasets_path, 'Global Music Artists.csv')
|
22 |
+
if not os.path.exists(csv_path):
|
23 |
+
# Try alternative filename
|
24 |
+
csv_path = os.path.join(datasets_path, 'Global_Music_Artists.csv')
|
25 |
+
if not os.path.exists(csv_path):
|
26 |
+
logger.error(f"Artist CSV file not found at: {csv_path}")
|
27 |
+
raise FileNotFoundError(f"Artist CSV file not found in datasets directory")
|
28 |
# --- ---
|
29 |
|
30 |
if not os.path.exists(csv_path):
|
recommendation.py
CHANGED
@@ -2,15 +2,11 @@ import logging
|
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
import os
|
5 |
-
from django.conf import settings
|
6 |
from datetime import datetime
|
7 |
from sklearn.metrics.pairwise import cosine_similarity
|
8 |
from sklearn.preprocessing import StandardScaler
|
9 |
from sklearn.pipeline import Pipeline
|
10 |
from sklearn.cluster import KMeans
|
11 |
-
from django.utils import timezone
|
12 |
-
from django.db.models import Count
|
13 |
-
from django.db.models.functions import TruncDate
|
14 |
|
15 |
logger = logging.getLogger(__name__)
|
16 |
|
@@ -79,7 +75,9 @@ class MusicRecommender:
|
|
79 |
|
80 |
try:
|
81 |
# Load datasets
|
82 |
-
|
|
|
|
|
83 |
|
84 |
# Skip Music.csv as it has an invalid format with song titles as column names
|
85 |
self.logger.info("Skipping Music.csv due to invalid format, using data.csv directly")
|
@@ -91,12 +89,16 @@ class MusicRecommender:
|
|
91 |
self.logger.info("Loaded data.csv dataset")
|
92 |
|
93 |
# Load genre and year data
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# Prepare data and build clusters
|
102 |
self._prepare_data()
|
@@ -725,4 +727,4 @@ def update_user_recommendations(user):
|
|
725 |
|
726 |
except Exception as e:
|
727 |
logger.error(f"Error updating recommendations: {e}")
|
728 |
-
return False
|
|
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
import os
|
|
|
5 |
from datetime import datetime
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
from sklearn.preprocessing import StandardScaler
|
8 |
from sklearn.pipeline import Pipeline
|
9 |
from sklearn.cluster import KMeans
|
|
|
|
|
|
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
|
|
75 |
|
76 |
try:
|
77 |
# Load datasets
|
78 |
+
# Get the directory of the current script for HF Spaces compatibility
|
79 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
80 |
+
datasets_path = os.path.join(current_dir, 'datasets')
|
81 |
|
82 |
# Skip Music.csv as it has an invalid format with song titles as column names
|
83 |
self.logger.info("Skipping Music.csv due to invalid format, using data.csv directly")
|
|
|
89 |
self.logger.info("Loaded data.csv dataset")
|
90 |
|
91 |
# Load genre and year data
|
92 |
+
try:
|
93 |
+
self.genre_data = pd.read_csv(os.path.join(datasets_path, 'data_by_genres.csv'),
|
94 |
+
on_bad_lines='skip',
|
95 |
+
engine='python')
|
96 |
+
self.year_data = pd.read_csv(os.path.join(datasets_path, 'data_by_year.csv'),
|
97 |
+
on_bad_lines='skip',
|
98 |
+
engine='python')
|
99 |
+
self.logger.info("Loaded genre and year datasets")
|
100 |
+
except FileNotFoundError:
|
101 |
+
self.logger.warning("Genre or year datasets not found, continuing with limited functionality")
|
102 |
|
103 |
# Prepare data and build clusters
|
104 |
self._prepare_data()
|
|
|
727 |
|
728 |
except Exception as e:
|
729 |
logger.error(f"Error updating recommendations: {e}")
|
730 |
+
return False
|
requirements.txt
CHANGED
@@ -3,4 +3,8 @@ uvicorn[standard]
|
|
3 |
pandas
|
4 |
numpy
|
5 |
scikit-learn
|
6 |
-
python-dotenv # Optional, if you use environment variables
|
|
|
|
|
|
|
|
|
|
3 |
pandas
|
4 |
numpy
|
5 |
scikit-learn
|
6 |
+
python-dotenv # Optional, if you use environment variables
|
7 |
+
matplotlib # For visualizations if needed
|
8 |
+
requests # For external API calls
|
9 |
+
pillow # For image handling
|
10 |
+
plotly # Optional for interactive visualizations
|