MonilM committed on
Commit
f1fe2d6
·
1 Parent(s): 382e186

HF Spaces Fix

Browse files
Files changed (6) hide show
  1. .gitattributes +4 -0
  2. README.md +17 -0
  3. app.py +66 -17
  4. artist_utils.py +8 -2
  5. recommendation.py +14 -12
  6. requirements.txt +5 -1
.gitattributes CHANGED
@@ -34,3 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  datasets/*.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  datasets/*.csv filter=lfs diff=lfs merge=lfs -text
37
+ datasets filter=lfs diff=lfs merge=lfs -text
38
+ datasets/Global Music Artists.csv filter=lfs diff=lfs merge=lfs -text
39
+ datasets/Music.csv filter=lfs diff=lfs merge=lfs -text
40
+ datasets/data.csv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -11,3 +11,20 @@ short_description: A youtube and spotify song and playlist downloader
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+ # SongPorter
16
+
17
+ A music recommendation API that provides song recommendations and artist information.
18
+
19
+ ## Endpoints
20
+
21
+ - `/` - API info and documentation
22
+ - `/recommendations/` - Get song recommendations (POST)
23
+ - `/artist-info/` - Get artist information (POST)
24
+
25
+ ## Technologies
26
+
27
+ - FastAPI
28
+ - Pandas & NumPy
29
+ - Scikit-learn
30
+ - Content-based recommendation
app.py CHANGED
@@ -1,34 +1,56 @@
1
  import logging
2
- from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel, Field
4
  from typing import List, Dict, Optional
 
 
 
 
 
5
 
6
- # If using recommender.py:
7
  from recommendation import MusicRecommender, get_hardcoded_recommendations
8
- # Import artist utils
9
  from artist_utils import get_bulk_artist_info, load_artist_data
10
 
11
- logging.basicConfig(level=logging.INFO)
 
 
 
 
12
  logger = logging.getLogger(__name__)
13
 
14
- app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # --- Initialize Recommender ---
17
  recommender = None
18
  try:
 
19
  recommender = MusicRecommender()
20
  logger.info("Music Recommender loaded successfully.")
21
  except Exception as e:
22
  logger.error(f"Failed to load Music Recommender: {e}", exc_info=True)
23
 
24
  # --- Initialize Artist Data ---
25
- # Ensure artist data is loaded on startup
26
  if not load_artist_data():
27
  logger.error("CRITICAL: Failed to load artist data on startup.")
28
- # Decide if the app should run without artist data. For now, it will, but log errors.
29
 
30
  # --- API Input Models ---
31
- class RecommendationRequestData(BaseModel): # Renamed from UserData for clarity
32
  recent_song_ids: List[str] = Field(..., example=["spotify_id_1", "song_name_2"])
33
  top_genres: List[str] = Field(..., example=["Pop", "Rock"])
34
  limit: int = 10
@@ -36,29 +58,37 @@ class RecommendationRequestData(BaseModel): # Renamed from UserData for clarity
36
  class ArtistInfoRequestData(BaseModel):
37
  artist_names: List[str] = Field(..., example=["Artist Name 1", "Another Artist"])
38
 
 
 
 
 
 
 
 
 
 
39
  # --- API Endpoints ---
40
 
41
  @app.post("/recommendations/")
42
- async def get_recommendations_endpoint(request_data: RecommendationRequestData): # Use renamed model
43
  if recommender is None:
44
  logger.error("Recommender not available.")
45
- return {"recommendations": get_hardcoded_recommendations(request_data.limit), "message": "Recommender unavailable, returning popular songs."}
 
46
 
47
  try:
48
  logger.info(f"Received recommendation request: {request_data.dict()}")
49
- # (Keep the recommendation logic from the previous step here)
50
- # ... existing recommendation logic using request_data ...
51
-
52
  all_recommendations = []
53
 
54
  # 1. Content-based from recent songs
55
- for song_id in request_data.recent_song_ids[:5]: # Limit seed songs
56
  song_recommendations = recommender.find_similar_songs(song_id, n=20)
57
  if song_recommendations:
58
  all_recommendations.extend(song_recommendations)
59
 
60
  # 2. Genre-based
61
- for genre in request_data.top_genres[:3]: # Limit seed genres
62
  if genre and genre != 'Unknown':
63
  genre_recommendations = recommender.get_recommendations_by_genre(genre, n=10)
64
  if genre_recommendations:
@@ -103,7 +133,7 @@ async def get_artist_info_endpoint(request_data: ArtistInfoRequestData) -> Dict[
103
  try:
104
  logger.info(f"Received artist info request for {len(request_data.artist_names)} artists.")
105
  if not request_data.artist_names:
106
- return {} # Return empty dict if no names provided
107
 
108
  artist_info = get_bulk_artist_info(request_data.artist_names)
109
  logger.info(f"Returning info for {len(artist_info)} artists.")
@@ -116,4 +146,23 @@ async def get_artist_info_endpoint(request_data: ArtistInfoRequestData) -> Dict[
116
 
117
  @app.get("/")
118
  async def root():
119
- return {"message": "TuneVault Recommender & Artist Info API"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import logging
2
+ from fastapi import FastAPI, HTTPException, Request
3
  from pydantic import BaseModel, Field
4
  from typing import List, Dict, Optional
5
+ import uvicorn
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.staticfiles import StaticFiles
8
+ import os
9
+ import time
10
 
11
+ # Import recommender and artist utils
12
  from recommendation import MusicRecommender, get_hardcoded_recommendations
 
13
  from artist_utils import get_bulk_artist_info, load_artist_data
14
 
15
+ # Configure logging
16
+ logging.basicConfig(
17
+ level=logging.INFO,
18
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
+ )
20
  logger = logging.getLogger(__name__)
21
 
22
+ # Initialize FastAPI app
23
+ app = FastAPI(
24
+ title="SongPorter API",
25
+ description="Music recommendation and artist info API",
26
+ version="1.0.0",
27
+ )
28
+
29
+ # Add CORS middleware
30
+ app.add_middleware(
31
+ CORSMiddleware,
32
+ allow_origins=["*"], # Allows all origins
33
+ allow_credentials=True,
34
+ allow_methods=["*"], # Allows all methods
35
+ allow_headers=["*"], # Allows all headers
36
+ )
37
 
38
  # --- Initialize Recommender ---
39
  recommender = None
40
  try:
41
+ logger.info("Initializing Music Recommender...")
42
  recommender = MusicRecommender()
43
  logger.info("Music Recommender loaded successfully.")
44
  except Exception as e:
45
  logger.error(f"Failed to load Music Recommender: {e}", exc_info=True)
46
 
47
  # --- Initialize Artist Data ---
 
48
  if not load_artist_data():
49
  logger.error("CRITICAL: Failed to load artist data on startup.")
50
+ # App will run without artist data, but log errors.
51
 
52
  # --- API Input Models ---
53
+ class RecommendationRequestData(BaseModel):
54
  recent_song_ids: List[str] = Field(..., example=["spotify_id_1", "song_name_2"])
55
  top_genres: List[str] = Field(..., example=["Pop", "Rock"])
56
  limit: int = 10
 
58
  class ArtistInfoRequestData(BaseModel):
59
  artist_names: List[str] = Field(..., example=["Artist Name 1", "Another Artist"])
60
 
61
+ # --- Request timing middleware ---
62
+ @app.middleware("http")
63
+ async def add_process_time_header(request: Request, call_next):
64
+ start_time = time.time()
65
+ response = await call_next(request)
66
+ process_time = time.time() - start_time
67
+ response.headers["X-Process-Time"] = str(process_time)
68
+ return response
69
+
70
  # --- API Endpoints ---
71
 
72
  @app.post("/recommendations/")
73
+ async def get_recommendations_endpoint(request_data: RecommendationRequestData):
74
  if recommender is None:
75
  logger.error("Recommender not available.")
76
+ return {"recommendations": get_hardcoded_recommendations(request_data.limit),
77
+ "message": "Recommender unavailable, returning popular songs."}
78
 
79
  try:
80
  logger.info(f"Received recommendation request: {request_data.dict()}")
81
+ # Process recommendation logic
 
 
82
  all_recommendations = []
83
 
84
  # 1. Content-based from recent songs
85
+ for song_id in request_data.recent_song_ids[:5]: # Limit seed songs
86
  song_recommendations = recommender.find_similar_songs(song_id, n=20)
87
  if song_recommendations:
88
  all_recommendations.extend(song_recommendations)
89
 
90
  # 2. Genre-based
91
+ for genre in request_data.top_genres[:3]: # Limit seed genres
92
  if genre and genre != 'Unknown':
93
  genre_recommendations = recommender.get_recommendations_by_genre(genre, n=10)
94
  if genre_recommendations:
 
133
  try:
134
  logger.info(f"Received artist info request for {len(request_data.artist_names)} artists.")
135
  if not request_data.artist_names:
136
+ return {} # Return empty dict if no names provided
137
 
138
  artist_info = get_bulk_artist_info(request_data.artist_names)
139
  logger.info(f"Returning info for {len(artist_info)} artists.")
 
146
 
147
  @app.get("/")
148
  async def root():
149
+ return {
150
+ "message": "SongPorter API",
151
+ "endpoints": [
152
+ {"path": "/", "method": "GET", "description": "This help message"},
153
+ {"path": "/recommendations/", "method": "POST", "description": "Get song recommendations"},
154
+ {"path": "/artist-info/", "method": "POST", "description": "Get artist information"}
155
+ ],
156
+ "version": "1.0.0"
157
+ }
158
+
159
+ # --- Serve static files if they exist ---
160
+ static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
161
+ if os.path.exists(static_dir):
162
+ app.mount("/static", StaticFiles(directory=static_dir), name="static")
163
+ logger.info(f"Mounted static files from {static_dir}")
164
+
165
+ # --- For local development ---
166
+ if __name__ == "__main__":
167
+ port = int(os.environ.get("PORT", 8000))
168
+ uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)
artist_utils.py CHANGED
@@ -17,8 +17,14 @@ def load_artist_data():
17
  # Get the directory of the current script
18
  current_dir = os.path.dirname(os.path.abspath(__file__))
19
  datasets_path = os.path.join(current_dir, 'datasets')
20
- # --- IMPORTANT: Adjust this filename if yours is different ---
21
- csv_path = os.path.join(datasets_path, 'Global_Music_Artists.csv')
 
 
 
 
 
 
22
  # --- ---
23
 
24
  if not os.path.exists(csv_path):
 
17
  # Get the directory of the current script
18
  current_dir = os.path.dirname(os.path.abspath(__file__))
19
  datasets_path = os.path.join(current_dir, 'datasets')
20
+ # --- IMPORTANT: Check for both filename variants ---
21
+ csv_path = os.path.join(datasets_path, 'Global Music Artists.csv')
22
+ if not os.path.exists(csv_path):
23
+ # Try alternative filename
24
+ csv_path = os.path.join(datasets_path, 'Global_Music_Artists.csv')
25
+ if not os.path.exists(csv_path):
26
+ logger.error(f"Artist CSV file not found at: {csv_path}")
27
+ raise FileNotFoundError(f"Artist CSV file not found in datasets directory")
28
  # --- ---
29
 
30
  if not os.path.exists(csv_path):
recommendation.py CHANGED
@@ -2,15 +2,11 @@ import logging
2
  import numpy as np
3
  import pandas as pd
4
  import os
5
- from django.conf import settings
6
  from datetime import datetime
7
  from sklearn.metrics.pairwise import cosine_similarity
8
  from sklearn.preprocessing import StandardScaler
9
  from sklearn.pipeline import Pipeline
10
  from sklearn.cluster import KMeans
11
- from django.utils import timezone
12
- from django.db.models import Count
13
- from django.db.models.functions import TruncDate
14
 
15
  logger = logging.getLogger(__name__)
16
 
@@ -79,7 +75,9 @@ class MusicRecommender:
79
 
80
  try:
81
  # Load datasets
82
- datasets_path = os.path.join(settings.BASE_DIR, 'songs', 'datasets')
 
 
83
 
84
  # Skip Music.csv as it has an invalid format with song titles as column names
85
  self.logger.info("Skipping Music.csv due to invalid format, using data.csv directly")
@@ -91,12 +89,16 @@ class MusicRecommender:
91
  self.logger.info("Loaded data.csv dataset")
92
 
93
  # Load genre and year data
94
- self.genre_data = pd.read_csv(os.path.join(datasets_path, 'data_by_genres.csv'),
95
- on_bad_lines='skip',
96
- engine='python')
97
- self.year_data = pd.read_csv(os.path.join(datasets_path, 'data_by_year.csv'),
98
- on_bad_lines='skip',
99
- engine='python')
 
 
 
 
100
 
101
  # Prepare data and build clusters
102
  self._prepare_data()
@@ -725,4 +727,4 @@ def update_user_recommendations(user):
725
 
726
  except Exception as e:
727
  logger.error(f"Error updating recommendations: {e}")
728
- return False
 
2
  import numpy as np
3
  import pandas as pd
4
  import os
 
5
  from datetime import datetime
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from sklearn.preprocessing import StandardScaler
8
  from sklearn.pipeline import Pipeline
9
  from sklearn.cluster import KMeans
 
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
 
75
 
76
  try:
77
  # Load datasets
78
+ # Get the directory of the current script for HF Spaces compatibility
79
+ current_dir = os.path.dirname(os.path.abspath(__file__))
80
+ datasets_path = os.path.join(current_dir, 'datasets')
81
 
82
  # Skip Music.csv as it has an invalid format with song titles as column names
83
  self.logger.info("Skipping Music.csv due to invalid format, using data.csv directly")
 
89
  self.logger.info("Loaded data.csv dataset")
90
 
91
  # Load genre and year data
92
+ try:
93
+ self.genre_data = pd.read_csv(os.path.join(datasets_path, 'data_by_genres.csv'),
94
+ on_bad_lines='skip',
95
+ engine='python')
96
+ self.year_data = pd.read_csv(os.path.join(datasets_path, 'data_by_year.csv'),
97
+ on_bad_lines='skip',
98
+ engine='python')
99
+ self.logger.info("Loaded genre and year datasets")
100
+ except FileNotFoundError:
101
+ self.logger.warning("Genre or year datasets not found, continuing with limited functionality")
102
 
103
  # Prepare data and build clusters
104
  self._prepare_data()
 
727
 
728
  except Exception as e:
729
  logger.error(f"Error updating recommendations: {e}")
730
+ return False
requirements.txt CHANGED
@@ -3,4 +3,8 @@ uvicorn[standard]
3
  pandas
4
  numpy
5
  scikit-learn
6
- python-dotenv # Optional, if you use environment variables
 
 
 
 
 
3
  pandas
4
  numpy
5
  scikit-learn
6
+ python-dotenv # Optional, if you use environment variables
7
+ matplotlib # For visualizations if needed
8
+ requests # For external API calls
9
+ pillow # For image handling
10
+ plotly # Optional for interactive visualizations