MonilM committed on
Commit
b5976f7
·
1 Parent(s): 2478f41

HF Spaces Fix #7

Browse files
Files changed (2) hide show
  1. app.py +45 -35
  2. artist_utils.py +148 -109
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import logging
2
- from fastapi import FastAPI, HTTPException, Request
3
  from pydantic import BaseModel, Field
4
  from typing import List, Dict, Optional
5
  import uvicorn
@@ -10,7 +10,7 @@ import time
10
 
11
  # Import recommender and artist utils
12
  from recommendation import MusicRecommender, get_hardcoded_recommendations
13
- from artist_utils import get_bulk_artist_info, load_artist_data
14
 
15
  # Configure logging
16
  logging.basicConfig(
@@ -157,50 +157,60 @@ async def get_recommendations_endpoint(request_data: RecommendationRequestData):
157
 
158
 
159
@app.post("/artist-info/")
async def get_artist_info_endpoint(request_data: ArtistInfoRequestData):
    """
    Return image, country and genre information for a single artist.

    Every value in the response is wrapped in a one-key dictionary
    (``{'name': ...}`` or ``{'url': ...}``). When the lookup yields nothing
    for the artist, placeholder values are returned instead of an error.

    Raises:
        HTTPException: 400 when ``artist_name`` is empty, 500 when the lookup
            fails unexpectedly.
    """
    default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="
    try:
        artist_name = request_data.artist_name
        logger.info(f"Received artist info request for artist: {artist_name}")

        if not artist_name:
            raise HTTPException(status_code=400, detail="artist_name field cannot be empty")

        # The lookup helper is bulk-oriented, so pass the single name as a list.
        artist_info = get_bulk_artist_info([artist_name]).get(artist_name, {})

        if artist_info:
            logger.info(f"Returning info for artist: {artist_name}")
            image_url = artist_info.get('artist_img', '')
        else:
            logger.warning(f"No info found for artist: {artist_name}, returning default values")
            image_url = default_img

        # With an empty artist_info both .get() calls fall back to 'Unknown',
        # so one literal covers the found and the not-found case.
        return {
            'artist': {'name': artist_name},
            'artist_img': {'url': image_url},
            'country': {'name': artist_info.get('country', 'Unknown')},
            'artist_genre': {'name': artist_info.get('artist_genre', 'Unknown')}
        }

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged instead
        # of rewrapping them as a 500 below.
        raise
    except Exception as e:
        logger.error(f"Error fetching artist info: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
205
 
206
  @app.get("/")
 
1
  import logging
2
+ from fastapi import FastAPI, HTTPException, Request, Body
3
  from pydantic import BaseModel, Field
4
  from typing import List, Dict, Optional
5
  import uvicorn
 
10
 
11
  # Import recommender and artist utils
12
  from recommendation import MusicRecommender, get_hardcoded_recommendations
13
+ from artist_utils import get_bulk_artist_info, load_artist_data, get_artist_info
14
 
15
  # Configure logging
16
  logging.basicConfig(
 
157
 
158
 
159
@app.post("/artist-info/")
async def get_artist_info_endpoint(request_data: ArtistInfoRequestData):
    """
    Endpoint to get information for a single artist.

    Each field of the response is a one-element JSON array holding the value
    (``{"artist": ["<name>"], "artist_img": ["<url>"], ...}``).
    NOTE(review): the consumer is described as a Django ArtistSerializer;
    confirm it expects this array-wrapped shape.

    Raises:
        HTTPException: 400 when ``artist_name`` is empty, 500 when the lookup
            fails unexpectedly.
    """
    try:
        artist_name = request_data.artist_name
        logger.info(f"Received artist info request for artist: {artist_name}")

        if not artist_name:
            raise HTTPException(status_code=400, detail="artist_name field cannot be empty")

        # Get artist info from CSV data
        artist_info = get_artist_info(artist_name)
        logger.info(f"Returning info for artist: {artist_name}")

        # One-element lists instead of set literals: FastAPI's encoder turns a
        # set into a JSON array anyway, so the payload is unchanged, but a
        # list stays serializable if the response is ever built by hand.
        return {
            'artist': [artist_info['artist']],
            'artist_img': [artist_info['artist_img']],
            'country': [artist_info['country']],
            'artist_genre': [artist_info['artist_genre']]
        }

    except HTTPException:
        # Keep the 400 above a 400; the generic handler would turn it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error fetching artist info: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
187
 
188
@app.post("/artists/")
async def get_multiple_artist_info(artist_names: List[str] = Body(...)):
    """
    Get information for multiple artists in one request.

    The request body is a JSON array of artist names. The response maps each
    requested name to the same structure the single-artist endpoint produces:
    every field is a one-element JSON array holding the value.

    Raises:
        HTTPException: 500 when a lookup fails unexpectedly.
    """
    try:
        logger.info(f"Received request for multiple artists: {artist_names}")

        results = {}
        for name in artist_names:
            info = get_artist_info(name)

            # Lists, not set literals: a set is not JSON serializable by
            # json.dumps, which made every non-empty request fail with a 500.
            results[name] = {
                'artist': [info['artist']],
                'artist_img': [info['artist_img']],
                'country': [info['country']],
                'artist_genre': [info['artist_genre']]
            }

        # Returning the dict lets FastAPI build the JSON response itself, so
        # this block does not depend on JSONResponse being imported.
        return results

    except Exception as e:
        logger.error(f"Error fetching multiple artist info: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
215
 
216
  @app.get("/")
artist_utils.py CHANGED
@@ -1,140 +1,179 @@
 
1
  import pandas as pd
2
  import os
3
- import logging
4
 
 
5
  logger = logging.getLogger(__name__)
6
 
 
7
  ARTIST_DATA = None
8
- ARTIST_MAP = {} # Cache for faster lookups {artist_name_lower: {info}}
 
9
 
10
def load_artist_data():
    """
    Load the artist dataset from the CSV file and build the lookup map.

    The CSV is searched for in the ``datasets`` directory next to this module,
    under either ``Global Music Artists.csv`` or ``Global_Music_Artists.csv``.
    On success ``ARTIST_DATA`` holds the DataFrame indexed by the lowercase
    artist name and ``ARTIST_MAP`` maps that lowercase name to a dict with
    whichever of ``artist_img``, ``country`` and ``artist_genre`` exist in the
    CSV (missing cell values become ``''``).

    Returns:
        bool: True when the data is loaded (or was already loaded), False when
        loading failed. After a failure ``ARTIST_DATA`` is None, so a later
        call tries again.
    """
    global ARTIST_DATA, ARTIST_MAP
    if ARTIST_DATA is not None:
        return True  # Already loaded

    try:
        # Get the directory of the current script
        current_dir = os.path.dirname(os.path.abspath(__file__))
        datasets_path = os.path.join(current_dir, 'datasets')

        # Accept both filename variants; the first one that exists wins.
        csv_path = None
        for filename in ('Global Music Artists.csv', 'Global_Music_Artists.csv'):
            candidate = os.path.join(datasets_path, filename)
            if os.path.exists(candidate):
                csv_path = candidate
                break
        if csv_path is None:
            raise FileNotFoundError("Artist CSV file not found in datasets directory")

        try:
            data = pd.read_csv(csv_path, on_bad_lines='skip', engine='python', encoding='utf-8')
        except UnicodeDecodeError:
            logger.warning("UTF-8 decoding failed, trying latin1 for artist CSV.")
            data = pd.read_csv(csv_path, on_bad_lines='skip', engine='python', encoding='latin1')

        logger.info(f"Loaded artist dataset: {csv_path} with {len(data)} rows.")
        logger.info(f"Artist data columns: {data.columns.tolist()}")

        if 'artist_name' not in data.columns:
            logger.error("Critical: 'artist_name' column not found in artist CSV.")
            # ARTIST_DATA is only assigned on success, so this failure is not
            # later mistaken for "already loaded".
            return False

        # Lowercase names become the index for case-insensitive matching.
        data['artist_lower'] = data['artist_name'].str.lower()
        data.set_index('artist_lower', inplace=True)

        # Select only the columns we intend to return.
        lookup_cols = [col for col in ('artist_img', 'country', 'artist_genre')
                       if col in data.columns]
        if lookup_cols:
            # to_dict(orient='index') rejects a non-unique index, so keep the
            # first row per name and drop rows whose name is missing.
            usable = data.index.notna() & ~data.index.duplicated(keep='first')
            artist_map = data.loc[usable, lookup_cols].fillna('').to_dict(orient='index')
            logger.info(f"Built artist lookup map with {len(artist_map)} entries.")
        else:
            logger.error("Could not build artist map, required columns missing after check.")
            artist_map = {}

        ARTIST_DATA = data
        ARTIST_MAP = artist_map
        return True

    except Exception as e:
        logger.error(f"Error loading artist data: {e}", exc_info=True)
        ARTIST_DATA = None  # Ensure it's None on failure
        ARTIST_MAP = {}
        return False
96
 
97
def get_bulk_artist_info(artist_names: list) -> dict:
    """
    Look up image, country and genre for each name in ``artist_names``.

    Matching is done on the lowercase name against ``ARTIST_MAP``.

    Args:
        artist_names: A list of artist names. Entries that are empty or not
            strings are skipped.

    Returns:
        A dictionary keyed by the original artist name. Each value has the
        keys 'artist_img', 'country' and 'artist_genre'; a field that is
        missing or empty, and every field of an artist that is not in the
        map, gets a placeholder value. An empty dictionary is returned when
        the artist map is empty.
    """
    if not ARTIST_MAP:
        logger.warning("Artist map is not loaded or empty. Cannot fetch artist info.")
        return {}

    default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="
    placeholders = {
        'artist_img': default_img,
        'country': "Unknown",
        'artist_genre': "Unknown"
    }

    results = {}
    for name in artist_names:
        if not (name and isinstance(name, str)):
            continue

        record = ARTIST_MAP.get(name.lower())
        if record:
            # Falsy stored values (e.g. '') fall back to the placeholder too.
            results[name] = {
                field: record.get(field) or placeholder
                for field, placeholder in placeholders.items()
            }
        else:
            # Artist not found in the map: return the placeholder structure.
            results[name] = dict(placeholders)

    return results
138
 
139
- # Load data when the module is imported
140
- load_artist_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
  import pandas as pd
3
  import os
4
+ import re
5
 
6
+ # Configure logging
7
  logger = logging.getLogger(__name__)
8
 
9
+ # Global artist data
10
  ARTIST_DATA = None
11
+ # Artist map for faster lookups {artist_name_lower: {info}}
12
+ ARTIST_MAP = {}
13
 
14
def load_artist_data():
    """
    Load artist data from the CSV file and build the lookup map.

    The CSV is searched for in the ``datasets`` directory next to this module,
    under either ``Global Music Artists.csv`` or ``Global_Music_Artists.csv``.
    Two column naming conventions are supported: ``artist_name`` /
    ``artist_img`` / ``country`` / ``artist_genre`` and ``Artist`` /
    ``Artist Img`` / ``Country`` / ``Artist Genre``.

    On success ``ARTIST_DATA`` holds the DataFrame (with an added
    ``artist_lower`` column) and ``ARTIST_MAP`` maps each lowercase artist
    name to a dict with the keys 'artist_name', 'artist_img', 'country' and
    'artist_genre'. Both globals are only touched once everything has been
    built, so a failed load leaves the previous state in place.

    Returns:
        bool: True when the data was loaded, False otherwise.
    """
    global ARTIST_DATA, ARTIST_MAP

    try:
        # Get the directory of the current script
        current_dir = os.path.dirname(os.path.abspath(__file__))
        datasets_path = os.path.join(current_dir, 'datasets')

        # Accept both filename variants; the first one that exists wins.
        candidates = [
            os.path.join(datasets_path, filename)
            for filename in ('Global Music Artists.csv', 'Global_Music_Artists.csv')
        ]
        artist_path = next((path for path in candidates if os.path.exists(path)), None)
        if artist_path is None:
            logger.error("Artist dataset not found in datasets directory")
            return False

        # Load with appropriate encoding
        try:
            data = pd.read_csv(artist_path, on_bad_lines='skip', engine='python', encoding='utf-8')
        except UnicodeDecodeError:
            logger.warning("UTF-8 decoding failed, trying latin1 for artist CSV")
            data = pd.read_csv(artist_path, on_bad_lines='skip', engine='python', encoding='latin1')

        logger.info(f"Loaded artist dataset with {len(data)} entries")
        logger.info(f"Artist columns: {data.columns.tolist()}")

        # Resolve the column names once, for whichever convention the CSV uses.
        if 'artist_name' in data.columns:
            name_col = 'artist_name'
            image_col, country_col, genre_col = 'artist_img', 'country', 'artist_genre'
        elif 'Artist' in data.columns:
            name_col = 'Artist'
            image_col = 'Artist Img' if 'Artist Img' in data.columns else 'artist_img'
            country_col = 'Country' if 'Country' in data.columns else 'country'
            genre_col = 'Artist Genre' if 'Artist Genre' in data.columns else 'artist_genre'
        else:
            logger.error(f"Critical: Artist name column not found in artist CSV. Found: {data.columns.tolist()}")
            return False

        # Lowercase names are the lookup keys (case-insensitive matching).
        data['artist_lower'] = data[name_col].str.lower()

        # Rows without a name would otherwise end up under a NaN key.
        named = data[data['artist_lower'].notna()]
        artist_map = {
            key: {
                'artist_name': name,
                'artist_img': image,
                'country': country,
                'artist_genre': genre
            }
            for key, name, image, country, genre in zip(
                named['artist_lower'].tolist(),
                named[name_col].tolist(),
                _column_values(named, image_col, ''),
                _column_values(named, country_col, 'Unknown'),
                _column_values(named, genre_col, 'Unknown'),
            )
        }

        ARTIST_DATA = data
        # Refill the existing dict in place: stale entries from an earlier
        # load disappear and references held elsewhere stay valid.
        ARTIST_MAP.clear()
        ARTIST_MAP.update(artist_map)

        logger.info(f"Built artist lookup map with {len(ARTIST_MAP)} entries")
        return True

    except Exception as e:
        logger.error(f"Error loading artist data: {e}", exc_info=True)
        return False


def _column_values(frame, column, default):
    """Return ``frame[column]`` as a list, using ``default`` for missing cells or a missing column."""
    if column not in frame.columns:
        return [default] * len(frame)
    values = frame[column]
    # Cast to object first so a text default can replace NaN in numeric columns.
    return values.astype(object).where(values.notna(), default).tolist()
91
 
92
def get_artist_info(artist_name):
    """
    Get artist information from the artist map.

    The name is matched case-insensitively, ignoring surrounding whitespace.
    If there is no exact match, the closest key that contains the name (or is
    contained in it) is used, provided the shorter of the two is more than
    half as long as the longer one.

    Args:
        artist_name: Artist name to look up.

    Returns:
        dict: keys 'artist', 'artist_img', 'country' and 'artist_genre'.
        'artist' echoes ``artist_name`` ("Unknown Artist" when it is empty).
        The other fields fall back to a placeholder image URL / 'Unknown'
        when the artist is not found or the stored value is missing.
    """
    default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="

    fallback = {
        'artist': artist_name or "Unknown Artist",
        'artist_img': default_img,
        'country': 'Unknown',
        'artist_genre': 'Unknown'
    }

    # Non-string names cannot be lowercased; treat them like an unknown artist.
    if not artist_name or not isinstance(artist_name, str) or not ARTIST_MAP:
        return fallback

    artist_lower = artist_name.strip().lower()
    artist_info = ARTIST_MAP.get(artist_lower) or _closest_artist_record(artist_lower)
    if not artist_info:
        return fallback

    return {
        'artist': artist_name,
        'artist_img': _value_or_default(artist_info.get('artist_img'), default_img),
        'country': _value_or_default(artist_info.get('country'), 'Unknown'),
        'artist_genre': _value_or_default(artist_info.get('artist_genre'), 'Unknown')
    }


def _closest_artist_record(artist_lower, threshold=0.5):
    """Return the ARTIST_MAP record whose key best matches by substring, or None."""
    if not artist_lower:
        return None

    best_match = None
    best_score = 0.0
    for key in ARTIST_MAP:
        # Keys built from missing CSV cells may not be strings.
        if not isinstance(key, str) or not key:
            continue
        if artist_lower in key or key in artist_lower:
            # Shorter length over longer length, so a tiny query inside a
            # long key scores low instead of a perfect 1.0.
            score = min(len(key), len(artist_lower)) / max(len(key), len(artist_lower))
            if score > best_score:
                best_score = score
                best_match = key

    if best_match is not None and best_score > threshold:
        return ARTIST_MAP[best_match]
    return None


def _value_or_default(value, default):
    """Return ``value`` unless it is None, NaN or a blank string; then return ``default``."""
    if value is None:
        return default
    # NaN (an empty CSV cell) is truthy, so ``value or default`` would keep it.
    if isinstance(value, float) and value != value:
        return default
    if isinstance(value, str) and not value.strip():
        return default
    return value
146
 
147
def get_bulk_artist_info(artist_names):
    """
    Look up several artists in one call.

    Returns a dictionary keyed by each name in ``artist_names``; the value is
    whatever ``get_artist_info`` returns for that name.
    """
    return {name: get_artist_info(name) for name in artist_names}
158
 
159
def normalize_artist_name(name):
    """
    Normalize an artist name for better matching.

    The name is lowercased, runs of whitespace are collapsed, the leading
    words "the", "dj" and "mc" are dropped (each checked once, in that
    order), and every character that is not a word character or whitespace
    is removed.

    Args:
        name: Artist name; non-string values are converted with ``str()``.

    Returns:
        str: The normalized name, or "" when ``name`` is empty or None.
    """
    if not name:
        return ""

    # Lowercase and tidy whitespace first, so leading blanks cannot hide a
    # prefix from the startswith() checks below.
    name = re.sub(r'\s+', ' ', str(name).lower()).strip()

    # Remove common prefixes
    for prefix in ("the ", "dj ", "mc "):
        if name.startswith(prefix):
            name = name[len(prefix):]

    # Remove special characters
    name = re.sub(r'[^\w\s]', '', name)

    # Removing characters can leave double or edge spaces behind.
    return re.sub(r'\s+', ' ', name).strip()