Spaces:
Running
Running
HF Spaces FIx#7
Browse files- app.py +45 -35
- artist_utils.py +148 -109
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import logging
|
2 |
-
from fastapi import FastAPI, HTTPException, Request
|
3 |
from pydantic import BaseModel, Field
|
4 |
from typing import List, Dict, Optional
|
5 |
import uvicorn
|
@@ -10,7 +10,7 @@ import time
|
|
10 |
|
11 |
# Import recommender and artist utils
|
12 |
from recommendation import MusicRecommender, get_hardcoded_recommendations
|
13 |
-
from artist_utils import get_bulk_artist_info, load_artist_data
|
14 |
|
15 |
# Configure logging
|
16 |
logging.basicConfig(
|
@@ -157,50 +157,60 @@ async def get_recommendations_endpoint(request_data: RecommendationRequestData):
|
|
157 |
|
158 |
|
159 |
@app.post("/artist-info/")
|
160 |
-
# --- FIX: Update endpoint signature and logic ---
|
161 |
async def get_artist_info_endpoint(request_data: ArtistInfoRequestData):
|
162 |
"""
|
163 |
-
Endpoint to get information for a single artist
|
164 |
-
|
165 |
"""
|
166 |
try:
|
167 |
-
artist_name = request_data.artist_name
|
168 |
logger.info(f"Received artist info request for artist: {artist_name}")
|
|
|
169 |
if not artist_name:
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
-
|
176 |
-
artist_info = artist_info_map.get(artist_name, {})
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
# Format the
|
182 |
-
|
183 |
-
'artist': {'
|
184 |
-
'artist_img': {'
|
185 |
-
'country': {'
|
186 |
-
'artist_genre': {'
|
187 |
}
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
# Return default values in the expected dictionary format
|
192 |
-
default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="
|
193 |
-
default_response = {
|
194 |
-
'artist': {'name': artist_name},
|
195 |
-
'artist_img': {'url': default_img},
|
196 |
-
'country': {'name': 'Unknown'},
|
197 |
-
'artist_genre': {'name': 'Unknown'}
|
198 |
-
}
|
199 |
-
logger.warning(f"No info found for artist: {artist_name}, returning default values")
|
200 |
-
return default_response
|
201 |
-
|
202 |
except Exception as e:
|
203 |
-
logger.error(f"Error fetching artist info: {e}", exc_info=True)
|
204 |
raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
|
205 |
|
206 |
@app.get("/")
|
|
|
1 |
import logging
|
2 |
+
from fastapi import FastAPI, HTTPException, Request, Body
|
3 |
from pydantic import BaseModel, Field
|
4 |
from typing import List, Dict, Optional
|
5 |
import uvicorn
|
|
|
10 |
|
11 |
# Import recommender and artist utils
|
12 |
from recommendation import MusicRecommender, get_hardcoded_recommendations
|
13 |
+
from artist_utils import get_bulk_artist_info, load_artist_data, get_artist_info
|
14 |
|
15 |
# Configure logging
|
16 |
logging.basicConfig(
|
|
|
157 |
|
158 |
|
159 |
@app.post("/artist-info/")
async def get_artist_info_endpoint(request_data: ArtistInfoRequestData):
    """
    Endpoint to get information for a single artist in the format expected by Django views.

    The artist is looked up with ``get_artist_info`` and every field is
    returned wrapped in a one-element list, e.g. ``{"artist": ["Name"], ...}``.

    Args:
        request_data: Request body; only ``artist_name`` is read here.

    Returns:
        A dict with the keys ``artist``, ``artist_img``, ``country`` and
        ``artist_genre``, each mapped to a single-element list.

    Raises:
        HTTPException: 400 when ``artist_name`` is empty, 500 when the lookup
            fails unexpectedly.
    """
    try:
        artist_name = request_data.artist_name
        logger.info(f"Received artist info request for artist: {artist_name}")

        if not artist_name:
            raise HTTPException(status_code=400, detail="artist_name field cannot be empty")

        # Get artist info from CSV data
        artist_info = get_artist_info(artist_name)
        logger.info(f"Returning info for artist: {artist_name}")

        # Each value is wrapped in a list rather than a set literal: a
        # one-element set is encoded as the same JSON array, but a list also
        # works for unhashable values and makes the wire format explicit.
        return {
            'artist': [artist_info['artist']],
            'artist_img': [artist_info['artist_img']],
            'country': [artist_info['country']],
            'artist_genre': [artist_info['artist_genre']]
        }

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged instead
        # of re-wrapping them as a 500 in the generic handler below.
        raise
    except Exception as e:
        logger.error(f"Error fetching artist info: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
|
187 |
|
188 |
+
@app.post("/artists/")
async def get_multiple_artist_info(artist_names: List[str] = Body(...)):
    """
    Get information for multiple artists in one request.

    Each requested name is looked up with ``get_artist_info``. The per-artist
    payload has the same shape as the single artist endpoint: every field is
    wrapped in a one-element list.

    Args:
        artist_names: JSON array of artist names sent as the request body.

    Returns:
        A dict mapping each requested name to a dict with the keys ``artist``,
        ``artist_img``, ``country`` and ``artist_genre``.

    Raises:
        HTTPException: 500 when the lookup fails unexpectedly.
    """
    try:
        logger.info(f"Received request for multiple artists: {artist_names}")

        results = {}

        for name in artist_names:
            info = get_artist_info(name)

            # Lists, not set literals: json.dumps cannot serialise a set, so
            # set-valued fields made every non-empty request fail.
            results[name] = {
                'artist': [info['artist']],
                'artist_img': [info['artist_img']],
                'country': [info['country']],
                'artist_genre': [info['artist_genre']]
            }

        # Returning the plain dict lets FastAPI build the JSON response itself,
        # exactly as the single artist endpoint does.
        return results

    except HTTPException:
        # Preserve any deliberate HTTP error instead of masking it as a 500.
        raise
    except Exception as e:
        logger.error(f"Error fetching multiple artist info: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
|
215 |
|
216 |
@app.get("/")
|
artist_utils.py
CHANGED
@@ -1,140 +1,179 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
import os
|
3 |
-
import
|
4 |
|
|
|
5 |
logger = logging.getLogger(__name__)
|
6 |
|
|
|
7 |
ARTIST_DATA = None
|
8 |
-
|
|
|
9 |
|
10 |
def load_artist_data():
|
11 |
-
"""
|
12 |
global ARTIST_DATA, ARTIST_MAP
|
13 |
-
|
14 |
-
return True # Already loaded
|
15 |
-
|
16 |
try:
|
17 |
# Get the directory of the current script
|
18 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
19 |
datasets_path = os.path.join(current_dir, 'datasets')
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
23 |
# Try alternative filename
|
24 |
-
|
25 |
-
if not os.path.exists(
|
26 |
-
logger.error(f"Artist
|
27 |
-
|
28 |
-
# --- ---
|
29 |
-
|
30 |
-
if not os.path.exists(csv_path):
|
31 |
-
logger.error(f"Artist CSV file not found at: {csv_path}")
|
32 |
-
raise FileNotFoundError(f"Artist CSV file not found at: {csv_path}")
|
33 |
|
34 |
-
#
|
35 |
try:
|
36 |
-
ARTIST_DATA = pd.read_csv(
|
37 |
except UnicodeDecodeError:
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
logger.info(f"Loaded artist dataset
|
42 |
-
logger.info(f"Artist
|
43 |
-
|
44 |
-
#
|
45 |
-
#
|
46 |
-
|
47 |
-
column_mapping = {
|
48 |
-
'Artist': 'artist_name',
|
49 |
-
'Artist Img': 'artist_img',
|
50 |
-
'Country': 'country',
|
51 |
-
'Artist Genre': 'artist_genre'
|
52 |
-
}
|
53 |
-
|
54 |
-
# Check which of our expected columns are actually present in the CSV
|
55 |
-
actual_columns = ARTIST_DATA.columns.tolist()
|
56 |
-
|
57 |
-
# Create a mapping from expected column names to actual ones
|
58 |
-
adjusted_mapping = {expected: actual for expected, actual in column_mapping.items()
|
59 |
-
if actual in actual_columns}
|
60 |
-
|
61 |
-
# Log the mapping being used
|
62 |
-
logger.info(f"Using column mapping: {adjusted_mapping}")
|
63 |
-
|
64 |
-
# Modified column check
|
65 |
-
if 'artist_name' not in actual_columns:
|
66 |
-
logger.error(f"Critical: 'artist_name' column not found in artist CSV.")
|
67 |
-
return False
|
68 |
-
|
69 |
-
# Pre-process and build the lookup map (lowercase artist names for case-insensitive matching)
|
70 |
-
# Ensure 'artist_name' column exists (which replaces 'Artist')
|
71 |
if 'artist_name' in ARTIST_DATA.columns:
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
# Select only the columns we intend to return
|
76 |
-
lookup_cols = ['artist_img', 'country', 'artist_genre']
|
77 |
-
lookup_cols = [col for col in lookup_cols if col in ARTIST_DATA.columns]
|
78 |
-
if lookup_cols:
|
79 |
-
# Fill NaN values to avoid issues during conversion
|
80 |
-
ARTIST_MAP = ARTIST_DATA[lookup_cols].fillna('').to_dict(orient='index')
|
81 |
-
logger.info(f"Built artist lookup map with {len(ARTIST_MAP)} entries.")
|
82 |
-
else:
|
83 |
-
logger.error("Could not build artist map, required columns missing after check.")
|
84 |
-
ARTIST_MAP = {}
|
85 |
else:
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
return True
|
90 |
-
|
91 |
except Exception as e:
|
92 |
logger.error(f"Error loading artist data: {e}", exc_info=True)
|
93 |
-
ARTIST_DATA = None # Ensure it's None on failure
|
94 |
-
ARTIST_MAP = {}
|
95 |
return False
|
96 |
|
97 |
-
def
|
98 |
"""
|
99 |
-
|
100 |
-
|
101 |
-
Args:
|
102 |
-
artist_names: A list of artist names (strings).
|
103 |
-
|
104 |
-
Returns:
|
105 |
-
A dictionary where keys are the original artist names and values are
|
106 |
-
dictionaries containing artist info (e.g., {'artist_img': ..., 'country': ...}).
|
107 |
-
Returns info for found artists only.
|
108 |
"""
|
109 |
-
if not ARTIST_MAP:
|
110 |
-
logger.warning("Artist map is not loaded or empty. Cannot fetch artist info.")
|
111 |
-
return {}
|
112 |
-
|
113 |
-
results = {}
|
114 |
default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
for name in artist_names:
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
# --- Adjust keys to match the actual CSV column names ---
|
121 |
-
artist_info_raw = ARTIST_MAP.get(name.lower())
|
122 |
-
if artist_info_raw:
|
123 |
-
results[name] = {
|
124 |
-
'artist_img': artist_info_raw.get('artist_img') or default_img,
|
125 |
-
'country': artist_info_raw.get('country') or "Unknown",
|
126 |
-
'artist_genre': artist_info_raw.get('artist_genre') or "Unknown"
|
127 |
-
}
|
128 |
-
else:
|
129 |
-
# Artist not found in CSV, provide default structure
|
130 |
-
results[name] = {
|
131 |
-
'artist_img': default_img,
|
132 |
-
'country': "Unknown",
|
133 |
-
'artist_genre': "Unknown"
|
134 |
-
}
|
135 |
-
# --- ---
|
136 |
-
|
137 |
return results
|
138 |
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
import pandas as pd
|
3 |
import os
|
4 |
+
import re
|
5 |
|
6 |
+
# Configure logging
|
7 |
logger = logging.getLogger(__name__)
|
8 |
|
9 |
+
# Global artist data
|
10 |
ARTIST_DATA = None
|
11 |
+
# Artist map for faster lookups {artist_name_lower: {info}}
|
12 |
+
ARTIST_MAP = {}
|
13 |
|
14 |
def load_artist_data():
    """Load artist data from CSV file and build lookup maps.

    Looks for ``Global Music Artists.csv`` (falling back to
    ``Global_Music_Artists.csv``) in the ``datasets`` directory next to this
    module, reads it as UTF-8 with a latin1 fallback, and refills
    ``ARTIST_MAP`` keyed by lower-cased artist name. Two column naming
    conventions are handled: ``artist_name`` / ``artist_img`` / ``country`` /
    ``artist_genre`` and ``Artist`` / ``Artist Img`` / ``Country`` /
    ``Artist Genre``.

    Side effects:
        Rebinds the global ``ARTIST_DATA`` (an ``artist_lower`` column is
        added to it) and replaces the contents of ``ARTIST_MAP`` in place.

    Returns:
        bool: True when the map was rebuilt; False when the CSV or its artist
        name column is missing, or when any error occurs while loading.
    """
    global ARTIST_DATA, ARTIST_MAP

    try:
        # Get the directory of the current script
        current_dir = os.path.dirname(os.path.abspath(__file__))
        datasets_path = os.path.join(current_dir, 'datasets')

        # Load artist dataset
        artist_path = os.path.join(datasets_path, 'Global Music Artists.csv')
        if not os.path.exists(artist_path):
            # Try alternative filename
            artist_path = os.path.join(datasets_path, 'Global_Music_Artists.csv')
            if not os.path.exists(artist_path):
                logger.error("Artist dataset not found in datasets directory")
                return False

        # Load with appropriate encoding
        try:
            ARTIST_DATA = pd.read_csv(artist_path, on_bad_lines='skip', engine='python', encoding='utf-8')
        except UnicodeDecodeError:
            logger.warning("UTF-8 decoding failed, trying latin1 for artist CSV")
            ARTIST_DATA = pd.read_csv(artist_path, on_bad_lines='skip', engine='python', encoding='latin1')

        columns = ARTIST_DATA.columns
        logger.info(f"Loaded artist dataset with {len(ARTIST_DATA)} entries")
        logger.info(f"Artist columns: {columns.tolist()}")

        # Map logical field names to the CSV column names actually present
        # (handles both the original and the fixed naming convention).
        if 'artist_name' in columns:
            column_mapping = {
                'name': 'artist_name',
                'image': 'artist_img',
                'country': 'country',
                'genre': 'artist_genre'
            }
        elif 'Artist' in columns:
            column_mapping = {
                'name': 'Artist',
                'image': 'Artist Img' if 'Artist Img' in columns else 'artist_img',
                'country': 'Country' if 'Country' in columns else 'country',
                'genre': 'Artist Genre' if 'Artist Genre' in columns else 'artist_genre'
            }
        else:
            logger.error(f"Critical: Artist name column not found in artist CSV. Found: {columns.tolist()}")
            return False

        # Create lookup keys from lowercase artist names. Missing names stay
        # missing (so they can be skipped below); everything else is converted
        # to text first so purely numeric names do not break the .str accessor.
        name_col = column_mapping['name']
        names = ARTIST_DATA[name_col]
        ARTIST_DATA['artist_lower'] = names.where(names.isna(), names.astype(str).str.lower())

        # (output key, source column, default used when the column or value is missing)
        field_sources = (
            ('artist_img', column_mapping['image'], ''),
            ('country', column_mapping['country'], 'Unknown'),
            ('artist_genre', column_mapping['genre'], 'Unknown'),
        )

        # Build the map for faster lookups
        rebuilt = {}
        skipped = 0
        for record in ARTIST_DATA.to_dict(orient='records'):
            key = record['artist_lower']
            if not isinstance(key, str):
                # Row without an artist name: a NaN key can never be looked up
                # and would break substring matching on the map keys.
                skipped += 1
                continue

            entry = {'artist_name': record[name_col]}
            for field, column, default in field_sources:
                value = record.get(column, default)
                # Empty CSV cells are read as NaN, which is truthy and cannot
                # be emitted as strict JSON, so fall back to the default.
                entry[field] = default if pd.isna(value) else value
            rebuilt[key] = entry

        if skipped:
            logger.warning(f"Skipped {skipped} artist rows without a name")

        # Swap the contents in place so entries from an earlier load do not
        # linger and existing references to ARTIST_MAP stay valid.
        ARTIST_MAP.clear()
        ARTIST_MAP.update(rebuilt)

        logger.info(f"Built artist lookup map with {len(ARTIST_MAP)} entries")
        return True

    except Exception as e:
        logger.error(f"Error loading artist data: {e}", exc_info=True)
        return False
|
91 |
|
92 |
+
def _clean_artist_field(value, default):
    """Return *value*, or *default* when it is None, NaN or a blank string."""
    if value is None:
        return default
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return default
    if isinstance(value, str) and not value.strip():
        return default
    return value


def get_artist_info(artist_name):
    """
    Get artist information from the artist map.

    The name is matched case-insensitively against the keys of ``ARTIST_MAP``.
    When there is no exact match, keys that contain the name (or are contained
    in it) are scored by the ratio of the shorter length to the longer one, and
    the best key is accepted only if that ratio exceeds 0.5.

    Args:
        artist_name: Name to look up. Empty or non-string values get the
            default record.

    Returns:
        A dict with the keys ``artist`` (the name as requested, or
        ``"Unknown Artist"`` when it was empty), ``artist_img``, ``country``
        and ``artist_genre``. Missing values fall back to a placeholder image
        URL and ``'Unknown'``.
    """
    default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="

    def describe(info):
        # Shape a stored map entry into the response dict, applying defaults.
        return {
            'artist': artist_name,
            'artist_img': _clean_artist_field(info.get('artist_img'), default_img),
            'country': _clean_artist_field(info.get('country'), 'Unknown'),
            'artist_genre': _clean_artist_field(info.get('artist_genre'), 'Unknown')
        }

    if not artist_name or not isinstance(artist_name, str) or not ARTIST_MAP:
        return {
            'artist': artist_name or "Unknown Artist",
            'artist_img': default_img,
            'country': 'Unknown',
            'artist_genre': 'Unknown'
        }

    artist_lower = artist_name.lower()
    artist_info = ARTIST_MAP.get(artist_lower)
    if artist_info:
        return describe(artist_info)

    # Try fuzzy matching (simple substring containment in either direction)
    best_match = None
    best_score = 0
    for key in ARTIST_MAP:
        if not isinstance(key, str):
            continue  # e.g. a NaN key produced by a row without a name
        if artist_lower in key or key in artist_lower:
            # Shorter length over longer length. Dividing len(key) by the
            # maximum would score every key that merely contains the query as
            # a perfect 1.0, however short the query is.
            shorter, longer = sorted((len(key), len(artist_lower)))
            score = shorter / longer
            if score > best_score:
                best_score = score
                best_match = key

    if best_match is not None and best_score > 0.5:  # Threshold for accepting a match
        return describe(ARTIST_MAP[best_match])

    # Default values if no match
    return {
        'artist': artist_name,
        'artist_img': default_img,
        'country': 'Unknown',
        'artist_genre': 'Unknown'
    }
|
146 |
|
147 |
+
def get_bulk_artist_info(artist_names):
    """
    Get information for multiple artists at once.

    Args:
        artist_names: Iterable of artist names. ``None`` or an empty iterable
            yields an empty result; a single string is treated as one name
            rather than being iterated character by character.

    Returns:
        A dictionary mapping each artist name to the dict returned by
        ``get_artist_info`` for that name.
    """
    if not artist_names:
        return {}

    if isinstance(artist_names, str):
        artist_names = [artist_names]

    return {name: get_artist_info(name) for name in artist_names}
|
158 |
|
159 |
+
def normalize_artist_name(name):
    """Normalize artist name for better matching.

    The name is lower-cased, surrounding and repeated whitespace is collapsed,
    the leading prefixes "the ", "dj " and "mc " are removed (checked once
    each, in that order), punctuation is dropped and whitespace is collapsed
    again.

    Args:
        name: Artist name. Empty or non-string values give ``""``.

    Returns:
        The normalized name as a string.
    """
    if not name or not isinstance(name, str):
        return ""

    # Convert to lowercase and tidy whitespace first, so that leading or
    # doubled spaces cannot hide a prefix from the check below.
    name = re.sub(r'\s+', ' ', name.lower()).strip()

    # Remove common prefixes
    prefixes = ["the ", "dj ", "mc "]
    for prefix in prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):]

    # Remove special characters
    name = re.sub(r'[^\w\s]', '', name)

    # Removing punctuation can leave doubled spaces behind ("a & b")
    name = re.sub(r'\s+', ' ', name).strip()

    return name
|