Spaces:

MonilM
/

SongPorter

Running

App Files Files Community

MonilM committed on Apr 22

Commit

b5976f7

1 Parent(s): 2478f41

HF Spaces FIx#7

Browse files

Files changed (2) hide show

app.py +45 -35
artist_utils.py +148 -109

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel, Field
 from typing import List, Dict, Optional
 import uvicorn
@@ -10,7 +10,7 @@ import time
 # Import recommender and artist utils
 from recommendation import MusicRecommender, get_hardcoded_recommendations
-from artist_utils import get_bulk_artist_info, load_artist_data
 # Configure logging
 logging.basicConfig(
@@ -157,50 +157,60 @@ async def get_recommendations_endpoint(request_data: RecommendationRequestData):
 @app.post("/artist-info/")
-# --- FIX: Update endpoint signature and logic ---
 async def get_artist_info_endpoint(request_data: ArtistInfoRequestData):
     """
-    Endpoint to get information for a single artist (adapted for backend).
-    IMPORTANT: Returns the info in the format expected by the API schema.
     """
     try:
-        artist_name = request_data.artist_name # Get the single name
         logger.info(f"Received artist info request for artist: {artist_name}")
         if not artist_name:
-             raise HTTPException(status_code=400, detail="artist_name field cannot be empty")
-        # Call the bulk function with a list containing the single name
-        artist_info_map = get_bulk_artist_info([artist_name])
-        # Get the artist info for this specific artist
-        artist_info = artist_info_map.get(artist_name, {})
-        # Format the response according to the API schema expectations - wrap values in a dictionary
-        # The response validation is expecting dictionary values, not string values
-        if artist_info:
-            # Format the response to match what the API schema expects
-            response = {
-                'artist': {'name': artist_name},
-                'artist_img': {'url': artist_info.get('artist_img', '')},
-                'country': {'name': artist_info.get('country', 'Unknown')},
-                'artist_genre': {'name': artist_info.get('artist_genre', 'Unknown')}
             }
-            logger.info(f"Returning info for artist: {artist_name}")
-            return response
-        else:
-            # Return default values in the expected dictionary format
-            default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="
-            default_response = {
-                'artist': {'name': artist_name},
-                'artist_img': {'url': default_img},
-                'country': {'name': 'Unknown'},
-                'artist_genre': {'name': 'Unknown'}
-            }
-            logger.warning(f"No info found for artist: {artist_name}, returning default values")
-            return default_response
     except Exception as e:
-        logger.error(f"Error fetching artist info: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}")
 @app.get("/")

 import logging
+from fastapi import FastAPI, HTTPException, Request, Body
 from pydantic import BaseModel, Field
 from typing import List, Dict, Optional
 import uvicorn
 # Import recommender and artist utils
 from recommendation import MusicRecommender, get_hardcoded_recommendations
+from artist_utils import get_bulk_artist_info, load_artist_data, get_artist_info
 # Configure logging
 logging.basicConfig(
 @app.post("/artist-info/")
 async def get_artist_info_endpoint(request_data: ArtistInfoRequestData):
     """
     Return the stored information for a single artist.

     Every field is sent as a one-element JSON array. The previous code
     wrapped each value in a set literal (``{value}``), which FastAPI
     serialises as an array; explicit lists keep that wire format without
     relying on set encoding.

     Args:
         request_data: Request body; only its ``artist_name`` attribute is read.

     Returns:
         dict: Keys ``artist``, ``artist_img``, ``country`` and
         ``artist_genre``, each mapped to a one-element list.

     Raises:
         HTTPException: 400 when ``artist_name`` is empty, 500 when the
             lookup fails unexpectedly.
     """
     try:
         artist_name = request_data.artist_name
         logger.info(f"Received artist info request for artist: {artist_name}")

         if not artist_name:
             raise HTTPException(status_code=400, detail="artist_name field cannot be empty")

         # Get artist info from CSV data
         artist_info = get_artist_info(artist_name)
         logger.info(f"Returning info for artist: {artist_name}")

         return {
             'artist': [artist_info['artist']],
             'artist_img': [artist_info['artist_img']],
             'country': [artist_info['country']],
             'artist_genre': [artist_info['artist_genre']]
         }
     except HTTPException:
         # Deliberate HTTP errors (such as the 400 above) must reach the client
         # unchanged instead of being rewritten as a 500 by the handler below.
         raise
     except Exception as e:
         logger.error(f"Error fetching artist info: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}") from e
@app.post("/artists/")
async def get_multiple_artist_info(artist_names: List[str] = Body(...)):
    """
    Return information for several artists in one request.

    The response maps every requested name to the same structure the single
    artist endpoint produces: each field is a one-element JSON array.

    Args:
        artist_names: JSON array of artist names taken from the request body.

    Returns:
        dict: ``{name: {'artist': [...], 'artist_img': [...],
        'country': [...], 'artist_genre': [...]}}``.

    Raises:
        HTTPException: 500 when a lookup fails unexpectedly.
    """
    try:
        logger.info(f"Received request for multiple artists: {artist_names}")

        results = {}
        for name in artist_names:
            info = get_artist_info(name)
            # Lists, not set literals: a set cannot be serialised by
            # json.dumps, which made the previous JSONResponse fail for every
            # non-empty request.
            results[name] = {
                'artist': [info['artist']],
                'artist_img': [info['artist_img']],
                'country': [info['country']],
                'artist_genre': [info['artist_genre']]
            }

        # A plain dict is encoded by FastAPI itself, so no response class is needed.
        return results
    except HTTPException:
        # Let deliberate HTTP errors through unchanged.
        raise
    except Exception as e:
        logger.error(f"Error fetching multiple artist info: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to fetch artist info: {str(e)}") from e
 @app.get("/")

artist_utils.py CHANGED Viewed

@@ -1,140 +1,179 @@
 import pandas as pd
 import os
-import logging
 logger = logging.getLogger(__name__)
 ARTIST_DATA = None
-ARTIST_MAP = {} # Cache for faster lookups {artist_name_lower: {info}}
 def load_artist_data():
-    """Loads the artist dataset from the CSV file."""
     global ARTIST_DATA, ARTIST_MAP
-    if ARTIST_DATA is not None:
-        return True # Already loaded
     try:
         # Get the directory of the current script
         current_dir = os.path.dirname(os.path.abspath(__file__))
         datasets_path = os.path.join(current_dir, 'datasets')
-        # --- IMPORTANT: Check for both filename variants ---
-        csv_path = os.path.join(datasets_path, 'Global Music Artists.csv')
-        if not os.path.exists(csv_path):
             # Try alternative filename
-            csv_path = os.path.join(datasets_path, 'Global_Music_Artists.csv')
-            if not os.path.exists(csv_path):
-                logger.error(f"Artist CSV file not found at: {csv_path}")
-                raise FileNotFoundError(f"Artist CSV file not found in datasets directory")
-        # --- ---
-        if not os.path.exists(csv_path):
-             logger.error(f"Artist CSV file not found at: {csv_path}")
-             raise FileNotFoundError(f"Artist CSV file not found at: {csv_path}")
-        # Specify encoding if necessary, common ones are 'utf-8' or 'latin1'
         try:
-            ARTIST_DATA = pd.read_csv(csv_path, on_bad_lines='skip', engine='python', encoding='utf-8')
         except UnicodeDecodeError:
-             logger.warning("UTF-8 decoding failed, trying latin1 for artist CSV.")
-             ARTIST_DATA = pd.read_csv(csv_path, on_bad_lines='skip', engine='python', encoding='latin1')
-        logger.info(f"Loaded artist dataset: {csv_path} with {len(ARTIST_DATA)} rows.")
-        logger.info(f"Artist data columns: {ARTIST_DATA.columns.tolist()}")
-        # --- Adjust column mapping based on actual CSV columns ---
-        # From error log, we found: ['artist_name', 'artist_genre', 'artist_img', 'artist_id', 'country', 'Unnamed: 5']
-        # Map them to the expected column names
-        column_mapping = {
-            'Artist': 'artist_name',
-            'Artist Img': 'artist_img',
-            'Country': 'country',
-            'Artist Genre': 'artist_genre'
-        }
-        # Check which of our expected columns are actually present in the CSV
-        actual_columns = ARTIST_DATA.columns.tolist()
-        # Create a mapping from expected column names to actual ones
-        adjusted_mapping = {expected: actual for expected, actual in column_mapping.items()
-                           if actual in actual_columns}
-        # Log the mapping being used
-        logger.info(f"Using column mapping: {adjusted_mapping}")
-        # Modified column check
-        if 'artist_name' not in actual_columns:
-            logger.error(f"Critical: 'artist_name' column not found in artist CSV.")
-            return False
-        # Pre-process and build the lookup map (lowercase artist names for case-insensitive matching)
-        # Ensure 'artist_name' column exists (which replaces 'Artist')
         if 'artist_name' in ARTIST_DATA.columns:
-            ARTIST_DATA['artist_lower'] = ARTIST_DATA['artist_name'].str.lower()
-            ARTIST_DATA.set_index('artist_lower', inplace=True)
-            # Convert relevant parts to a dictionary for faster lookup
-            # Select only the columns we intend to return
-            lookup_cols = ['artist_img', 'country', 'artist_genre']
-            lookup_cols = [col for col in lookup_cols if col in ARTIST_DATA.columns]
-            if lookup_cols:
-                 # Fill NaN values to avoid issues during conversion
-                 ARTIST_MAP = ARTIST_DATA[lookup_cols].fillna('').to_dict(orient='index')
-                 logger.info(f"Built artist lookup map with {len(ARTIST_MAP)} entries.")
-            else:
-                 logger.error("Could not build artist map, required columns missing after check.")
-                 ARTIST_MAP = {}
         else:
-             logger.error("Critical: 'artist_name' column not found in artist CSV.")
-             ARTIST_MAP = {}
         return True
     except Exception as e:
         logger.error(f"Error loading artist data: {e}", exc_info=True)
-        ARTIST_DATA = None # Ensure it's None on failure
-        ARTIST_MAP = {}
         return False
-def get_bulk_artist_info(artist_names: list) -> dict:
     """
-    Gets information for a list of artist names from the loaded data.
-    Args:
-        artist_names: A list of artist names (strings).
-    Returns:
-        A dictionary where keys are the original artist names and values are
-        dictionaries containing artist info (e.g., {'artist_img': ..., 'country': ...}).
-        Returns info for found artists only.
     """
-    if not ARTIST_MAP:
-        logger.warning("Artist map is not loaded or empty. Cannot fetch artist info.")
-        return {}
-    results = {}
     default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="
     for name in artist_names:
-        if not name or not isinstance(name, str):
-            continue
-        # --- Adjust keys to match the actual CSV column names ---
-        artist_info_raw = ARTIST_MAP.get(name.lower())
-        if artist_info_raw:
-             results[name] = {
-                 'artist_img': artist_info_raw.get('artist_img') or default_img,
-                 'country': artist_info_raw.get('country') or "Unknown",
-                 'artist_genre': artist_info_raw.get('artist_genre') or "Unknown"
-             }
-        else:
-             # Artist not found in CSV, provide default structure
-             results[name] = {
-                 'artist_img': default_img,
-                 'country': "Unknown",
-                 'artist_genre': "Unknown"
-             }
-         # --- ---
     return results
-# Load data when the module is imported
-load_artist_data()

+import logging
 import pandas as pd
 import os
+import re
+# Configure logging
 logger = logging.getLogger(__name__)
+# Global artist data
 ARTIST_DATA = None
+# Artist map for faster lookups {artist_name_lower: {info}}
+ARTIST_MAP = {}
 def load_artist_data():
     """Load the artist CSV and rebuild the module-level lookup structures.

     Looks for ``Global Music Artists.csv`` (or ``Global_Music_Artists.csv``)
     in the ``datasets`` directory next to this module, stores the frame in
     ``ARTIST_DATA`` and refills ``ARTIST_MAP`` with one entry per artist,
     keyed by the lower-cased artist name.

     Two column conventions are accepted: ``artist_name`` / ``artist_img`` /
     ``country`` / ``artist_genre`` and ``Artist`` / ``Artist Img`` /
     ``Country`` / ``Artist Genre``.

     Returns:
         bool: True when the data was loaded and the map rebuilt; False when
         the file or the artist-name column is missing or any other error
         occurs (errors are logged, never raised).
     """
     global ARTIST_DATA, ARTIST_MAP

     try:
         # Get the directory of the current script
         current_dir = os.path.dirname(os.path.abspath(__file__))
         datasets_path = os.path.join(current_dir, 'datasets')

         # Accept both filename variants, preferring the one with spaces.
         artist_path = None
         for filename in ('Global Music Artists.csv', 'Global_Music_Artists.csv'):
             candidate = os.path.join(datasets_path, filename)
             if os.path.exists(candidate):
                 artist_path = candidate
                 break
         if artist_path is None:
             logger.error("Artist dataset not found in datasets directory")
             return False

         # Load with appropriate encoding
         try:
             ARTIST_DATA = pd.read_csv(artist_path, on_bad_lines='skip', engine='python', encoding='utf-8')
         except UnicodeDecodeError:
             logger.warning("UTF-8 decoding failed, trying latin1 for artist CSV")
             ARTIST_DATA = pd.read_csv(artist_path, on_bad_lines='skip', engine='python', encoding='latin1')

         logger.info(f"Loaded artist dataset with {len(ARTIST_DATA)} entries")
         logger.info(f"Artist columns: {ARTIST_DATA.columns.tolist()}")

         columns = ARTIST_DATA.columns.tolist()

         def first_present(*candidates):
             """Return the first candidate column that exists in the CSV, else None."""
             return next((name for name in candidates if name in columns), None)

         # Map columns to expected names, handling both CSV naming conventions.
         if 'artist_name' in columns:
             name_col = 'artist_name'
             image_col = first_present('artist_img')
             country_col = first_present('country')
             genre_col = first_present('artist_genre')
         elif 'Artist' in columns:
             name_col = 'Artist'
             image_col = first_present('Artist Img', 'artist_img')
             country_col = first_present('Country', 'country')
             genre_col = first_present('Artist Genre', 'artist_genre')
         else:
             logger.error(f"Critical: Artist name column not found in artist CSV. Found: {columns}")
             return False

         # Create lookup keys from lowercase artist names
         ARTIST_DATA['artist_lower'] = ARTIST_DATA[name_col].str.lower()

         def column_values(column, default):
             """Return the column as a list with missing cells replaced by default."""
             if column is None:
                 return [default] * len(ARTIST_DATA)
             return [default if pd.isna(value) else value
                     for value in ARTIST_DATA[column].tolist()]

         rows = zip(
             ARTIST_DATA['artist_lower'].tolist(),
             ARTIST_DATA[name_col].tolist(),
             column_values(image_col, ''),
             column_values(country_col, 'Unknown'),
             column_values(genre_col, 'Unknown'),
         )

         lookup = {}
         for key, name, image, country, genre in rows:
             # Rows without a usable name produce a NaN key; skip them so the
             # map only ever contains string keys.
             if not isinstance(key, str):
                 continue
             lookup[key] = {
                 'artist_name': name,
                 'artist_img': image,
                 'country': country,
                 'artist_genre': genre
             }

         # Refill the existing dict in place: stale entries from an earlier
         # load disappear and any other holder of the dict sees the new data.
         ARTIST_MAP.clear()
         ARTIST_MAP.update(lookup)

         logger.info(f"Built artist lookup map with {len(ARTIST_MAP)} entries")
         return True

     except Exception as e:
         logger.error(f"Error loading artist data: {e}", exc_info=True)
         return False
def get_artist_info(artist_name):
    """
    Look up one artist in ``ARTIST_MAP``.

    The lookup is case-insensitive and ignores surrounding whitespace. When
    there is no exact match, the closest key that contains the query (or is
    contained in it) is used, provided the shorter of the two names covers
    more than half of the longer one.

    Args:
        artist_name: Artist name to look up. Anything that is not a
            non-empty string yields the default record.

    Returns:
        dict: Keys ``artist``, ``artist_img``, ``country`` and
        ``artist_genre``. ``artist`` echoes the requested name (or
        ``"Unknown Artist"``); missing, empty or NaN fields fall back to a
        placeholder image URL or ``'Unknown'``.
    """
    default_img = "https://media.istockphoto.com/id/1298261537/vector/blank-man-profile-head-icon-placeholder.jpg?s=612x612&w=0&k=20&c=CeT1RVWZzQDay4t54ookMaFsdi7ZHVFg2Y5v7hxigCA="

    def usable(value, fallback):
        """Return value unless it is None, NaN or a blank string."""
        # NaN is the only value that compares unequal to itself; it shows up
        # when a CSV cell was empty.
        if value is None or value != value:
            return fallback
        if isinstance(value, str) and not value.strip():
            return fallback
        return value

    def describe(display_name, record=None):
        """Build the response dict from a map entry (or from defaults)."""
        record = record or {}
        return {
            'artist': display_name,
            'artist_img': usable(record.get('artist_img'), default_img),
            'country': usable(record.get('country'), 'Unknown'),
            'artist_genre': usable(record.get('artist_genre'), 'Unknown')
        }

    if not isinstance(artist_name, str):
        return describe("Unknown Artist")

    query = artist_name.strip().lower()
    if not query or not ARTIST_MAP:
        return describe(artist_name or "Unknown Artist")

    record = ARTIST_MAP.get(query)
    if record:
        return describe(artist_name, record)

    # Fuzzy fallback: simple substring matching, scored by length similarity.
    best_match = None
    best_score = 0.0
    for key in ARTIST_MAP:
        if not isinstance(key, str) or not key:
            continue
        if query in key or key in query:
            # shorter / longer, so a short query inside a long key scores low
            # (dividing the key length by the maximum scored those as 1.0).
            score = min(len(key), len(query)) / max(len(key), len(query))
            if score > best_score:
                best_score = score
                best_match = key

    if best_match is not None and best_score > 0.5:  # Threshold for accepting a match
        return describe(artist_name, ARTIST_MAP[best_match])

    # Default values if no match
    return describe(artist_name)
def get_bulk_artist_info(artist_names):
    """
    Look up several artists in one call.

    Returns a dictionary that maps each requested name to the record
    produced by ``get_artist_info`` for that name.
    """
    return {requested: get_artist_info(requested) for requested in artist_names}
def normalize_artist_name(name):
    """Normalize an artist name for more forgiving matching.

    The name is lower-cased, punctuation is removed, runs of whitespace are
    collapsed and the result is trimmed. Only then are the leading words
    "the", "dj" and "mc" dropped, so padding or punctuation in front of a
    prefix cannot hide it and a name consisting of just a prefix is kept.

    Args:
        name: Raw artist name. Empty values and non-strings give ``""``.

    Returns:
        str: The normalized name.
    """
    if not name or not isinstance(name, str):
        return ""

    # Convert to lowercase
    name = name.lower()

    # Remove special characters, then collapse and trim whitespace
    name = re.sub(r'[^\w\s]', '', name)
    name = re.sub(r'\s+', ' ', name).strip()

    # Remove common prefixes. The text is already trimmed and single-spaced,
    # so a matching prefix is always followed by at least one more character.
    prefixes = ["the ", "dj ", "mc "]
    for prefix in prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):]

    return name