| """ | |
| NOAA Climate Data Online (CDO) API Client | |
| Enhanced weather data from NOAA's historical databases | |
| """ | |
| import requests | |
| import logging | |
| from typing import List, Dict, Optional, Any | |
| from datetime import datetime, timedelta | |
| import json | |
| import time | |
| logger = logging.getLogger(__name__) | |
class NOAACDOClient:
    """Client for NOAA Climate Data Online API"""

    def __init__(self, api_token: Optional[str] = None):
        """
        Initialize NOAA CDO client.

        Args:
            api_token: NOAA CDO API token (get from https://www.ncdc.noaa.gov/cdo-web/token)
                If None, will provide instructions to user
        """
        self.base_url = "https://www.ncei.noaa.gov/cdo-web/api/v2"
        self.api_token = api_token
        self.session = requests.Session()
        if api_token is None:
            logger.warning("No NOAA CDO API token provided. Historical data features will be limited.")
        else:
            # CDO authenticates via a plain 'token' header (not Bearer auth).
            default_headers = {
                'token': api_token,
                'User-Agent': 'WeatherAppPro/1.0 (enhanced-weather-app)'
            }
            self.session.headers.update(default_headers)
| def is_available(self) -> bool: | |
| """Check if CDO API is available with token""" | |
| return self.api_token is not None | |
| def get_datasets(self) -> List[Dict]: | |
| """Get available datasets""" | |
| if not self.is_available(): | |
| return [] | |
| try: | |
| url = f"{self.base_url}/datasets" | |
| response = self.session.get(url, timeout=10) | |
| response.raise_for_status() | |
| data = response.json() | |
| return data.get('results', []) | |
| except Exception as e: | |
| logger.error(f"Error getting datasets: {e}") | |
| return [] | |
| def get_stations_near(self, lat: float, lon: float, radius: float = 50) -> List[Dict]: | |
| """ | |
| Get weather stations near coordinates | |
| Args: | |
| lat: Latitude | |
| lon: Longitude | |
| radius: Search radius in kilometers (default 50km) | |
| """ | |
| if not self.is_available(): | |
| return [] | |
| try: | |
| # Convert radius to degrees (approximate) | |
| radius_deg = radius / 111.0 # 1 degree ≈ 111 km | |
| # Define bounding box | |
| north = lat + radius_deg | |
| south = lat - radius_deg | |
| east = lon + radius_deg | |
| west = lon - radius_deg | |
| url = f"{self.base_url}/stations" | |
| params = { | |
| 'extent': f'{north},{west},{south},{east}', | |
| 'limit': 20, | |
| 'offset': 0 | |
| } | |
| response = self.session.get(url, params=params, timeout=10) | |
| response.raise_for_status() | |
| data = response.json() | |
| return data.get('results', []) | |
| except Exception as e: | |
| logger.error(f"Error getting stations: {e}") | |
| return [] | |
| def get_historical_daily_data( | |
| self, | |
| station_id: str, | |
| start_date: str, | |
| end_date: str, | |
| data_types: List[str] = None | |
| ) -> List[Dict]: | |
| """ | |
| Get historical daily weather data | |
| Args: | |
| station_id: Station identifier | |
| start_date: Start date in YYYY-MM-DD format | |
| end_date: End date in YYYY-MM-DD format | |
| data_types: List of data types (TMAX, TMIN, PRCP, SNOW, etc.) | |
| """ | |
| if not self.is_available(): | |
| return [] | |
| if data_types is None: | |
| data_types = ['TMAX', 'TMIN', 'PRCP', 'SNOW'] | |
| try: | |
| url = f"{self.base_url}/data" | |
| params = { | |
| 'datasetid': 'GHCND', # Global Historical Climatology Network Daily | |
| 'stationid': station_id, | |
| 'startdate': start_date, | |
| 'enddate': end_date, | |
| 'datatypeid': ','.join(data_types), | |
| 'limit': 1000, | |
| 'units': 'standard' | |
| } | |
| response = self.session.get(url, params=params, timeout=15) | |
| response.raise_for_status() | |
| data = response.json() | |
| return data.get('results', []) | |
| except Exception as e: | |
| logger.error(f"Error getting historical data: {e}") | |
| return [] | |
| def get_climate_normals( | |
| self, | |
| lat: float, | |
| lon: float, | |
| period: str = "2010-2020" | |
| ) -> Dict[str, Any]: | |
| """ | |
| Get climate normals for a location | |
| Args: | |
| lat: Latitude | |
| lon: Longitude | |
| period: Climate normal period | |
| """ | |
| if not self.is_available(): | |
| return {} | |
| try: | |
| # Find nearby stations first | |
| stations = self.get_stations_near(lat, lon, radius=25) | |
| if not stations: | |
| return {} | |
| station_id = stations[0]['id'] | |
| # Get normals data | |
| url = f"{self.base_url}/data" | |
| params = { | |
| 'datasetid': 'NORMAL_DLY', # Daily Climate Normals | |
| 'stationid': station_id, | |
| 'startdate': '2010-01-01', | |
| 'enddate': '2010-12-31', | |
| 'datatypeid': 'DLY-TMAX-NORMAL,DLY-TMIN-NORMAL,DLY-PRCP-NORMAL', | |
| 'limit': 1000 | |
| } | |
| response = self.session.get(url, params=params, timeout=15) | |
| response.raise_for_status() | |
| data = response.json() | |
| return { | |
| 'station': stations[0], | |
| 'normals': data.get('results', []) | |
| } | |
| except Exception as e: | |
| logger.error(f"Error getting climate normals: {e}") | |
| return {} | |
| def get_monthly_summary( | |
| self, | |
| lat: float, | |
| lon: float, | |
| year: int, | |
| month: int | |
| ) -> Dict[str, Any]: | |
| """Get monthly weather summary""" | |
| if not self.is_available(): | |
| return {} | |
| try: | |
| stations = self.get_stations_near(lat, lon) | |
| if not stations: | |
| return {} | |
| station_id = stations[0]['id'] | |
| start_date = f"{year}-{month:02d}-01" | |
| # Calculate end date | |
| if month == 12: | |
| end_date = f"{year + 1}-01-01" | |
| else: | |
| end_date = f"{year}-{month + 1:02d}-01" | |
| data = self.get_historical_daily_data( | |
| station_id, start_date, end_date, | |
| ['TMAX', 'TMIN', 'PRCP', 'SNOW', 'AWND'] | |
| ) | |
| return { | |
| 'station': stations[0], | |
| 'data': data, | |
| 'summary': self._calculate_monthly_summary(data) | |
| } | |
| except Exception as e: | |
| logger.error(f"Error getting monthly summary: {e}") | |
| return {} | |
| def _calculate_monthly_summary(self, daily_data: List[Dict]) -> Dict: | |
| """Calculate summary statistics from daily data""" | |
| if not daily_data: | |
| return {} | |
| temps_max = [] | |
| temps_min = [] | |
| precip_total = 0 | |
| for record in daily_data: | |
| datatype = record.get('datatype') | |
| value = record.get('value', 0) | |
| if datatype == 'TMAX': | |
| temps_max.append(value / 10.0) # Convert tenths of Celsius | |
| elif datatype == 'TMIN': | |
| temps_min.append(value / 10.0) | |
| elif datatype == 'PRCP': | |
| precip_total += value / 10.0 # Convert tenths of mm | |
| summary = {} | |
| if temps_max: | |
| summary['avg_high_c'] = sum(temps_max) / len(temps_max) | |
| summary['max_temp_c'] = max(temps_max) | |
| summary['min_high_c'] = min(temps_max) | |
| if temps_min: | |
| summary['avg_low_c'] = sum(temps_min) / len(temps_min) | |
| summary['min_temp_c'] = min(temps_min) | |
| summary['max_low_c'] = max(temps_min) | |
| summary['total_precip_mm'] = precip_total | |
| return summary | |
| def get_extreme_events( | |
| self, | |
| lat: float, | |
| lon: float, | |
| start_date: str, | |
| end_date: str, | |
| event_types: List[str] = None | |
| ) -> List[Dict]: | |
| """ | |
| Get extreme weather events | |
| Args: | |
| lat: Latitude | |
| lon: Longitude | |
| start_date: Start date YYYY-MM-DD | |
| end_date: End date YYYY-MM-DD | |
| event_types: Types of events to search for | |
| """ | |
| if not self.is_available(): | |
| return [] | |
| if event_types is None: | |
| event_types = ['TMAX', 'TMIN', 'PRCP'] | |
| try: | |
| stations = self.get_stations_near(lat, lon) | |
| if not stations: | |
| return [] | |
| station_id = stations[0]['id'] | |
| data = self.get_historical_daily_data( | |
| station_id, start_date, end_date, event_types | |
| ) | |
| # Identify extreme events | |
| extremes = [] | |
| for record in data: | |
| value = record.get('value', 0) | |
| datatype = record.get('datatype') | |
| date = record.get('date') | |
| # Define thresholds for extreme events | |
| if datatype == 'TMAX' and value > 350: # > 35°C (95°F) | |
| extremes.append({ | |
| 'date': date, | |
| 'type': 'Extreme Heat', | |
| 'value': value / 10.0, | |
| 'unit': '°C' | |
| }) | |
| elif datatype == 'TMIN' and value < -100: # < -10°C (14°F) | |
| extremes.append({ | |
| 'date': date, | |
| 'type': 'Extreme Cold', | |
| 'value': value / 10.0, | |
| 'unit': '°C' | |
| }) | |
| elif datatype == 'PRCP' and value > 500: # > 50mm (2 inches) | |
| extremes.append({ | |
| 'date': date, | |
| 'type': 'Heavy Precipitation', | |
| 'value': value / 10.0, | |
| 'unit': 'mm' | |
| }) | |
| return extremes | |
| except Exception as e: | |
| logger.error(f"Error getting extreme events: {e}") | |
| return [] | |
| async def get_historical_daily_data_by_location( | |
| self, | |
| location: str, | |
| days_back: int = 7, | |
| data_types: List[str] = None | |
| ) -> List[Dict]: | |
| """ | |
| Get historical daily weather data for a location | |
| Args: | |
| location: Location name (e.g., "New York, NY" or "Seattle") | |
| days_back: Number of days back from today | |
| data_types: List of data types (TMAX, TMIN, PRCP, SNOW, etc.) | |
| """ | |
| if not self.is_available(): | |
| # Return mock data for testing when no API token | |
| return self._generate_mock_historical_data(location, days_back) | |
| if data_types is None: | |
| data_types = ['TMAX', 'TMIN', 'PRCP', 'SNOW'] | |
| try: | |
| # For this method, we'll need to geocode the location first | |
| # Since we don't have geocoding in this client, we'll use major cities mapping | |
| coords = self._get_location_coords(location) | |
| if not coords: | |
| logger.warning(f"Could not find coordinates for location: {location}") | |
| return self._generate_mock_historical_data(location, days_back) | |
| lat, lon = coords | |
| # Find nearby stations | |
| stations = self.get_stations_near(lat, lon, radius=50) | |
| if not stations: | |
| logger.warning(f"No weather stations found near {location}") | |
| return self._generate_mock_historical_data(location, days_back) | |
| # Calculate date range | |
| end_date = datetime.now() | |
| start_date = end_date - timedelta(days=days_back) | |
| start_date_str = start_date.strftime('%Y-%m-%d') | |
| end_date_str = end_date.strftime('%Y-%m-%d') | |
| # Get data from the best station | |
| station_id = stations[0]['id'] | |
| historical_data = self.get_historical_daily_data( | |
| station_id, start_date_str, end_date_str, data_types | |
| ) | |
| if not historical_data: | |
| # Fallback to mock data if no real data available | |
| return self._generate_mock_historical_data(location, days_back) | |
| # Process and format the data | |
| processed_data = self._process_historical_data(historical_data, location) | |
| return processed_data | |
| except Exception as e: | |
| logger.error(f"Error getting historical data for {location}: {e}") | |
| return self._generate_mock_historical_data(location, days_back) | |
| def _get_location_coords(self, location: str) -> Optional[tuple]: | |
| """Get coordinates for major cities (simplified mapping)""" | |
| city_coords = { | |
| 'new york': (40.7128, -74.0060), | |
| 'nyc': (40.7128, -74.0060), | |
| 'new york, ny': (40.7128, -74.0060), | |
| 'los angeles': (34.0522, -118.2437), | |
| 'chicago': (41.8781, -87.6298), | |
| 'houston': (29.7604, -95.3698), | |
| 'phoenix': (33.4484, -112.0740), | |
| 'philadelphia': (39.9526, -75.1652), | |
| 'san antonio': (29.4241, -98.4936), | |
| 'san diego': (32.7157, -117.1611), | |
| 'dallas': (32.7767, -96.7970), | |
| 'san jose': (37.3382, -121.8863), | |
| 'austin': (30.2672, -97.7431), | |
| 'jacksonville': (30.3322, -81.6557), | |
| 'fort worth': (32.7555, -97.3308), | |
| 'columbus': (39.9612, -82.9988), | |
| 'charlotte': (35.2271, -80.8431), | |
| 'seattle': (47.6062, -122.3321), | |
| 'denver': (39.7392, -104.9903), | |
| 'boston': (42.3601, -71.0589), | |
| 'miami': (25.7617, -80.1918), | |
| 'atlanta': (33.7490, -84.3880), | |
| 'detroit': (42.3314, -83.0458), | |
| 'washington': (38.9072, -77.0369), | |
| 'washington dc': (38.9072, -77.0369), | |
| 'las vegas': (36.1699, -115.1398), | |
| 'portland': (45.5152, -122.6784), | |
| 'orlando': (28.5383, -81.3792), | |
| 'nashville': (36.1627, -86.7816) | |
| } | |
| location_lower = location.lower().strip() | |
| return city_coords.get(location_lower) | |
| def _process_historical_data(self, raw_data: List[Dict], location: str) -> List[Dict]: | |
| """Process raw NOAA data into standardized format""" | |
| daily_data = {} | |
| # Group data by date | |
| for record in raw_data: | |
| date = record.get('date', '')[:10] # Get YYYY-MM-DD | |
| if date not in daily_data: | |
| daily_data[date] = { | |
| 'date': date, | |
| 'location': location, | |
| 'temperature_max_c': None, | |
| 'temperature_min_c': None, | |
| 'precipitation_mm': None, | |
| 'snow_mm': None | |
| } | |
| datatype = record.get('datatype') | |
| value = record.get('value', 0) | |
| if datatype == 'TMAX': | |
| daily_data[date]['temperature_max_c'] = value / 10.0 | |
| elif datatype == 'TMIN': | |
| daily_data[date]['temperature_min_c'] = value / 10.0 | |
| elif datatype == 'PRCP': | |
| daily_data[date]['precipitation_mm'] = value / 10.0 | |
| elif datatype == 'SNOW': | |
| daily_data[date]['snow_mm'] = value / 10.0 | |
| # Convert to list and sort by date | |
| processed = list(daily_data.values()) | |
| processed.sort(key=lambda x: x['date'], reverse=True) | |
| return processed | |
| def _generate_mock_historical_data(self, location: str, days_back: int) -> List[Dict]: | |
| """Generate mock historical data for testing""" | |
| import random | |
| mock_data = [] | |
| base_date = datetime.now() | |
| for i in range(days_back): | |
| date = base_date - timedelta(days=i) | |
| date_str = date.strftime('%Y-%m-%d') | |
| # Generate realistic but fake data | |
| base_temp = 20 + random.uniform(-10, 15) # Base temp around 20°C | |
| temp_variation = random.uniform(5, 15) | |
| mock_data.append({ | |
| 'date': date_str, | |
| 'location': location, | |
| 'temperature_max_c': round(base_temp + temp_variation, 1), | |
| 'temperature_min_c': round(base_temp - temp_variation, 1), | |
| 'precipitation_mm': round(random.uniform(0, 25), 1) if random.random() > 0.7 else 0, | |
| 'snow_mm': round(random.uniform(0, 10), 1) if random.random() > 0.9 and base_temp < 5 else 0, | |
| 'data_source': 'mock' # Indicate this is test data | |
| }) | |
| return mock_data | |
    def get_setup_instructions(self) -> str:
        """Return Markdown help text for obtaining and configuring a NOAA CDO
        API token; intended to be shown to the user when no token is set."""
        return """
# 🌤️ Enhanced Historical Weather Data Setup
To unlock historical weather data and climate analysis features:
## 1. Get a FREE NOAA API Token
1. Visit: https://www.ncdc.noaa.gov/cdo-web/token
2. Provide your email address
3. You'll receive a token via email (usually within minutes)
## 2. Configure Your Token
Add your token to the environment or configuration:
```bash
export NOAA_CDO_TOKEN="your_token_here"
```
## 3. Available Historical Features
With the token configured, you'll unlock:
- **Historical Weather Data**: Daily records back to 1763
- **Climate Normals**: 30-year averages for comparison
- **Extreme Events**: Heat waves, cold snaps, heavy rain
- **Monthly Summaries**: Statistical analysis of past weather
- **Trend Analysis**: Long-term climate patterns
- **Station Data**: Access to 100,000+ weather stations
## 4. Enhanced AI Responses
The AI will be able to answer questions like:
- "How does today compare to historical averages?"
- "What were the extreme weather events last year?"
- "Show me climate trends for this city"
- "Historical temperature records for this date"
**Note**: The basic weather app works without this token, but historical
features will show setup instructions instead of data.
"""
def create_noaa_cdo_client(api_token: Optional[str] = None) -> NOAACDOClient:
    """Factory function to create NOAA CDO client."""
    client = NOAACDOClient(api_token)
    return client