# File: weather-app_v1/src/api/noaa_cdo_client.py
# Author: chirfort
# Commit: Add Weather MCP Client and Server for Weather Intelligence Tools
# Revision: bf6a6f7
"""
NOAA Climate Data Online (CDO) API Client
Enhanced weather data from NOAA's historical databases
"""
import requests
import logging
from typing import List, Dict, Optional, Any
from datetime import datetime, timedelta
import json
import time
logger = logging.getLogger(__name__)
class NOAACDOClient:
    """Client for the NOAA Climate Data Online (CDO) v2 REST API.

    Every public ``get_*`` method is best-effort: when no API token is
    configured, or when the HTTP request fails, the error is logged and an
    empty list/dict is returned instead of raising.

    Unit handling: the internal helpers (monthly summary, extreme events,
    location history) request GHCN-Daily records *without* a ``units``
    parameter, so values arrive unconverted (per the GHCN-Daily documentation:
    TMAX/TMIN in tenths of degrees Celsius, PRCP in tenths of millimetres,
    SNOW in whole millimetres) and are scaled locally.
    """

    # Approximate length of one degree of latitude, in kilometres.
    _KM_PER_DEGREE = 111.0

    # (latitude, longitude) of a few major US cities, keyed by the lower-cased
    # name accepted by get_historical_daily_data_by_location().
    _CITY_COORDS = {
        'new york': (40.7128, -74.0060),
        'nyc': (40.7128, -74.0060),
        'new york, ny': (40.7128, -74.0060),
        'los angeles': (34.0522, -118.2437),
        'chicago': (41.8781, -87.6298),
        'houston': (29.7604, -95.3698),
        'phoenix': (33.4484, -112.0740),
        'philadelphia': (39.9526, -75.1652),
        'san antonio': (29.4241, -98.4936),
        'san diego': (32.7157, -117.1611),
        'dallas': (32.7767, -96.7970),
        'san jose': (37.3382, -121.8863),
        'austin': (30.2672, -97.7431),
        'jacksonville': (30.3322, -81.6557),
        'fort worth': (32.7555, -97.3308),
        'columbus': (39.9612, -82.9988),
        'charlotte': (35.2271, -80.8431),
        'seattle': (47.6062, -122.3321),
        'denver': (39.7392, -104.9903),
        'boston': (42.3601, -71.0589),
        'miami': (25.7617, -80.1918),
        'atlanta': (33.7490, -84.3880),
        'detroit': (42.3314, -83.0458),
        'washington': (38.9072, -77.0369),
        'washington dc': (38.9072, -77.0369),
        'las vegas': (36.1699, -115.1398),
        'portland': (45.5152, -122.6784),
        'orlando': (28.5383, -81.3792),
        'nashville': (36.1627, -86.7816),
    }

    def __init__(self, api_token: Optional[str] = None):
        """
        Initialize NOAA CDO client

        Args:
            api_token: NOAA CDO API token (get from https://www.ncdc.noaa.gov/cdo-web/token)
                If None (or empty), a warning is logged and the data methods
                return empty results.
        """
        self.base_url = "https://www.ncei.noaa.gov/cdo-web/api/v2"
        self.api_token = api_token
        self.session = requests.Session()
        if api_token:
            # The token travels in a 'token' header on every request.
            self.session.headers.update({
                'token': api_token,
                'User-Agent': 'WeatherAppPro/1.0 (enhanced-weather-app)'
            })
        else:
            logger.warning("No NOAA CDO API token provided. Historical data features will be limited.")

    def is_available(self) -> bool:
        """Return True when a non-empty API token is configured.

        An empty string counts as "no token", matching __init__, which only
        installs the authentication header for a truthy token.
        """
        return bool(self.api_token)

    def _fetch_results(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        timeout: float = 10
    ) -> List[Dict]:
        """GET ``{base_url}/{endpoint}`` and return the payload's 'results' list.

        Raises whatever the HTTP layer raises (connection errors, non-2xx
        status via raise_for_status, invalid JSON); callers decide how to
        degrade.
        """
        response = self.session.get(
            f"{self.base_url}/{endpoint}", params=params, timeout=timeout
        )
        response.raise_for_status()
        return response.json().get('results', [])

    @staticmethod
    def _bounding_box_extent(lat: float, lon: float, radius_km: float) -> str:
        """Build the ``extent`` query value for a box around (lat, lon).

        The CDO API documents ``extent`` as the output of Google Maps'
        ``LatLngBounds.toUrlValue``, i.e. ``south,west,north,east``.
        """
        import math

        lat_delta = radius_km / NOAACDOClient._KM_PER_DEGREE
        # A degree of longitude shrinks with cos(latitude); widen the box
        # accordingly. The floor avoids a division blow-up at the poles.
        cos_lat = max(math.cos(math.radians(lat)), 0.01)
        lon_delta = lat_delta / cos_lat

        south = max(lat - lat_delta, -90.0)
        north = min(lat + lat_delta, 90.0)
        west = max(lon - lon_delta, -180.0)
        east = min(lon + lon_delta, 180.0)
        return f'{south},{west},{north},{east}'

    @staticmethod
    def _month_date_range(year: int, month: int) -> tuple:
        """Return (first_day, last_day) of a month as 'YYYY-MM-DD' strings.

        Raises ValueError when ``month`` is not in 1..12.
        """
        first_day = datetime(year, month, 1)
        # Day 28 plus four days always lands in the following month.
        next_month = (first_day.replace(day=28) + timedelta(days=4)).replace(day=1)
        last_day = next_month - timedelta(days=1)
        return (
            f"{year}-{month:02d}-01",
            f"{year}-{month:02d}-{last_day.day:02d}",
        )

    @staticmethod
    def _classify_extreme_event(record: Dict) -> Optional[Dict]:
        """Map one raw GHCND record to an extreme-event dict, or None.

        Thresholds are applied to raw values (tenths of °C / tenths of mm);
        the reported 'value' is scaled to °C / mm.
        """
        value = record.get('value')
        if value is None:
            return None
        datatype = record.get('datatype')
        if datatype == 'TMAX' and value > 350:  # > 35°C (95°F)
            label, unit = 'Extreme Heat', '°C'
        elif datatype == 'TMIN' and value < -100:  # < -10°C (14°F)
            label, unit = 'Extreme Cold', '°C'
        elif datatype == 'PRCP' and value > 500:  # > 50mm (2 inches)
            label, unit = 'Heavy Precipitation', 'mm'
        else:
            return None
        return {
            'date': record.get('date'),
            'type': label,
            'value': value / 10.0,
            'unit': unit
        }

    def get_datasets(self) -> List[Dict]:
        """Get available datasets (empty list without a token or on error)."""
        if not self.is_available():
            return []
        try:
            return self._fetch_results('datasets')
        except Exception as e:
            logger.error(f"Error getting datasets: {e}")
            return []

    def get_stations_near(self, lat: float, lon: float, radius: float = 50) -> List[Dict]:
        """
        Get weather stations near coordinates

        The search area is a latitude/longitude bounding box approximating the
        radius; at most 20 stations are requested, in the order the API
        returns them.

        Args:
            lat: Latitude
            lon: Longitude
            radius: Search radius in kilometers (default 50km)

        Returns:
            Station records, or [] without a token or on error.
        """
        if not self.is_available():
            return []
        try:
            params = {
                'extent': self._bounding_box_extent(lat, lon, radius),
                'limit': 20,
                'offset': 0
            }
            return self._fetch_results('stations', params)
        except Exception as e:
            logger.error(f"Error getting stations: {e}")
            return []

    def get_historical_daily_data(
        self,
        station_id: str,
        start_date: str,
        end_date: str,
        data_types: Optional[List[str]] = None,
        units: Optional[str] = 'standard'
    ) -> List[Dict]:
        """
        Get historical daily weather data (GHCND dataset)

        Only a single page of up to 1000 records is requested.

        Args:
            station_id: Station identifier
            start_date: Start date in YYYY-MM-DD format
            end_date: End date in YYYY-MM-DD format
            data_types: List of data types (TMAX, TMIN, PRCP, SNOW, etc.)
            units: Value for the API's ``units`` parameter ('standard' or
                'metric'). Pass None to omit it and receive raw, unconverted
                values. Defaults to 'standard' for backward compatibility.

        Returns:
            Raw result records, or [] without a token or on error.
        """
        if not self.is_available():
            return []
        if data_types is None:
            data_types = ['TMAX', 'TMIN', 'PRCP', 'SNOW']
        try:
            params = {
                'datasetid': 'GHCND',  # Global Historical Climatology Network Daily
                'stationid': station_id,
                'startdate': start_date,
                'enddate': end_date,
                'datatypeid': ','.join(data_types),
                'limit': 1000
            }
            if units is not None:
                params['units'] = units
            return self._fetch_results('data', params, timeout=15)
        except Exception as e:
            logger.error(f"Error getting historical data: {e}")
            return []

    def get_climate_normals(
        self,
        lat: float,
        lon: float,
        period: str = "2010-2020"
    ) -> Dict[str, Any]:
        """
        Get climate normals for a location

        Uses the first station found within 25 km and queries the NORMAL_DLY
        dataset for the fixed range 2010-01-01..2010-12-31.

        Args:
            lat: Latitude
            lon: Longitude
            period: Climate normal period (accepted for interface
                compatibility; currently not used in the request)

        Returns:
            {'station': <station record>, 'normals': [<records>]}, or {} when
            no token is set, no station is found, or the request fails.
        """
        if not self.is_available():
            return {}
        try:
            # Find nearby stations first
            stations = self.get_stations_near(lat, lon, radius=25)
            if not stations:
                return {}
            station = stations[0]
            params = {
                'datasetid': 'NORMAL_DLY',  # Daily Climate Normals
                'stationid': station['id'],
                'startdate': '2010-01-01',
                'enddate': '2010-12-31',
                'datatypeid': 'DLY-TMAX-NORMAL,DLY-TMIN-NORMAL,DLY-PRCP-NORMAL',
                'limit': 1000
            }
            return {
                'station': station,
                'normals': self._fetch_results('data', params, timeout=15)
            }
        except Exception as e:
            logger.error(f"Error getting climate normals: {e}")
            return {}

    def get_monthly_summary(
        self,
        lat: float,
        lon: float,
        year: int,
        month: int
    ) -> Dict[str, Any]:
        """Get monthly weather summary

        Fetches raw daily records for the calendar month (first through last
        day, both inclusive) from the first nearby station.

        Returns:
            {'station': ..., 'data': [<raw records>], 'summary': {...}}, or {}
            when no token is set, no station is found, or an error occurs
            (including an invalid month).
        """
        if not self.is_available():
            return {}
        try:
            stations = self.get_stations_near(lat, lon)
            if not stations:
                return {}
            station = stations[0]
            start_date, end_date = self._month_date_range(year, month)
            # units=None: the summary helper expects raw tenths-based values.
            data = self.get_historical_daily_data(
                station['id'], start_date, end_date,
                ['TMAX', 'TMIN', 'PRCP', 'SNOW', 'AWND'],
                units=None
            )
            return {
                'station': station,
                'data': data,
                'summary': self._calculate_monthly_summary(data)
            }
        except Exception as e:
            logger.error(f"Error getting monthly summary: {e}")
            return {}

    def _calculate_monthly_summary(self, daily_data: List[Dict]) -> Dict:
        """Calculate summary statistics from raw daily records.

        Expects raw GHCND values (tenths of °C for TMAX/TMIN, tenths of mm for
        PRCP). Records without a value are skipped so they do not count as a
        0 reading. Returns {} for empty input; otherwise 'total_precip_mm' is
        always present and the temperature keys appear only when matching
        records exist.
        """
        if not daily_data:
            return {}
        temps_max = []
        temps_min = []
        precip_total = 0
        for record in daily_data:
            value = record.get('value')
            if value is None:
                continue
            datatype = record.get('datatype')
            if datatype == 'TMAX':
                temps_max.append(value / 10.0)  # Convert tenths of Celsius
            elif datatype == 'TMIN':
                temps_min.append(value / 10.0)
            elif datatype == 'PRCP':
                precip_total += value / 10.0  # Convert tenths of mm
        summary = {}
        if temps_max:
            summary['avg_high_c'] = sum(temps_max) / len(temps_max)
            summary['max_temp_c'] = max(temps_max)
            summary['min_high_c'] = min(temps_max)
        if temps_min:
            summary['avg_low_c'] = sum(temps_min) / len(temps_min)
            summary['min_temp_c'] = min(temps_min)
            summary['max_low_c'] = max(temps_min)
        summary['total_precip_mm'] = precip_total
        return summary

    def get_extreme_events(
        self,
        lat: float,
        lon: float,
        start_date: str,
        end_date: str,
        event_types: Optional[List[str]] = None
    ) -> List[Dict]:
        """
        Get extreme weather events

        A record counts as extreme when TMAX > 35°C, TMIN < -10°C or
        PRCP > 50 mm.

        Args:
            lat: Latitude
            lon: Longitude
            start_date: Start date YYYY-MM-DD
            end_date: End date YYYY-MM-DD
            event_types: Data types to fetch (default TMAX, TMIN, PRCP)

        Returns:
            List of {'date', 'type', 'value', 'unit'} dicts, or [] without a
            token, without a nearby station, or on error.
        """
        if not self.is_available():
            return []
        if event_types is None:
            event_types = ['TMAX', 'TMIN', 'PRCP']
        try:
            stations = self.get_stations_near(lat, lon)
            if not stations:
                return []
            # units=None: thresholds below are expressed in raw tenths.
            data = self.get_historical_daily_data(
                stations[0]['id'], start_date, end_date, event_types,
                units=None
            )
            classified = (self._classify_extreme_event(record) for record in data)
            return [event for event in classified if event is not None]
        except Exception as e:
            logger.error(f"Error getting extreme events: {e}")
            return []

    async def get_historical_daily_data_by_location(
        self,
        location: str,
        days_back: int = 7,
        data_types: Optional[List[str]] = None
    ) -> List[Dict]:
        """
        Get historical daily weather data for a location

        Falls back to generated mock data (marked 'data_source': 'mock') when
        no token is set, the location is unknown, no station is found, no
        records come back, or any error occurs.

        NOTE: this coroutine calls the synchronous HTTP helpers directly, so
        it blocks the running event loop for the duration of the requests.

        Args:
            location: Location name (e.g., "New York, NY" or "Seattle")
            days_back: Number of days back from today
            data_types: List of data types (TMAX, TMIN, PRCP, SNOW, etc.)
        """
        if not self.is_available():
            # Return mock data for testing when no API token
            return self._generate_mock_historical_data(location, days_back)
        if data_types is None:
            data_types = ['TMAX', 'TMIN', 'PRCP', 'SNOW']
        try:
            # No geocoder is available here; use the built-in city table.
            coords = self._get_location_coords(location)
            if not coords:
                logger.warning(f"Could not find coordinates for location: {location}")
                return self._generate_mock_historical_data(location, days_back)
            lat, lon = coords

            stations = self.get_stations_near(lat, lon, radius=50)
            if not stations:
                logger.warning(f"No weather stations found near {location}")
                return self._generate_mock_historical_data(location, days_back)

            end_date = datetime.now()
            start_date = end_date - timedelta(days=days_back)

            # units=None: _process_historical_data expects raw values.
            historical_data = self.get_historical_daily_data(
                stations[0]['id'],
                start_date.strftime('%Y-%m-%d'),
                end_date.strftime('%Y-%m-%d'),
                data_types,
                units=None
            )
            if not historical_data:
                # Fallback to mock data if no real data available
                return self._generate_mock_historical_data(location, days_back)
            return self._process_historical_data(historical_data, location)
        except Exception as e:
            logger.error(f"Error getting historical data for {location}: {e}")
            return self._generate_mock_historical_data(location, days_back)

    def _get_location_coords(self, location: str) -> Optional[tuple]:
        """Get coordinates for major cities (simplified mapping)

        The lookup is an exact match on the stripped, lower-cased name;
        returns None for anything not in the table.
        """
        return self._CITY_COORDS.get(location.lower().strip())

    def _process_historical_data(self, raw_data: List[Dict], location: str) -> List[Dict]:
        """Process raw NOAA data into standardized format

        Groups raw GHCND records by day and converts them to metric fields.
        Fields with no record (or no value) stay None. The result is sorted
        newest day first.
        """
        daily_data = {}
        for record in raw_data:
            date = (record.get('date') or '')[:10]  # Get YYYY-MM-DD
            if date not in daily_data:
                daily_data[date] = {
                    'date': date,
                    'location': location,
                    'temperature_max_c': None,
                    'temperature_min_c': None,
                    'precipitation_mm': None,
                    'snow_mm': None
                }
            value = record.get('value')
            if value is None:
                continue
            datatype = record.get('datatype')
            if datatype == 'TMAX':
                daily_data[date]['temperature_max_c'] = value / 10.0
            elif datatype == 'TMIN':
                daily_data[date]['temperature_min_c'] = value / 10.0
            elif datatype == 'PRCP':
                daily_data[date]['precipitation_mm'] = value / 10.0
            elif datatype == 'SNOW':
                # Raw GHCN-Daily snowfall is already whole millimetres.
                daily_data[date]['snow_mm'] = value
        processed = list(daily_data.values())
        processed.sort(key=lambda x: x['date'], reverse=True)
        return processed

    def _generate_mock_historical_data(self, location: str, days_back: int) -> List[Dict]:
        """Generate mock historical data for testing

        Produces one random record per day, starting today and going back
        ``days_back - 1`` days; every record carries 'data_source': 'mock'.
        """
        import random

        mock_data = []
        base_date = datetime.now()
        for i in range(days_back):
            date = base_date - timedelta(days=i)
            date_str = date.strftime('%Y-%m-%d')
            # Generate realistic but fake data
            base_temp = 20 + random.uniform(-10, 15)  # Base temp around 20°C
            temp_variation = random.uniform(5, 15)
            mock_data.append({
                'date': date_str,
                'location': location,
                'temperature_max_c': round(base_temp + temp_variation, 1),
                'temperature_min_c': round(base_temp - temp_variation, 1),
                'precipitation_mm': round(random.uniform(0, 25), 1) if random.random() > 0.7 else 0,
                'snow_mm': round(random.uniform(0, 10), 1) if random.random() > 0.9 and base_temp < 5 else 0,
                'data_source': 'mock'  # Indicate this is test data
            })
        return mock_data

    def get_setup_instructions(self) -> str:
        """Return instructions for setting up NOAA CDO API access"""
        return """
# 🌤️ Enhanced Historical Weather Data Setup
To unlock historical weather data and climate analysis features:
## 1. Get a FREE NOAA API Token
1. Visit: https://www.ncdc.noaa.gov/cdo-web/token
2. Provide your email address
3. You'll receive a token via email (usually within minutes)
## 2. Configure Your Token
Add your token to the environment or configuration:
```bash
export NOAA_CDO_TOKEN="your_token_here"
```
## 3. Available Historical Features
With the token configured, you'll unlock:
- **Historical Weather Data**: Daily records back to 1763
- **Climate Normals**: 30-year averages for comparison
- **Extreme Events**: Heat waves, cold snaps, heavy rain
- **Monthly Summaries**: Statistical analysis of past weather
- **Trend Analysis**: Long-term climate patterns
- **Station Data**: Access to 100,000+ weather stations
## 4. Enhanced AI Responses
The AI will be able to answer questions like:
- "How does today compare to historical averages?"
- "What were the extreme weather events last year?"
- "Show me climate trends for this city"
- "Historical temperature records for this date"
**Note**: The basic weather app works without this token, but historical
features will show setup instructions instead of data.
"""
def create_noaa_cdo_client(api_token: Optional[str] = None) -> NOAACDOClient:
    """Build and return a NOAACDOClient.

    Args:
        api_token: NOAA CDO API token, forwarded unchanged to the client
            constructor (None means no token).
    """
    client = NOAACDOClient(api_token=api_token)
    return client