| """ | |
| NOAA Climate Data Online (CDO) API Client | |
| Enhanced weather data from NOAA's historical databases | |
| """ | |
| import requests | |
| import logging | |
| from typing import List, Dict, Optional, Any | |
| from datetime import datetime, timedelta | |
| import json | |
| import time | |
| logger = logging.getLogger(__name__) | |
class NOAACDOClient:
    """Client for NOAA Climate Data Online API"""

    def __init__(self, api_token: Optional[str] = None):
        """
        Initialize NOAA CDO client.

        Args:
            api_token: NOAA CDO API token (get from https://www.ncdc.noaa.gov/cdo-web/token)
                If None, will provide instructions to user
        """
        self.base_url = "https://www.ncei.noaa.gov/cdo-web/api/v2"
        self.api_token = api_token
        self.session = requests.Session()
        if api_token is None:
            logger.warning("No NOAA CDO API token provided. Historical data features will be limited.")
        else:
            # CDO authenticates via a plain 'token' header (not Bearer auth).
            default_headers = {
                'token': api_token,
                'User-Agent': 'WeatherAppPro/1.0 (enhanced-weather-app)'
            }
            self.session.headers.update(default_headers)
| def is_available(self) -> bool: | |
| """Check if CDO API is available with token""" | |
| return self.api_token is not None | |
| def get_datasets(self) -> List[Dict]: | |
| """Get available datasets""" | |
| if not self.is_available(): | |
| return [] | |
| try: | |
| url = f"{self.base_url}/datasets" | |
| response = self.session.get(url, timeout=10) | |
| response.raise_for_status() | |
| data = response.json() | |
| return data.get('results', []) | |
| except Exception as e: | |
| logger.error(f"Error getting datasets: {e}") | |
| return [] | |
| def get_stations_near(self, lat: float, lon: float, radius: float = 50) -> List[Dict]: | |
| """ | |
| Get weather stations near coordinates | |
| Args: | |
| lat: Latitude | |
| lon: Longitude | |
| radius: Search radius in kilometers (default 50km) | |
| """ | |
| if not self.is_available(): | |
| return [] | |
| try: | |
| # Convert radius to degrees (approximate) | |
| radius_deg = radius / 111.0 # 1 degree ≈ 111 km | |
| # Define bounding box | |
| north = lat + radius_deg | |
| south = lat - radius_deg | |
| east = lon + radius_deg | |
| west = lon - radius_deg | |
| url = f"{self.base_url}/stations" | |
| params = { | |
| 'extent': f'{north},{west},{south},{east}', | |
| 'limit': 20, | |
| 'offset': 0 | |
| } | |
| response = self.session.get(url, params=params, timeout=10) | |
| response.raise_for_status() | |
| data = response.json() | |
| return data.get('results', []) | |
| except Exception as e: | |
| logger.error(f"Error getting stations: {e}") | |
| return [] | |
| def get_historical_daily_data( | |
| self, | |
| station_id: str, | |
| start_date: str, | |
| end_date: str, | |
| data_types: List[str] = None | |
| ) -> List[Dict]: | |
| """ | |
| Get historical daily weather data | |
| Args: | |
| station_id: Station identifier | |
| start_date: Start date in YYYY-MM-DD format | |
| end_date: End date in YYYY-MM-DD format | |
| data_types: List of data types (TMAX, TMIN, PRCP, SNOW, etc.) | |
| """ | |
| if not self.is_available(): | |
| return [] | |
| if data_types is None: | |
| data_types = ['TMAX', 'TMIN', 'PRCP', 'SNOW'] | |
| try: | |
| url = f"{self.base_url}/data" | |
| params = { | |
| 'datasetid': 'GHCND', # Global Historical Climatology Network Daily | |
| 'stationid': station_id, | |
| 'startdate': start_date, | |
| 'enddate': end_date, | |
| 'datatypeid': ','.join(data_types), | |
| 'limit': 1000, | |
| 'units': 'standard' | |
| } | |
| response = self.session.get(url, params=params, timeout=15) | |
| response.raise_for_status() | |
| data = response.json() | |
| return data.get('results', []) | |
| except Exception as e: | |
| logger.error(f"Error getting historical data: {e}") | |
| return [] | |
| def get_climate_normals( | |
| self, | |
| lat: float, | |
| lon: float, | |
| period: str = "2010-2020" | |
| ) -> Dict[str, Any]: | |
| """ | |
| Get climate normals for a location | |
| Args: | |
| lat: Latitude | |
| lon: Longitude | |
| period: Climate normal period | |
| """ | |
| if not self.is_available(): | |
| return {} | |
| try: | |
| # Find nearby stations first | |
| stations = self.get_stations_near(lat, lon, radius=25) | |
| if not stations: | |
| return {} | |
| station_id = stations[0]['id'] | |
| # Get normals data | |
| url = f"{self.base_url}/data" | |
| params = { | |
| 'datasetid': 'NORMAL_DLY', # Daily Climate Normals | |
| 'stationid': station_id, | |
| 'startdate': '2010-01-01', | |
| 'enddate': '2010-12-31', | |
| 'datatypeid': 'DLY-TMAX-NORMAL,DLY-TMIN-NORMAL,DLY-PRCP-NORMAL', | |
| 'limit': 1000 | |
| } | |
| response = self.session.get(url, params=params, timeout=15) | |
| response.raise_for_status() | |
| data = response.json() | |
| return { | |
| 'station': stations[0], | |
| 'normals': data.get('results', []) | |
| } | |
| except Exception as e: | |
| logger.error(f"Error getting climate normals: {e}") | |
| return {} | |
| def get_monthly_summary( | |
| self, | |
| lat: float, | |
| lon: float, | |
| year: int, | |
| month: int | |
| ) -> Dict[str, Any]: | |
| """Get monthly weather summary""" | |
| if not self.is_available(): | |
| return {} | |
| try: | |
| stations = self.get_stations_near(lat, lon) | |
| if not stations: | |
| return {} | |
| station_id = stations[0]['id'] | |
| start_date = f"{year}-{month:02d}-01" | |
| # Calculate end date | |
| if month == 12: | |
| end_date = f"{year + 1}-01-01" | |
| else: | |
| end_date = f"{year}-{month + 1:02d}-01" | |
| data = self.get_historical_daily_data( | |
| station_id, start_date, end_date, | |
| ['TMAX', 'TMIN', 'PRCP', 'SNOW', 'AWND'] | |
| ) | |
| return { | |
| 'station': stations[0], | |
| 'data': data, | |
| 'summary': self._calculate_monthly_summary(data) | |
| } | |
| except Exception as e: | |
| logger.error(f"Error getting monthly summary: {e}") | |
| return {} | |
| def _calculate_monthly_summary(self, daily_data: List[Dict]) -> Dict: | |
| """Calculate summary statistics from daily data""" | |
| if not daily_data: | |
| return {} | |
| temps_max = [] | |
| temps_min = [] | |
| precip_total = 0 | |
| for record in daily_data: | |
| datatype = record.get('datatype') | |
| value = record.get('value', 0) | |
| if datatype == 'TMAX': | |
| temps_max.append(value / 10.0) # Convert tenths of Celsius | |
| elif datatype == 'TMIN': | |
| temps_min.append(value / 10.0) | |
| elif datatype == 'PRCP': | |
| precip_total += value / 10.0 # Convert tenths of mm | |
| summary = {} | |
| if temps_max: | |
| summary['avg_high_c'] = sum(temps_max) / len(temps_max) | |
| summary['max_temp_c'] = max(temps_max) | |
| summary['min_high_c'] = min(temps_max) | |
| if temps_min: | |
| summary['avg_low_c'] = sum(temps_min) / len(temps_min) | |
| summary['min_temp_c'] = min(temps_min) | |
| summary['max_low_c'] = max(temps_min) | |
| summary['total_precip_mm'] = precip_total | |
| return summary | |
| def get_extreme_events( | |
| self, | |
| lat: float, | |
| lon: float, | |
| start_date: str, | |
| end_date: str, | |
| event_types: List[str] = None | |
| ) -> List[Dict]: | |
| """ | |
| Get extreme weather events | |
| Args: | |
| lat: Latitude | |
| lon: Longitude | |
| start_date: Start date YYYY-MM-DD | |
| end_date: End date YYYY-MM-DD | |
| event_types: Types of events to search for | |
| """ | |
| if not self.is_available(): | |
| return [] | |
| if event_types is None: | |
| event_types = ['TMAX', 'TMIN', 'PRCP'] | |
| try: | |
| stations = self.get_stations_near(lat, lon) | |
| if not stations: | |
| return [] | |
| station_id = stations[0]['id'] | |
| data = self.get_historical_daily_data( | |
| station_id, start_date, end_date, event_types | |
| ) | |
| # Identify extreme events | |
| extremes = [] | |
| for record in data: | |
| value = record.get('value', 0) | |
| datatype = record.get('datatype') | |
| date = record.get('date') | |
| # Define thresholds for extreme events | |
| if datatype == 'TMAX' and value > 350: # > 35°C (95°F) | |
| extremes.append({ | |
| 'date': date, | |
| 'type': 'Extreme Heat', | |
| 'value': value / 10.0, | |
| 'unit': '°C' | |
| }) | |
| elif datatype == 'TMIN' and value < -100: # < -10°C (14°F) | |
| extremes.append({ | |
| 'date': date, | |
| 'type': 'Extreme Cold', | |
| 'value': value / 10.0, | |
| 'unit': '°C' | |
| }) | |
| elif datatype == 'PRCP' and value > 500: # > 50mm (2 inches) | |
| extremes.append({ | |
| 'date': date, | |
| 'type': 'Heavy Precipitation', | |
| 'value': value / 10.0, | |
| 'unit': 'mm' | |
| }) | |
| return extremes | |
| except Exception as e: | |
| logger.error(f"Error getting extreme events: {e}") | |
| return [] | |
| async def get_historical_daily_data_by_location( | |
| self, | |
| location: str, | |
| days_back: int = 7, | |
| data_types: List[str] = None | |
| ) -> List[Dict]: | |
| """ | |
| Get historical daily weather data for a location | |
| Args: | |
| location: Location name (e.g., "New York, NY" or "Seattle") | |
| days_back: Number of days back from today | |
| data_types: List of data types (TMAX, TMIN, PRCP, SNOW, etc.) | |
| """ | |
| if not self.is_available(): | |
| # Return mock data for testing when no API token | |
| return self._generate_mock_historical_data(location, days_back) | |
| if data_types is None: | |
| data_types = ['TMAX', 'TMIN', 'PRCP', 'SNOW'] | |
| try: | |
| # For this method, we'll need to geocode the location first | |
| # Since we don't have geocoding in this client, we'll use major cities mapping | |
| coords = self._get_location_coords(location) | |
| if not coords: | |
| logger.warning(f"Could not find coordinates for location: {location}") | |
| return self._generate_mock_historical_data(location, days_back) | |
| lat, lon = coords | |
| # Find nearby stations | |
| stations = self.get_stations_near(lat, lon, radius=50) | |
| if not stations: | |
| logger.warning(f"No weather stations found near {location}") | |
| return self._generate_mock_historical_data(location, days_back) | |
| # Calculate date range | |
| end_date = datetime.now() | |
| start_date = end_date - timedelta(days=days_back) | |
| start_date_str = start_date.strftime('%Y-%m-%d') | |
| end_date_str = end_date.strftime('%Y-%m-%d') | |
| # Get data from the best station | |
| station_id = stations[0]['id'] | |
| historical_data = self.get_historical_daily_data( | |
| station_id, start_date_str, end_date_str, data_types | |
| ) | |
| if not historical_data: | |
| # Fallback to mock data if no real data available | |
| return self._generate_mock_historical_data(location, days_back) | |
| # Process and format the data | |
| processed_data = self._process_historical_data(historical_data, location) | |
| return processed_data | |
| except Exception as e: | |
| logger.error(f"Error getting historical data for {location}: {e}") | |
| return self._generate_mock_historical_data(location, days_back) | |
| def _get_location_coords(self, location: str) -> Optional[tuple]: | |
| """Get coordinates for major cities (simplified mapping)""" | |
| city_coords = { | |
| 'new york': (40.7128, -74.0060), | |
| 'nyc': (40.7128, -74.0060), | |
| 'new york, ny': (40.7128, -74.0060), | |
| 'los angeles': (34.0522, -118.2437), | |
| 'chicago': (41.8781, -87.6298), | |
| 'houston': (29.7604, -95.3698), | |
| 'phoenix': (33.4484, -112.0740), | |
| 'philadelphia': (39.9526, -75.1652), | |
| 'san antonio': (29.4241, -98.4936), | |
| 'san diego': (32.7157, -117.1611), | |
| 'dallas': (32.7767, -96.7970), | |
| 'san jose': (37.3382, -121.8863), | |
| 'austin': (30.2672, -97.7431), | |
| 'jacksonville': (30.3322, -81.6557), | |
| 'fort worth': (32.7555, -97.3308), | |
| 'columbus': (39.9612, -82.9988), | |
| 'charlotte': (35.2271, -80.8431), | |
| 'seattle': (47.6062, -122.3321), | |
| 'denver': (39.7392, -104.9903), | |
| 'boston': (42.3601, -71.0589), | |
| 'miami': (25.7617, -80.1918), | |
| 'atlanta': (33.7490, -84.3880), | |
| 'detroit': (42.3314, -83.0458), | |
| 'washington': (38.9072, -77.0369), | |
| 'washington dc': (38.9072, -77.0369), | |
| 'las vegas': (36.1699, -115.1398), | |
| 'portland': (45.5152, -122.6784), | |
| 'orlando': (28.5383, -81.3792), | |
| 'nashville': (36.1627, -86.7816) | |
| } | |
| location_lower = location.lower().strip() | |
| return city_coords.get(location_lower) | |
| def _process_historical_data(self, raw_data: List[Dict], location: str) -> List[Dict]: | |
| """Process raw NOAA data into standardized format""" | |
| daily_data = {} | |
| # Group data by date | |
| for record in raw_data: | |
| date = record.get('date', '')[:10] # Get YYYY-MM-DD | |
| if date not in daily_data: | |
| daily_data[date] = { | |
| 'date': date, | |
| 'location': location, | |
| 'temperature_max_c': None, | |
| 'temperature_min_c': None, | |
| 'precipitation_mm': None, | |
| 'snow_mm': None | |
| } | |
| datatype = record.get('datatype') | |
| value = record.get('value', 0) | |
| if datatype == 'TMAX': | |
| daily_data[date]['temperature_max_c'] = value / 10.0 | |
| elif datatype == 'TMIN': | |
| daily_data[date]['temperature_min_c'] = value / 10.0 | |
| elif datatype == 'PRCP': | |
| daily_data[date]['precipitation_mm'] = value / 10.0 | |
| elif datatype == 'SNOW': | |
| daily_data[date]['snow_mm'] = value / 10.0 | |
| # Convert to list and sort by date | |
| processed = list(daily_data.values()) | |
| processed.sort(key=lambda x: x['date'], reverse=True) | |
| return processed | |
| def _generate_mock_historical_data(self, location: str, days_back: int) -> List[Dict]: | |
| """Generate mock historical data for testing""" | |
| import random | |
| mock_data = [] | |
| base_date = datetime.now() | |
| for i in range(days_back): | |
| date = base_date - timedelta(days=i) | |
| date_str = date.strftime('%Y-%m-%d') | |
| # Generate realistic but fake data | |
| base_temp = 20 + random.uniform(-10, 15) # Base temp around 20°C | |
| temp_variation = random.uniform(5, 15) | |
| mock_data.append({ | |
| 'date': date_str, | |
| 'location': location, | |
| 'temperature_max_c': round(base_temp + temp_variation, 1), | |
| 'temperature_min_c': round(base_temp - temp_variation, 1), | |
| 'precipitation_mm': round(random.uniform(0, 25), 1) if random.random() > 0.7 else 0, | |
| 'snow_mm': round(random.uniform(0, 10), 1) if random.random() > 0.9 and base_temp < 5 else 0, | |
| 'data_source': 'mock' # Indicate this is test data | |
| }) | |
| return mock_data | |
    def get_setup_instructions(self) -> str:
        """Return Markdown help text for obtaining and configuring a NOAA CDO
        API token; intended to be shown to the user when no token is set."""
        return """
# 🌤️ Enhanced Historical Weather Data Setup
To unlock historical weather data and climate analysis features:
## 1. Get a FREE NOAA API Token
1. Visit: https://www.ncdc.noaa.gov/cdo-web/token
2. Provide your email address
3. You'll receive a token via email (usually within minutes)
## 2. Configure Your Token
Add your token to the environment or configuration:
```bash
export NOAA_CDO_TOKEN="your_token_here"
```
## 3. Available Historical Features
With the token configured, you'll unlock:
- **Historical Weather Data**: Daily records back to 1763
- **Climate Normals**: 30-year averages for comparison
- **Extreme Events**: Heat waves, cold snaps, heavy rain
- **Monthly Summaries**: Statistical analysis of past weather
- **Trend Analysis**: Long-term climate patterns
- **Station Data**: Access to 100,000+ weather stations
## 4. Enhanced AI Responses
The AI will be able to answer questions like:
- "How does today compare to historical averages?"
- "What were the extreme weather events last year?"
- "Show me climate trends for this city"
- "Historical temperature records for this date"
**Note**: The basic weather app works without this token, but historical
features will show setup instructions instead of data.
"""
def create_noaa_cdo_client(api_token: Optional[str] = None) -> NOAACDOClient:
    """Factory function to create NOAA CDO client."""
    client = NOAACDOClient(api_token)
    return client