armanddemasson committed on
Commit
e8d5bc9
·
1 Parent(s): c0fd277

feat: created queries for talk to ipcc

Browse files
climateqa/engine/talk_to_data/ipcc/queries.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TypedDict, Optional
2
+
3
+ from climateqa.engine.talk_to_data.ipcc.config import IPCC_DATASET_URL, MACRO_COUNTRIES
4
+
5
class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
    """Inputs for the "indicator per year at one location" query.

    Declared with ``total=False``, so every key may be omitted from the
    dictionary; the query builder that consumes it decides which keys it
    actually needs.

    Attributes:
        indicator_column (str): Column holding the climate indicator values.
        latitude (str): Latitude of the requested location.
        longitude (str): Longitude of the requested location.
        country_code (str): Code of the country the location belongs to.
        admin1 (str): Administrative region of the location (optional).
    """

    # Which indicator to read.
    indicator_column: str

    # Where to read it.
    latitude: str
    longitude: str
    country_code: str

    # Administrative region; may be absent or None.
    admin1: Optional[str]
21
+
22
def indicator_per_year_at_location_query(
    table: str, params: IndicatorPerYearAtLocationQueryParams
) -> str:
    """Build the SQL query for an indicator's yearly evolution at a location.

    Two query shapes are produced depending on the country:

    * If ``country_code`` is in ``MACRO_COUNTRIES``, the
      ``<country_code>_macro.parquet`` file is read and rows are filtered by
      ``admin1``.
    * Otherwise the ``<country_code>.parquet`` file is read, rows are
      filtered by exact ``latitude`` / ``longitude`` and the indicator is
      averaged per ``(scenario, year)``.

    Both shapes keep only ``year >= 1950`` and order by ``year, scenario``.

    Args:
        table (str): Name of the indicator table; lower-cased to form the
            dataset folder name.
        params (IndicatorPerYearAtLocationQueryParams): Query parameters.
            ``indicator_column``, ``latitude``, ``longitude`` and
            ``country_code`` are always required; ``admin1`` is additionally
            required when ``country_code`` is in ``MACRO_COUNTRIES``.

    Returns:
        str: The SQL query string, or an empty string if a required
        parameter is missing.
    """
    indicator_column = params.get("indicator_column")
    latitude = params.get("latitude")
    longitude = params.get("longitude")
    country_code = params.get("country_code")
    admin1 = params.get("admin1")

    if not indicator_column or not country_code:
        return ""

    # Coordinates are tested against None / "" instead of by truthiness so
    # that a numeric 0 (equator or prime meridian) is not treated as missing.
    if any(value is None or value == "" for value in (latitude, longitude)):
        return ""

    if country_code in MACRO_COUNTRIES:
        # The macro query filters on admin1 only; without it the filter
        # would be rendered as the literal string 'None'.
        if not admin1:
            return ""

        # Double embedded single quotes so names such as "Val-d'Oise" stay
        # inside the SQL string literal.
        admin1_literal = str(admin1).replace("'", "''")

        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
        sql_query = f"""
            SELECT year, scenario, {indicator_column}
            FROM {table_path}
            WHERE admin1 = '{admin1_literal}' AND year >= 1950
            ORDER BY year, scenario
        """
    else:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
        sql_query = f"""
            SELECT year, scenario, AVG({indicator_column}) AS {indicator_column}
            FROM {table_path}
            WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950
            GROUP BY scenario, year
            ORDER BY year, scenario
        """
    return sql_query.strip()
62
+
63
class IndicatorForGivenYearQueryParams(TypedDict, total=False):
    """Inputs for the "indicator across locations for one year" query.

    Declared with ``total=False``, so every key may be omitted from the
    dictionary.

    Attributes:
        indicator_column (str): Column holding the climate indicator values.
        year (str): Year whose values are requested.
        country_code (str): Code of the country to query.
    """

    # Which indicator to read.
    indicator_column: str

    # Which year and country to read it for.
    year: str
    country_code: str
75
+
76
def indicator_for_given_year_query(
    table: str, params: IndicatorForGivenYearQueryParams
) -> str:
    """Build the SQL query for an indicator's values in a single year.

    The query returns the indicator together with latitude, longitude and
    scenario. If ``country_code`` is in ``MACRO_COUNTRIES``, the
    ``<country_code>_macro.parquet`` file is joined to ``coordinates.parquet``
    on ``admin1`` and ``country_code`` (and ``admin1`` is returned as well).
    Otherwise the ``<country_code>.parquet`` file is read and the indicator is
    averaged per ``(latitude, longitude, scenario)``.

    Args:
        table (str): Name of the indicator table; lower-cased to form the
            dataset folder name.
        params (IndicatorForGivenYearQueryParams): Query parameters.
            ``indicator_column`` and ``country_code`` are required; a missing
            or falsy ``year`` falls back to 2050.

    Returns:
        str: The SQL query string, or an empty string if a required
        parameter is missing.
    """
    column = params.get("indicator_column")
    country = params.get("country_code")
    # The fallback makes the year always truthy, so it needs no guard below.
    target_year = params.get("year") or 2050

    if not (column and country):
        return ""

    dataset_dir = f"{IPCC_DATASET_URL}/{table.lower()}"

    if country not in MACRO_COUNTRIES:
        table_path = f"'{dataset_dir}/{country}.parquet'"
        gridded_query = f"""
            SELECT AVG({column}) AS {column}, latitude, longitude, scenario
            FROM {table_path}
            WHERE year = {target_year}
            GROUP BY latitude, longitude, scenario
            ORDER BY latitude, longitude, scenario
        """
        return gridded_query.strip()

    table_path = f"'{dataset_dir}/{country}_macro.parquet'"
    macro_query = f"""
            SELECT {column}, c.latitude, c.longitude, c.admin1, scenario
            FROM {table_path} AS t
            RIGHT JOIN '{IPCC_DATASET_URL}/coordinates.parquet' AS c
            ON c.admin1 = t.admin1 AND c.country_code = t.country_code
            WHERE year = {target_year}
            ORDER BY latitude, longitude, scenario
        """
    return macro_query.strip()