timeki committed on
Commit
161aa8c
·
1 Parent(s): 0bdf2f6

add documentation

Browse files
climateqa/engine/talk_to_data/main.py CHANGED
@@ -4,16 +4,62 @@ import ast
4
 
5
  llm = get_llm(provider="openai")
6
 
7
- def ask_llm_to_add_table_names(sql_query, llm):
 
 
 
 
 
 
 
 
 
 
 
 
8
  sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query}. Just answer the query. The answer should not include ```sql\n").content
9
  return sql_with_table_names
10
 
11
- def ask_llm_column_names(sql_query, llm):
 
 
 
 
 
 
 
 
 
 
 
 
12
  columns = llm.invoke(f"From the given sql query, list the columns that are being selected. The answer should only be a python list. Just answer the list. The SQL query : {sql_query}").content
13
  columns_list = ast.literal_eval(columns.strip("```python\n").strip())
14
  return columns_list
15
 
16
- def ask_drias(query:str, index_state: int = 0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  final_state = drias_workflow(query)
18
  sql_queries = []
19
  result_dataframes = []
 
4
 
5
  llm = get_llm(provider="openai")
6
 
7
+ def ask_llm_to_add_table_names(sql_query: str, llm) -> str:
8
+ """Adds table names to the SQL query result rows using LLM.
9
+
10
+ This function modifies the SQL query to include the source table name in each row
11
+ of the result set, making it easier to track which data comes from which table.
12
+
13
+ Args:
14
+ sql_query (str): The original SQL query to modify
15
+ llm: The language model instance to use for generating the modified query
16
+
17
+ Returns:
18
+ str: The modified SQL query with table names included in the result rows
19
+ """
20
  sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query}. Just answer the query. The answer should not include ```sql\n").content
21
  return sql_with_table_names
22
 
23
+ def ask_llm_column_names(sql_query: str, llm) -> list[str]:
24
+ """Extracts column names from a SQL query using LLM.
25
+
26
+ This function analyzes a SQL query to identify which columns are being selected
27
+ in the result set.
28
+
29
+ Args:
30
+ sql_query (str): The SQL query to analyze
31
+ llm: The language model instance to use for column extraction
32
+
33
+ Returns:
34
+ list[str]: A list of column names being selected in the query
35
+ """
36
  columns = llm.invoke(f"From the given sql query, list the columns that are being selected. The answer should only be a python list. Just answer the list. The SQL query : {sql_query}").content
37
  columns_list = ast.literal_eval(columns.strip("```python\n").strip())
38
  return columns_list
39
 
40
+ def ask_drias(query: str, index_state: int = 0) -> tuple:
41
+ """Main function to process a DRIAS query and return results.
42
+
43
+ This function orchestrates the DRIAS workflow, processing a user query to generate
44
+ SQL queries, dataframes, and visualizations. It handles multiple results and allows
45
+ pagination through them.
46
+
47
+ Args:
48
+ query (str): The user's question about climate data
49
+ index_state (int, optional): The index of the result to return. Defaults to 0.
50
+
51
+ Returns:
52
+ tuple: A tuple containing:
53
+ - sql_query (str): The SQL query used
54
+ - dataframe (pd.DataFrame): The resulting data
55
+ - figure (Callable): Function to generate the visualization
56
+ - sql_queries (list): All generated SQL queries
57
+ - result_dataframes (list): All resulting dataframes
58
+ - figures (list): All figure generation functions
59
+ - index_state (int): Current result index
60
+ - table_list (list): List of table names used
61
+ - error (str): Error message if any
62
+ """
63
  final_state = drias_workflow(query)
64
  sql_queries = []
65
  result_dataframes = []
climateqa/engine/talk_to_data/plot.py CHANGED
@@ -12,6 +12,18 @@ from climateqa.engine.talk_to_data.sql_query import (
12
 
13
 
14
  class Plot(TypedDict):
 
 
 
 
 
 
 
 
 
 
 
 
15
  name: str
16
  description: str
17
  params: list[str]
@@ -20,26 +32,41 @@ class Plot(TypedDict):
20
 
21
 
22
  def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
23
- """Generate the function to plot a line plot of an indicator per year at a certain location
24
-
 
 
 
 
25
  Args:
26
- params (dict): dictionnary with the required params : model, indicator_column, location
27
-
 
 
 
28
  Returns:
29
- Callable[..., Figure]: Function which can be call to create the figure with the associated dataframe
 
 
 
 
 
 
 
 
30
  """
31
  indicator = params["indicator_column"]
32
  location = params["location"]
33
  indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
34
 
35
  def plot_data(df: pd.DataFrame) -> Figure:
36
- """Generate the figure thanks to the dataframe
37
-
38
  Args:
39
- df (pd.DataFrame): pandas dataframe with the required data
40
-
41
  Returns:
42
- Figure: Plotly figure
43
  """
44
  fig = go.Figure()
45
  if df['model'].nunique() != 1:
@@ -118,15 +145,20 @@ indicator_evolution_at_location: Plot = {
118
  def plot_indicator_number_of_days_per_year_at_location(
119
  params: dict,
120
  ) -> Callable[..., Figure]:
121
- """Generate the function to plot a line plot of an indicator per year at a certain location
122
-
 
 
 
123
  Args:
124
- params (dict): dictionnary with the required params : model, indicator_column, location
125
-
 
 
 
126
  Returns:
127
- Callable[..., Figure]: Function which can be call to create the figure with the associated dataframe
128
  """
129
-
130
  indicator = params["indicator_column"]
131
  location = params["location"]
132
 
@@ -194,13 +226,19 @@ indicator_number_of_days_per_year_at_location: Plot = {
194
  def plot_distribution_of_indicator_for_given_year(
195
  params: dict,
196
  ) -> Callable[..., Figure]:
197
- """Generate an histogram of the distribution of an indicator for a given year
198
-
 
 
 
199
  Args:
200
- params (dict): dictionnary with the required params : model, indicator_column, year
201
-
 
 
 
202
  Returns:
203
- Callable[..., Figure]: Function which can be call to create the figure with the associated dataframe
204
  """
205
  indicator = params["indicator_column"]
206
  year = params["year"]
@@ -257,7 +295,7 @@ def plot_distribution_of_indicator_for_given_year(
257
 
258
  distribution_of_indicator_for_given_year: Plot = {
259
  "name": "Distribution of an indicator for a given year",
260
- "description": "Plot an histogram of the distribution for a given year of the values of an indicator ",
261
  "params": ["indicator_column", "model", "year"],
262
  "plot_function": plot_distribution_of_indicator_for_given_year,
263
  "sql_query": indicator_for_given_year_query,
@@ -267,15 +305,20 @@ distribution_of_indicator_for_given_year: Plot = {
267
  def plot_map_of_france_of_indicator_for_given_year(
268
  params: dict,
269
  ) -> Callable[..., Figure]:
270
- """Generate a plot of the map of France for an indicator at a given year
271
-
 
 
 
272
  Args:
273
- params (dict): dictionnary with the required params : model, indicator_column, year
274
-
 
 
 
275
  Returns:
276
- Callable[..., Figure]: Function which can be call to create the figure with the associated dataframe
277
  """
278
-
279
  indicator = params["indicator_column"]
280
  year = params["year"]
281
  indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
 
12
 
13
 
14
  class Plot(TypedDict):
15
+ """Represents a plot configuration in the DRIAS system.
16
+
17
+ This class defines the structure for configuring different types of plots
18
+ that can be generated from climate data.
19
+
20
+ Attributes:
21
+ name (str): The name of the plot type
22
+ description (str): A description of what the plot shows
23
+ params (list[str]): List of required parameters for the plot
24
+ plot_function (Callable[..., Callable[..., Figure]]): Function to generate the plot
25
+ sql_query (Callable[..., str]): Function to generate the SQL query for the plot
26
+ """
27
  name: str
28
  description: str
29
  params: list[str]
 
32
 
33
 
34
  def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
35
+ """Generates a function to plot indicator evolution over time at a location.
36
+
37
+ This function creates a line plot showing how a climate indicator changes
38
+ over time at a specific location. It handles temperature, precipitation,
39
+ and other climate indicators.
40
+
41
  Args:
42
+ params (dict): Dictionary containing:
43
+ - indicator_column (str): The column name for the indicator
44
+ - location (str): The location to plot
45
+ - model (str): The climate model to use
46
+
47
  Returns:
48
+ Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
49
+
50
+ Example:
51
+ >>> plot_func = plot_indicator_evolution_at_location({
52
+ ... 'indicator_column': 'mean_temperature',
53
+ ... 'location': 'Paris',
54
+ ... 'model': 'ALL'
55
+ ... })
56
+ >>> fig = plot_func(df)
57
  """
58
  indicator = params["indicator_column"]
59
  location = params["location"]
60
  indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
61
 
62
  def plot_data(df: pd.DataFrame) -> Figure:
63
+ """Generates the actual plot from the data.
64
+
65
  Args:
66
+ df (pd.DataFrame): DataFrame containing the data to plot
67
+
68
  Returns:
69
+ Figure: A plotly Figure object showing the indicator evolution
70
  """
71
  fig = go.Figure()
72
  if df['model'].nunique() != 1:
 
145
  def plot_indicator_number_of_days_per_year_at_location(
146
  params: dict,
147
  ) -> Callable[..., Figure]:
148
+ """Generates a function to plot the number of days per year for an indicator.
149
+
150
+ This function creates a bar chart showing the frequency of certain climate
151
+ events (like days above a temperature threshold) per year at a specific location.
152
+
153
  Args:
154
+ params (dict): Dictionary containing:
155
+ - indicator_column (str): The column name for the indicator
156
+ - location (str): The location to plot
157
+ - model (str): The climate model to use
158
+
159
  Returns:
160
+ Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
161
  """
 
162
  indicator = params["indicator_column"]
163
  location = params["location"]
164
 
 
226
  def plot_distribution_of_indicator_for_given_year(
227
  params: dict,
228
  ) -> Callable[..., Figure]:
229
+ """Generates a function to plot the distribution of an indicator for a year.
230
+
231
+ This function creates a histogram showing the distribution of a climate
232
+ indicator across different locations for a specific year.
233
+
234
  Args:
235
+ params (dict): Dictionary containing:
236
+ - indicator_column (str): The column name for the indicator
237
+ - year (str): The year to plot
238
+ - model (str): The climate model to use
239
+
240
  Returns:
241
+ Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
242
  """
243
  indicator = params["indicator_column"]
244
  year = params["year"]
 
295
 
296
  distribution_of_indicator_for_given_year: Plot = {
297
  "name": "Distribution of an indicator for a given year",
298
+ "description": "Plot an histogram of the distribution for a given year of the values of an indicator",
299
  "params": ["indicator_column", "model", "year"],
300
  "plot_function": plot_distribution_of_indicator_for_given_year,
301
  "sql_query": indicator_for_given_year_query,
 
305
  def plot_map_of_france_of_indicator_for_given_year(
306
  params: dict,
307
  ) -> Callable[..., Figure]:
308
+ """Generates a function to plot a map of France for an indicator.
309
+
310
+ This function creates a choropleth map of France showing the spatial
311
+ distribution of a climate indicator for a specific year.
312
+
313
  Args:
314
+ params (dict): Dictionary containing:
315
+ - indicator_column (str): The column name for the indicator
316
+ - year (str): The year to plot
317
+ - model (str): The climate model to use
318
+
319
  Returns:
320
+ Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
321
  """
 
322
  indicator = params["indicator_column"]
323
  year = params["year"]
324
  indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
climateqa/engine/talk_to_data/sql_query.py CHANGED
@@ -3,16 +3,21 @@ import duckdb
3
  import pandas as pd
4
 
5
  def execute_sql_query(sql_query: str) -> pd.DataFrame:
6
- """Execute the SQL Query on the sqlite database
7
-
 
 
 
 
8
  Args:
9
- sql_query (str): sql query to execute
10
-
11
  Returns:
12
- SqlQueryOutput: labels of the selected column and fetched data
 
 
 
13
  """
14
-
15
-
16
  # Execute the query
17
  results = duckdb.sql(sql_query)
18
 
@@ -21,6 +26,17 @@ def execute_sql_query(sql_query: str) -> pd.DataFrame:
21
 
22
 
23
  class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
 
 
 
 
 
 
 
 
 
 
 
24
  indicator_column: str
25
  latitude: str
26
  longitude: str
@@ -53,6 +69,16 @@ def indicator_per_year_at_location_query(
53
  return sql_query
54
 
55
  class IndicatorForGivenYearQueryParams(TypedDict, total=False):
 
 
 
 
 
 
 
 
 
 
56
  indicator_column: str
57
  year: str
58
  model: str
 
3
  import pandas as pd
4
 
5
  def execute_sql_query(sql_query: str) -> pd.DataFrame:
6
+ """Executes a SQL query on the DRIAS database and returns the results.
7
+
8
+ This function passes the provided SQL query to `duckdb.sql` (DuckDB's
9
+ module-level default connection) rather than opening or managing a database
10
+ connection of its own, and returns the results as a pandas DataFrame.
11
+
12
  Args:
13
+ sql_query (str): The SQL query to execute
14
+
15
  Returns:
16
+ pd.DataFrame: A DataFrame containing the query results
17
+
18
+ Raises:
19
+ duckdb.Error: If there is an error executing the SQL query
20
  """
 
 
21
  # Execute the query
22
  results = duckdb.sql(sql_query)
23
 
 
26
 
27
 
28
  class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
29
+ """Parameters for querying an indicator's values over time at a location.
30
+
31
+ This class defines the parameters needed to query climate indicator data
32
+ for a specific location over multiple years.
33
+
34
+ Attributes:
35
+ indicator_column (str): The column name for the climate indicator
36
+ latitude (str): The latitude coordinate of the location
37
+ longitude (str): The longitude coordinate of the location
38
+ model (str): The climate model to use (optional)
39
+ """
40
  indicator_column: str
41
  latitude: str
42
  longitude: str
 
69
  return sql_query
70
 
71
  class IndicatorForGivenYearQueryParams(TypedDict, total=False):
72
+ """Parameters for querying an indicator's values across locations for a year.
73
+
74
+ This class defines the parameters needed to query climate indicator data
75
+ across different locations for a specific year.
76
+
77
+ Attributes:
78
+ indicator_column (str): The column name for the climate indicator
79
+ year (str): The year to query
80
+ model (str): The climate model to use (optional)
81
+ """
82
  indicator_column: str
83
  year: str
84
  model: str
climateqa/engine/talk_to_data/utils.py CHANGED
@@ -30,9 +30,15 @@ def detect_location_with_openai(sentence):
30
  return ""
31
 
32
  class ArrayOutput(TypedDict):
33
- """Generated SQL query."""
34
-
35
- array: Annotated[str, ..., "Syntactically valid python array."]
 
 
 
 
 
 
36
 
37
  def detect_year_with_openai(sentence: str) -> str:
38
  """
@@ -58,19 +64,63 @@ def detect_year_with_openai(sentence: str) -> str:
58
  return ""
59
 
60
 
61
- def detectTable(sql_query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
63
  matches = re.findall(pattern, sql_query)
64
  return matches
65
 
66
 
67
- def loc2coords(location: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  geolocator = Nominatim(user_agent="city_to_latlong")
69
  coords = geolocator.geocode(location)
70
  return (coords.latitude, coords.longitude)
71
 
72
 
73
- def coords2loc(coords: tuple):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  geolocator = Nominatim(user_agent="coords_to_city")
75
  try:
76
  location = geolocator.reverse(coords)
@@ -97,17 +147,28 @@ def nearestNeighbourSQL(location: tuple, table: str) -> tuple[str, str]:
97
 
98
 
99
  def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
100
- """Detect relevant tables regarding the plot and the user input
101
-
 
 
 
 
102
  Args:
103
- user_question (str): initial user input
104
- plot (Plot): plot object for which we wanna plot
105
- llm (_type_): LLM
106
-
107
  Returns:
108
- list[str]: list of table names
 
 
 
 
 
 
 
 
109
  """
110
-
111
  # Get all table names
112
  table_names_list = DRIAS_TABLES
113
 
@@ -121,7 +182,6 @@ def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
121
  f"### List of table name : "
122
  )
123
 
124
-
125
  table_names = ast.literal_eval(
126
  llm.invoke(prompt).content.strip("```python\n").strip()
127
  )
 
30
  return ""
31
 
32
  class ArrayOutput(TypedDict):
33
+ """Represents the output of a function that returns an array.
34
+
35
+ This class is used to type-hint functions that return arrays,
36
+ ensuring consistent return types across the codebase.
37
+
38
+ Attributes:
39
+ array (str): A syntactically valid Python array string
40
+ """
41
+ array: Annotated[str, "Syntactically valid python array."]
42
 
43
  def detect_year_with_openai(sentence: str) -> str:
44
  """
 
64
  return ""
65
 
66
 
67
+ def detectTable(sql_query: str) -> list[str]:
68
+ """Extracts table names from a SQL query.
69
+
70
+ This function uses regular expressions to find all table names
71
+ that directly follow a FROM keyword; tables introduced only by JOIN or listed after a comma are not matched.
72
+
73
+ Args:
74
+ sql_query (str): The SQL query to analyze
75
+
76
+ Returns:
77
+ list[str]: A list of table names found in the query
78
+
79
+ Example:
80
+ >>> detectTable("SELECT * FROM temperature_data WHERE year > 2000")
81
+ ['temperature_data']
82
+ """
83
  pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
84
  matches = re.findall(pattern, sql_query)
85
  return matches
86
 
87
 
88
+ def loc2coords(location: str) -> tuple[float, float]:
89
+ """Converts a location name to geographic coordinates.
90
+
91
+ This function uses the Nominatim geocoding service to convert
92
+ a location name (e.g., city name) to its latitude and longitude.
93
+
94
+ Args:
95
+ location (str): The name of the location to geocode
96
+
97
+ Returns:
98
+ tuple[float, float]: A tuple containing (latitude, longitude)
99
+
100
+ Raises:
101
+ AttributeError: If the location cannot be found
102
+ """
103
  geolocator = Nominatim(user_agent="city_to_latlong")
104
  coords = geolocator.geocode(location)
105
  return (coords.latitude, coords.longitude)
106
 
107
 
108
+ def coords2loc(coords: tuple[float, float]) -> str:
109
+ """Converts geographic coordinates to a location name.
110
+
111
+ This function uses the Nominatim reverse geocoding service to convert
112
+ latitude and longitude coordinates to a human-readable location name.
113
+
114
+ Args:
115
+ coords (tuple[float, float]): A tuple containing (latitude, longitude)
116
+
117
+ Returns:
118
+ str: The address of the location, or "Unknown Location" if not found
119
+
120
+ Example:
121
+ >>> coords2loc((48.8566, 2.3522))
122
+ 'Paris, France'
123
+ """
124
  geolocator = Nominatim(user_agent="coords_to_city")
125
  try:
126
  location = geolocator.reverse(coords)
 
147
 
148
 
149
  def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
150
+ """Identifies relevant tables for a plot based on user input.
151
+
152
+ This function uses an LLM to analyze the user's question and the plot
153
+ description to determine which tables in the DRIAS database would be
154
+ most relevant for generating the requested visualization.
155
+
156
  Args:
157
+ user_question (str): The user's question about climate data
158
+ plot (Plot): The plot configuration object
159
+ llm: The language model instance to use for analysis
160
+
161
  Returns:
162
+ list[str]: A list of table names that are relevant for the plot
163
+
164
+ Example:
165
+ >>> detect_relevant_tables(
166
+ ... "What will the temperature be like in Paris?",
167
+ ... indicator_evolution_at_location,
168
+ ... llm
169
+ ... )
170
+ ['mean_annual_temperature', 'mean_summer_temperature']
171
  """
 
172
  # Get all table names
173
  table_names_list = DRIAS_TABLES
174
 
 
182
  f"### List of table name : "
183
  )
184
 
 
185
  table_names = ast.literal_eval(
186
  llm.invoke(prompt).content.strip("```python\n").strip()
187
  )
climateqa/engine/talk_to_data/workflow.py CHANGED
@@ -22,6 +22,19 @@ ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
22
  DRIAS_DB_PATH = ROOT_PATH + "/data/drias/drias.db"
23
 
24
  class TableState(TypedDict):
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  table_name: str
26
  params: dict[str, Any]
27
  sql_query: NotRequired[str]
@@ -30,6 +43,16 @@ class TableState(TypedDict):
30
  status: str
31
 
32
  class PlotState(TypedDict):
 
 
 
 
 
 
 
 
 
 
33
  plot_name: str
34
  tables: list[str]
35
  table_states: dict[str, TableState]
@@ -190,22 +213,37 @@ def find_location(user_input: str, table: str) -> Location:
190
  return output
191
 
192
  def find_year(user_input: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
193
  print(f"---- Find year ---")
194
  year = detect_year_with_openai(user_input)
195
  return year
196
 
197
  def find_indicator_column(table: str) -> str:
198
- """Retrieve the name of the indicator column within the table in the database
199
-
 
 
 
200
  Args:
201
- table (str): name of the table
202
-
203
  Returns:
204
- str: name of the indicator column
 
 
 
205
  """
206
-
207
  print(f"---- Find indicator column in table {table} ----")
208
-
209
  return INDICATOR_COLUMNS_PER_TABLE[table]
210
 
211
 
 
22
  DRIAS_DB_PATH = ROOT_PATH + "/data/drias/drias.db"
23
 
24
  class TableState(TypedDict):
25
+ """Represents the state of a table in the DRIAS workflow.
26
+
27
+ This class defines the structure for tracking the state of a table during the
28
+ data processing workflow, including its name, parameters, SQL query, and results.
29
+
30
+ Attributes:
31
+ table_name (str): The name of the table in the database
32
+ params (dict[str, Any]): Parameters used for querying the table
33
+ sql_query (str, optional): The SQL query used to fetch data
34
+ dataframe (pd.DataFrame | None, optional): The resulting data
35
+ figure (Callable[..., Figure], optional): Function to generate visualization
36
+ status (str): The current status of the table processing ('OK' or 'ERROR')
37
+ """
38
  table_name: str
39
  params: dict[str, Any]
40
  sql_query: NotRequired[str]
 
43
  status: str
44
 
45
  class PlotState(TypedDict):
46
+ """Represents the state of a plot in the DRIAS workflow.
47
+
48
+ This class defines the structure for tracking the state of a plot during the
49
+ data processing workflow, including its name and associated tables.
50
+
51
+ Attributes:
52
+ plot_name (str): The name of the plot
53
+ tables (list[str]): List of tables used in the plot
54
+ table_states (dict[str, TableState]): States of the tables used in the plot
55
+ """
56
  plot_name: str
57
  tables: list[str]
58
  table_states: dict[str, TableState]
 
213
  return output
214
 
215
  def find_year(user_input: str) -> str:
216
+ """Extracts year information from user input using LLM.
217
+
218
+ This function uses an LLM to identify and extract year information from the
219
+ user's query, which is used to filter data in subsequent queries.
220
+
221
+ Args:
222
+ user_input (str): The user's query text
223
+
224
+ Returns:
225
+ str: The extracted year, or empty string if no year found
226
+ """
227
  print(f"---- Find year ---")
228
  year = detect_year_with_openai(user_input)
229
  return year
230
 
231
  def find_indicator_column(table: str) -> str:
232
+ """Retrieves the name of the indicator column within a table.
233
+
234
+ This function maps table names to their corresponding indicator columns
235
+ using the predefined mapping in INDICATOR_COLUMNS_PER_TABLE.
236
+
237
  Args:
238
+ table (str): Name of the table in the database
239
+
240
  Returns:
241
+ str: Name of the indicator column for the specified table
242
+
243
+ Raises:
244
+ KeyError: If the table name is not found in the mapping
245
  """
 
246
  print(f"---- Find indicator column in table {table} ----")
 
247
  return INDICATOR_COLUMNS_PER_TABLE[table]
248
 
249