Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Community

armanddemasson committed 30 days ago

Commit

11ab5fb

2 Parent(s): e92e8dc 819e3c0

Merged in feature/talk_to_data (pull request #19)

Browse files

Files changed (29) hide show

app.py +3 -2
climateqa/engine/chains/retrieve_documents.py +6 -4
climateqa/engine/talk_to_data/config.py +8 -96
climateqa/engine/talk_to_data/drias/config.py +124 -0
climateqa/engine/talk_to_data/drias/plot_informations.py +88 -0
climateqa/engine/talk_to_data/{plot.py → drias/plots.py} +72 -56
climateqa/engine/talk_to_data/{sql_query.py → drias/queries.py} +5 -36
climateqa/engine/talk_to_data/{utils.py → input_processing.py} +144 -168
climateqa/engine/talk_to_data/ipcc/config.py +98 -0
climateqa/engine/talk_to_data/ipcc/plot_informations.py +50 -0
climateqa/engine/talk_to_data/ipcc/plots.py +189 -0
climateqa/engine/talk_to_data/ipcc/queries.py +143 -0
climateqa/engine/talk_to_data/main.py +77 -71
climateqa/engine/talk_to_data/objects/llm_outputs.py +13 -0
climateqa/engine/talk_to_data/objects/location.py +12 -0
climateqa/engine/talk_to_data/objects/plot.py +23 -0
climateqa/engine/talk_to_data/objects/states.py +19 -0
climateqa/engine/talk_to_data/prompt.py +44 -0
climateqa/engine/talk_to_data/query.py +57 -0
climateqa/engine/talk_to_data/talk_to_drias.py +0 -317
climateqa/engine/talk_to_data/ui_config.py +27 -0
climateqa/engine/talk_to_data/{myVanna.py → vanna/myVanna.py} +0 -0
climateqa/engine/talk_to_data/{vanna_class.py → vanna/vanna_class.py} +0 -0
climateqa/engine/talk_to_data/workflow/drias.py +163 -0
climateqa/engine/talk_to_data/workflow/ipcc.py +161 -0
front/tabs/tab_drias.py +60 -149
front/tabs/tab_ipcc.py +300 -0
requirements.txt +2 -1
style.css +39 -7

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ from climateqa.chat import start_chat, chat_stream, finish_chat
 from front.tabs import create_config_modal, cqa_tab, create_about_tab
 from front.tabs import MainTabPanel, ConfigPanel
 from front.tabs.tab_drias import create_drias_tab
 from front.utils import process_figures
 from gradio_modal import Modal
@@ -532,8 +533,8 @@ def main_ui():
         with gr.Tabs():
             cqa_components = cqa_tab(tab_name="ClimateQ&A")
             local_cqa_components = cqa_tab(tab_name="France - Local Q&A")
-            create_drias_tab(share_client=share_client, user_id=user_id)
             create_about_tab()
         event_handling(cqa_components, config_components, tab_name="ClimateQ&A")

 from front.tabs import create_config_modal, cqa_tab, create_about_tab
 from front.tabs import MainTabPanel, ConfigPanel
 from front.tabs.tab_drias import create_drias_tab
+from front.tabs.tab_ipcc import create_ipcc_tab
 from front.utils import process_figures
 from gradio_modal import Modal
         with gr.Tabs():
             cqa_components = cqa_tab(tab_name="ClimateQ&A")
             local_cqa_components = cqa_tab(tab_name="France - Local Q&A")
+            drias_components = create_drias_tab(share_client=share_client, user_id=user_id)
+            ipcc_components = create_ipcc_tab(share_client=share_client, user_id=user_id)
             create_about_tab()
         event_handling(cqa_components, config_components, tab_name="ClimateQ&A")

climateqa/engine/chains/retrieve_documents.py CHANGED Viewed

@@ -21,7 +21,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from ..vectorstore import get_pinecone_vectorstore
 from ..embeddings import get_embeddings_function
 import asyncio
@@ -477,8 +477,10 @@ async def retrieve_documents(
                 docs_question_dict[key] = rerank_and_sort_docs(reranker,docs_question_dict[key],question)
     else:
         # Add a default reranking score
-        for doc in docs_question:
-            doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
     # Keep the right number of documents
     docs_question, images_question = concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question)
@@ -580,7 +582,7 @@ async def get_relevant_toc_level_for_query(
     response = chain.invoke({"query": query, "doc_list": doc_list})
     try:
-        relevant_tocs = eval(response)
     except Exception as e:
         print(f" Failed to parse the result because of : {e}")

 from langchain_core.output_parsers import StrOutputParser
 from ..vectorstore import get_pinecone_vectorstore
 from ..embeddings import get_embeddings_function
+import ast
 import asyncio
                 docs_question_dict[key] = rerank_and_sort_docs(reranker,docs_question_dict[key],question)
     else:
         # Add a default reranking score
+        for key in docs_question_dict.keys():
+            if isinstance(docs_question_dict[key], list) and len(docs_question_dict[key]) > 0:
+                for doc in docs_question_dict[key]:
+                    doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
     # Keep the right number of documents
     docs_question, images_question = concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question)
     response = chain.invoke({"query": query, "doc_list": doc_list})
     try:
+        relevant_tocs = ast.literal_eval(response)
     except Exception as e:
         print(f" Failed to parse the result because of : {e}")

climateqa/engine/talk_to_data/config.py CHANGED Viewed

@@ -1,99 +1,11 @@
-DRIAS_TABLES = [
-    "total_winter_precipitation",
-    "total_summer_precipiation",
-    "total_annual_precipitation",
-    "total_remarkable_daily_precipitation",
-    "frequency_of_remarkable_daily_precipitation",
-    "extreme_precipitation_intensity",
-    "mean_winter_temperature",
-    "mean_summer_temperature",
-    "mean_annual_temperature",
-    "number_of_tropical_nights",
-    "maximum_summer_temperature",
-    "number_of_days_with_tx_above_30",
-    "number_of_days_with_tx_above_35",
-    "number_of_days_with_a_dry_ground",
-]
-INDICATOR_COLUMNS_PER_TABLE = {
-    "total_winter_precipitation": "total_winter_precipitation",
-    "total_summer_precipiation": "total_summer_precipitation",
-    "total_annual_precipitation": "total_annual_precipitation",
-    "total_remarkable_daily_precipitation": "total_remarkable_daily_precipitation",
-    "frequency_of_remarkable_daily_precipitation": "frequency_of_remarkable_daily_precipitation",
-    "extreme_precipitation_intensity": "extreme_precipitation_intensity",
-    "mean_winter_temperature": "mean_winter_temperature",
-    "mean_summer_temperature": "mean_summer_temperature",
-    "mean_annual_temperature": "mean_annual_temperature",
-    "number_of_tropical_nights": "number_tropical_nights",
-    "maximum_summer_temperature": "maximum_summer_temperature",
-    "number_of_days_with_tx_above_30": "number_of_days_with_tx_above_30",
-    "number_of_days_with_tx_above_35": "number_of_days_with_tx_above_35",
-    "number_of_days_with_a_dry_ground": "number_of_days_with_dry_ground"
-}
-DRIAS_MODELS = [
-    'ALL',
-    'RegCM4-6_MPI-ESM-LR',
-    'RACMO22E_EC-EARTH',
-    'RegCM4-6_HadGEM2-ES',
-    'HadREM3-GA7_EC-EARTH',
-    'HadREM3-GA7_CNRM-CM5',
-    'REMO2015_NorESM1-M',
-    'SMHI-RCA4_EC-EARTH',
-    'WRF381P_NorESM1-M',
-    'ALADIN63_CNRM-CM5',
-    'CCLM4-8-17_MPI-ESM-LR',
-    'HIRHAM5_IPSL-CM5A-MR',
-    'HadREM3-GA7_HadGEM2-ES',
-    'SMHI-RCA4_IPSL-CM5A-MR',
-    'HIRHAM5_NorESM1-M',
-    'REMO2009_MPI-ESM-LR',
-    'CCLM4-8-17_HadGEM2-ES'
-]
-# Mapping between indicator columns and their units
-INDICATOR_TO_UNIT = {
-    "total_winter_precipitation": "mm",
-    "total_summer_precipitation": "mm",
-    "total_annual_precipitation": "mm",
-    "total_remarkable_daily_precipitation": "mm",
-    "frequency_of_remarkable_daily_precipitation": "days",
-    "extreme_precipitation_intensity": "mm",
-    "mean_winter_temperature": "°C",
-    "mean_summer_temperature": "°C",
-    "mean_annual_temperature": "°C",
-    "number_tropical_nights": "days",
-    "maximum_summer_temperature": "°C",
-    "number_of_days_with_tx_above_30": "days",
-    "number_of_days_with_tx_above_35": "days",
-    "number_of_days_with_dry_ground": "days"
-}
-DRIAS_UI_TEXT = """
-Hi, I'm **Talk to Drias**, designed to answer your questions using [**DRIAS - TRACC 2023**](https://www.drias-climat.fr/accompagnement/sections/401) data.
-I'll answer by displaying a list of SQL queries, graphs and data most relevant to your question.
-❓ **How to use?**
-You can ask me anything about these climate indicators: **temperature**, **precipitation** or **drought**.
-You can specify **location** and/or **year**.
-You can choose from a list of climate models. By default, we take the **average of each model**.
-For example, you can ask:
-- What will the temperature be like in Paris?
-- What will be the total rainfall in France in 2030?
-- How frequent will extreme events be in Lyon?
-**Example of indicators in the data**:
-- Mean temperature (annual, winter, summer)
-- Total precipitation (annual, winter, summer)
-- Number of days with remarkable precipitations, with dry ground, with temperature above 30°C
-⚠️ **Limitations**:
-- You can't ask anything that isn't related to **DRIAS - TRACC 2023** data.
-- You can only ask about **locations in France**.
-- If you specify a year, there may be **no data for that year for some models**.
-- You **cannot compare two models**.
-🛈 **Information**
-Please note that we **log your questions for meta-analysis purposes**, so avoid sharing any sensitive or personal information.
-"""

+# Path configuration for climateqa project
+# IPCC dataset path
+IPCC_DATASET_URL = "hf://datasets/ekimetrics/ipcc-atlas"
+# DRIAS dataset paths
+DRIAS_DATASET_URL = "hf://datasets/timeki/drias_db"
+# Table paths
+DRIAS_MEAN_ANNUAL_TEMPERATURE_PATH = f"{DRIAS_DATASET_URL}/mean_annual_temperature.parquet"
+IPCC_COORDINATES_PATH = f"{IPCC_DATASET_URL}/coordinates.parquet"

climateqa/engine/talk_to_data/drias/config.py ADDED Viewed

	@@ -0,0 +1,124 @@

+from climateqa.engine.talk_to_data.ui_config import PRECIPITATION_COLORSCALE, TEMPERATURE_COLORSCALE
+DRIAS_TABLES = [
+    "total_winter_precipitation",
+    "total_summer_precipitation",
+    "total_annual_precipitation",
+    "total_remarkable_daily_precipitation",
+    "frequency_of_remarkable_daily_precipitation",
+    "extreme_precipitation_intensity",
+    "mean_winter_temperature",
+    "mean_summer_temperature",
+    "mean_annual_temperature",
+    "number_of_tropical_nights",
+    "maximum_summer_temperature",
+    "number_of_days_with_tx_above_30",
+    "number_of_days_with_tx_above_35",
+    "number_of_days_with_a_dry_ground",
+]
+DRIAS_INDICATOR_COLUMNS_PER_TABLE = {
+    "total_winter_precipitation": "total_winter_precipitation",
+    "total_summer_precipitation": "total_summer_precipitation",
+    "total_annual_precipitation": "total_annual_precipitation",
+    "total_remarkable_daily_precipitation": "total_remarkable_daily_precipitation",
+    "frequency_of_remarkable_daily_precipitation": "frequency_of_remarkable_daily_precipitation",
+    "extreme_precipitation_intensity": "extreme_precipitation_intensity",
+    "mean_winter_temperature": "mean_winter_temperature",
+    "mean_summer_temperature": "mean_summer_temperature",
+    "mean_annual_temperature": "mean_annual_temperature",
+    "number_of_tropical_nights": "number_tropical_nights",
+    "maximum_summer_temperature": "maximum_summer_temperature",
+    "number_of_days_with_tx_above_30": "number_of_days_with_tx_above_30",
+    "number_of_days_with_tx_above_35": "number_of_days_with_tx_above_35",
+    "number_of_days_with_a_dry_ground": "number_of_days_with_dry_ground"
+}
+DRIAS_MODELS = [
+    'ALL',
+    'RegCM4-6_MPI-ESM-LR',
+    'RACMO22E_EC-EARTH',
+    'RegCM4-6_HadGEM2-ES',
+    'HadREM3-GA7_EC-EARTH',
+    'HadREM3-GA7_CNRM-CM5',
+    'REMO2015_NorESM1-M',
+    'SMHI-RCA4_EC-EARTH',
+    'WRF381P_NorESM1-M',
+    'ALADIN63_CNRM-CM5',
+    'CCLM4-8-17_MPI-ESM-LR',
+    'HIRHAM5_IPSL-CM5A-MR',
+    'HadREM3-GA7_HadGEM2-ES',
+    'SMHI-RCA4_IPSL-CM5A-MR',
+    'HIRHAM5_NorESM1-M',
+    'REMO2009_MPI-ESM-LR',
+    'CCLM4-8-17_HadGEM2-ES'
+]
+# Mapping between indicator columns and their units
+DRIAS_INDICATOR_TO_UNIT = {
+    "total_winter_precipitation": "mm",
+    "total_summer_precipitation": "mm",
+    "total_annual_precipitation": "mm",
+    "total_remarkable_daily_precipitation": "mm",
+    "frequency_of_remarkable_daily_precipitation": "days",
+    "extreme_precipitation_intensity": "mm",
+    "mean_winter_temperature": "°C",
+    "mean_summer_temperature": "°C",
+    "mean_annual_temperature": "°C",
+    "number_tropical_nights": "days",
+    "maximum_summer_temperature": "°C",
+    "number_of_days_with_tx_above_30": "days",
+    "number_of_days_with_tx_above_35": "days",
+    "number_of_days_with_dry_ground": "days"
+}
+DRIAS_PLOT_PARAMETERS = [
+    'year',
+    'location'
+]
+DRIAS_INDICATOR_TO_COLORSCALE = {
+    "total_winter_precipitation": PRECIPITATION_COLORSCALE,
+    "total_summer_precipitation": PRECIPITATION_COLORSCALE,
+    "total_annual_precipitation": PRECIPITATION_COLORSCALE,
+    "total_remarkable_daily_precipitation": PRECIPITATION_COLORSCALE,
+    "frequency_of_remarkable_daily_precipitation": PRECIPITATION_COLORSCALE,
+    "extreme_precipitation_intensity": PRECIPITATION_COLORSCALE,
+    "mean_winter_temperature":TEMPERATURE_COLORSCALE,
+    "mean_summer_temperature":TEMPERATURE_COLORSCALE,
+    "mean_annual_temperature":TEMPERATURE_COLORSCALE,
+    "number_tropical_nights": TEMPERATURE_COLORSCALE,
+    "maximum_summer_temperature":TEMPERATURE_COLORSCALE,
+    "number_of_days_with_tx_above_30": TEMPERATURE_COLORSCALE,
+    "number_of_days_with_tx_above_35": TEMPERATURE_COLORSCALE,
+    "number_of_days_with_dry_ground": TEMPERATURE_COLORSCALE
+}
+DRIAS_UI_TEXT = """
+Hi, I'm **Talk to Drias**, designed to answer your questions using [**DRIAS - TRACC 2023**](https://www.drias-climat.fr/accompagnement/sections/401) data.
+I'll answer by displaying a list of SQL queries, graphs and data most relevant to your question.
+You can ask me anything about these climate indicators: **temperature**, **precipitation** or **drought**.
+You can specify **location** and/or **year**.
+You can choose from a list of climate models. By default, we take the **average of each model**.
+For example, you can ask:
+- What will the temperature be like in Paris?
+- What will be the total rainfall in France in 2030?
+- How frequent will extreme events be in Lyon?
+**Example of indicators in the data**:
+- Mean temperature (annual, winter, summer)
+- Total precipitation (annual, winter, summer)
+- Number of days with remarkable precipitations, with dry ground, with temperature above 30°C
+⚠️ **Limitations**:
+- You can't ask anything that isn't related to **DRIAS - TRACC 2023** data.
+- You can only ask about **locations in France**.
+- If you specify a year, there may be **no data for that year for some models**.
+- You **cannot compare two models**.
+🛈 **Information**
+Please note that we **log your questions for meta-analysis purposes**, so avoid sharing any sensitive or personal information.
+"""

climateqa/engine/talk_to_data/drias/plot_informations.py ADDED Viewed

	@@ -0,0 +1,88 @@

+from climateqa.engine.talk_to_data.drias.config import DRIAS_INDICATOR_TO_UNIT
+def indicator_evolution_informations(
+        indicator: str,
+        params: dict[str, str]
+) -> str:
+    unit = DRIAS_INDICATOR_TO_UNIT[indicator]
+    if "location" not in params:
+        raise ValueError('"location" must be provided in params')
+    location = params["location"]
+    return f"""
+This plot shows how the climate indicator **{indicator}** evolves over time in **{location}**.
+It combines both historical observations and future projections according to the climate scenario RCP8.5.
+The x-axis represents the years, and the y-axis shows the value of the indicator ({unit}).
+A 10-year rolling average curve is displayed to give a better idea of the overall trend.
+**Data source:**
+- The data come from the DRIAS TRACC data. The data were initially extracted from [the DRIAS website](https://www.drias-climat.fr/drias_prod/accueil/okapiWebDrias/index.jsp?iddrias=climat) and then preprocessed to a tabular format and uploaded as parquet in this [Hugging Face dataset](https://huggingface.co/datasets/timeki/drias_db).
+- For each year and climate model, the value of {indicator} in {location} is collected, to build the time series.
+- The coordinates used for {location} correspond to the closest available point in the DRIAS database, which uses a regular grid with a spatial resolution of 8 km.
+- The indicator values shown are those for the selected climate model.
+- If ALL climate model is selected, the average value of the indicator between all the climate models is used.
+"""
+def indicator_number_of_days_per_year_informations(
+        indicator: str,
+        params: dict[str, str]
+) -> str:
+    unit = DRIAS_INDICATOR_TO_UNIT[indicator]
+    if "location" not in params:
+        raise ValueError('"location" must be provided in params')
+    location = params["location"]
+    return f"""
+This plot displays a bar chart showing the yearly frequency of the climate indicator **{indicator}** in **{location}**.
+The x-axis represents the years, and the y-axis shows the frequency of {indicator} ({unit}) per year.
+**Data source:**
+- The data come from the DRIAS TRACC data. The data were initially extracted from [the DRIAS website](https://www.drias-climat.fr/drias_prod/accueil/okapiWebDrias/index.jsp?iddrias=climat) and then preprocessed to a tabular format and uploaded as parquet in this [Hugging Face dataset](https://huggingface.co/datasets/timeki/drias_db).
+- For each year and climate model, the value of {indicator} in {location} is collected, to build the time series.
+- The coordinates used for {location} correspond to the closest available point in the DRIAS database, which uses a regular grid with a spatial resolution of 8 km.
+- The indicator values shown are those for the selected climate model.
+- If ALL climate model is selected, the average value of the indicator between all the climate models is used.
+"""
+def distribution_of_indicator_for_given_year_informations(
+        indicator: str,
+        params: dict[str, str]
+) -> str:
+    unit = DRIAS_INDICATOR_TO_UNIT[indicator]
+    year = params["year"]
+    if year is None:
+        year = 2030
+    return f"""
+This plot shows a histogram of the distribution of the climate indicator **{indicator}** across all locations for the year **{year}**.
+It allows you to visualize how the values of {indicator} ({unit}) are spread for a given year.
+**Data source:**
+- The data come from the DRIAS TRACC data. The data were initially extracted from [the DRIAS website](https://www.drias-climat.fr/drias_prod/accueil/okapiWebDrias/index.jsp?iddrias=climat) and then preprocessed to a tabular format and uploaded as parquet in this [Hugging Face dataset](https://huggingface.co/datasets/timeki/drias_db).
+- For each grid point in the dataset and climate model, the value of {indicator} for the year {year} is extracted.
+- The indicator values shown are those for the selected climate model.
+- If ALL climate model is selected, the average value of the indicator between all the climate models is used.
+"""
+def map_of_france_of_indicator_for_given_year_informations(
+        indicator: str,
+        params: dict[str, str]
+) -> str:
+    unit = DRIAS_INDICATOR_TO_UNIT[indicator]
+    year = params["year"]
+    if year is None:
+        year = 2030
+    return f"""
+This plot displays a choropleth map showing the spatial distribution of **{indicator}** across all regions of France for the year **{year}**.
+Each region is colored according to the value of the indicator ({unit}), allowing you to visually compare how {indicator} varies geographically within France for the selected year and climate model.
+**Data source:**
+- The data come from the DRIAS TRACC data. The data were initially extracted from [the DRIAS website](https://www.drias-climat.fr/drias_prod/accueil/okapiWebDrias/index.jsp?iddrias=climat) and then preprocessed to a tabular format and uploaded as parquet in this [Hugging Face dataset](https://huggingface.co/datasets/timeki/drias_db).
+- For each region of France, the value of {indicator} in {year} and for the selected climate model is extracted and mapped to its geographic coordinates.
+- The regions correspond to 8 km squares centered on the grid points of the DRIAS dataset.
+- The indicator values shown are those for the selected climate model.
+- If ALL climate model is selected, the average value of the indicator between all the climate models is used.
+"""

climateqa/engine/talk_to_data/{plot.py → drias/plots.py} RENAMED Viewed

@@ -1,38 +1,39 @@
-from typing import Callable, TypedDict
-from matplotlib.figure import figaspect
 import pandas as pd
 from plotly.graph_objects import Figure
 import plotly.graph_objects as go
-import plotly.express as px
-from climateqa.engine.talk_to_data.sql_query import (
     indicator_for_given_year_query,
     indicator_per_year_at_location_query,
 )
-from climateqa.engine.talk_to_data.config import INDICATOR_TO_UNIT
-class Plot(TypedDict):
-    """Represents a plot configuration in the DRIAS system.
-    This class defines the structure for configuring different types of plots
-    that can be generated from climate data.
-    Attributes:
-        name (str): The name of the plot type
-        description (str): A description of what the plot shows
-        params (list[str]): List of required parameters for the plot
-        plot_function (Callable[..., Callable[..., Figure]]): Function to generate the plot
-        sql_query (Callable[..., str]): Function to generate the SQL query for the plot
-    """
-    name: str
-    description: str
-    params: list[str]
-    plot_function: Callable[..., Callable[..., Figure]]
-    sql_query: Callable[..., str]
 def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
     """Generates a function to plot indicator evolution over time at a location.
@@ -61,7 +62,7 @@ def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
     indicator = params["indicator_column"]
     location = params["location"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
-    unit = INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         """Generates the actual plot from the data.
@@ -145,10 +146,11 @@ def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
             hovertemplate=f"{indicator_label}: %{{y:.2f}} {unit}<br>Year: %{{x}}<extra></extra>"
         )
         fig.update_layout(
-            title=f"Plot of {indicator_label} in {location} ({model_label})",
             xaxis_title="Year",
             yaxis_title=f"{indicator_label} ({unit})",
             template="plotly_white",
         )
         return fig
@@ -161,6 +163,8 @@ indicator_evolution_at_location: Plot = {
     "params": ["indicator_column", "location", "model"],
     "plot_function": plot_indicator_evolution_at_location,
     "sql_query": indicator_per_year_at_location_query,
 }
@@ -184,7 +188,7 @@ def plot_indicator_number_of_days_per_year_at_location(
     indicator = params["indicator_column"]
     location = params["location"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
-    unit = INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         """Generate the figure thanks to the dataframe
@@ -229,6 +233,7 @@ def plot_indicator_number_of_days_per_year_at_location(
             yaxis_title=f"{indicator_label} ({unit})",
             yaxis=dict(range=[0, max(indicators)]),
             bargap=0.5,
             template="plotly_white",
         )
@@ -243,6 +248,8 @@ indicator_number_of_days_per_year_at_location: Plot = {
     "params": ["indicator_column", "location", "model"],
     "plot_function": plot_indicator_number_of_days_per_year_at_location,
     "sql_query": indicator_per_year_at_location_query,
 }
@@ -265,8 +272,10 @@ def plot_distribution_of_indicator_for_given_year(
     """
     indicator = params["indicator_column"]
     year = params["year"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
-    unit = INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         """Generate the figure thanks to the dataframe
@@ -311,6 +320,7 @@ def plot_distribution_of_indicator_for_given_year(
             yaxis_title="Frequency (%)",
             plot_bgcolor="rgba(0, 0, 0, 0)",
             showlegend=False,
         )
         return fig
@@ -324,6 +334,8 @@ distribution_of_indicator_for_given_year: Plot = {
     "params": ["indicator_column", "model", "year"],
     "plot_function": plot_distribution_of_indicator_for_given_year,
     "sql_query": indicator_for_given_year_query,
 }
@@ -346,8 +358,10 @@ def plot_map_of_france_of_indicator_for_given_year(
     """
     indicator = params["indicator_column"]
     year = params["year"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
-    unit = INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         fig = go.Figure()
@@ -371,27 +385,28 @@ def plot_map_of_france_of_indicator_for_given_year(
             model_label = f"Model : {df['model'].unique()[0]}"
-        fig.add_trace(
-            go.Scattermapbox(
-                lat=latitudes,
-                lon=longitudes,
-                mode="markers",
-                marker=dict(
-                    size=10,
-                    color=indicators,  # Color mapped to values
-                    colorscale="Turbo",  # Color scale (can be 'Plasma', 'Jet', etc.)
-                    cmin=min(indicators),  # Minimum color range
-                    cmax=max(indicators),  # Maximum color range
-                    showscale=True,  # Show colorbar
-                ),
-                text=[f"{indicator_label}: {value:.2f} {unit}" for value in indicators],  # Add hover text showing the indicator value
-                hoverinfo="text"  # Only show the custom text on hover
-            )
-        )
         fig.update_layout(
             mapbox_style="open-street-map",  # Use OpenStreetMap
-            mapbox_zoom=3,
             mapbox_center={"lat": 46.6, "lon": 2.0},
             coloraxis_colorbar=dict(title=f"{indicator_label} ({unit})"),  # Add legend
             title=f"{indicator_label} in {year} in France ({model_label}) " # Title
@@ -403,16 +418,17 @@ def plot_map_of_france_of_indicator_for_given_year(
 map_of_france_of_indicator_for_given_year: Plot = {
     "name": "Map of France of an indicator for a given year",
-    "description": "Heatmap on the map of France of the values of an in indicator for a given year",
     "params": ["indicator_column", "year", "model"],
     "plot_function": plot_map_of_france_of_indicator_for_given_year,
     "sql_query": indicator_for_given_year_query,
 }
-PLOTS = [
     indicator_evolution_at_location,
     indicator_number_of_days_per_year_at_location,
     distribution_of_indicator_for_given_year,
     map_of_france_of_indicator_for_given_year,
-]

+import os
+import geojson
+from math import cos, radians
+from typing import Callable
 import pandas as pd
 from plotly.graph_objects import Figure
 import plotly.graph_objects as go
+from climateqa.engine.talk_to_data.drias.plot_informations import distribution_of_indicator_for_given_year_informations, indicator_evolution_informations, indicator_number_of_days_per_year_informations, map_of_france_of_indicator_for_given_year_informations
+from climateqa.engine.talk_to_data.objects.plot import Plot
+from climateqa.engine.talk_to_data.drias.queries import (
     indicator_for_given_year_query,
     indicator_per_year_at_location_query,
 )
+from climateqa.engine.talk_to_data.drias.config import DRIAS_INDICATOR_TO_COLORSCALE, DRIAS_INDICATOR_TO_UNIT
+def generate_geojson_polygons(latitudes: list[float], longitudes: list[float], indicators: list[float]) -> geojson.FeatureCollection:
+    side_km = 8
+    delta_lat = side_km / 111
+    features = []
+    for idx, (lat, lon, val) in enumerate(zip(latitudes, longitudes, indicators)):
+        delta_lon  = side_km / (111 * cos(radians(lat)))
+        half_lat = delta_lat / 2
+        half_lon = delta_lon / 2
+        features.append(geojson.Feature(
+                geometry=geojson.Polygon([[
+                    [lon - half_lon, lat - half_lat],
+                    [lon + half_lon, lat - half_lat],
+                    [lon + half_lon, lat + half_lat],
+                    [lon - half_lon, lat + half_lat],
+                    [lon - half_lon, lat - half_lat]
+                ]]),
+                properties={"value": val},
+                id=str(idx)
+            ))
+    return geojson.FeatureCollection(features)
 def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
     """Generates a function to plot indicator evolution over time at a location.
     indicator = params["indicator_column"]
     location = params["location"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+    unit = DRIAS_INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         """Generates the actual plot from the data.
             hovertemplate=f"{indicator_label}: %{{y:.2f}} {unit}<br>Year: %{{x}}<extra></extra>"
         )
         fig.update_layout(
+            title=f"Evolution of {indicator_label} in {location} ({model_label})",
             xaxis_title="Year",
             yaxis_title=f"{indicator_label} ({unit})",
             template="plotly_white",
+            height=900,
         )
         return fig
     "params": ["indicator_column", "location", "model"],
     "plot_function": plot_indicator_evolution_at_location,
     "sql_query": indicator_per_year_at_location_query,
+    "plot_information": indicator_evolution_informations,
+    'short_name': 'Evolution'
 }
     indicator = params["indicator_column"]
     location = params["location"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+    unit = DRIAS_INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         """Generate the figure thanks to the dataframe
             yaxis_title=f"{indicator_label} ({unit})",
             yaxis=dict(range=[0, max(indicators)]),
             bargap=0.5,
+            height=900,
             template="plotly_white",
         )
     "params": ["indicator_column", "location", "model"],
     "plot_function": plot_indicator_number_of_days_per_year_at_location,
     "sql_query": indicator_per_year_at_location_query,
+    "plot_information": indicator_number_of_days_per_year_informations,
+    "short_name": "Yearly Frequency",
 }
     """
     indicator = params["indicator_column"]
     year = params["year"]
+    if year is None:
+        year = 2030
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+    unit = DRIAS_INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         """Generate the figure thanks to the dataframe
             yaxis_title="Frequency (%)",
             plot_bgcolor="rgba(0, 0, 0, 0)",
             showlegend=False,
+            height=900,
         )
         return fig
     "params": ["indicator_column", "model", "year"],
     "plot_function": plot_distribution_of_indicator_for_given_year,
     "sql_query": indicator_for_given_year_query,
+    "plot_information": distribution_of_indicator_for_given_year_informations,
+    'short_name': 'Distribution'
 }
     """
     indicator = params["indicator_column"]
     year = params["year"]
+    if year is None:
+        year = 2030
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+    unit = DRIAS_INDICATOR_TO_UNIT.get(indicator, "")
     def plot_data(df: pd.DataFrame) -> Figure:
         fig = go.Figure()
             model_label = f"Model : {df['model'].unique()[0]}"
+        geojson_data = generate_geojson_polygons(latitudes, longitudes, indicators)
+        fig = go.Figure(go.Choroplethmapbox(
+            geojson=geojson_data,
+            locations=[str(i) for i in range(len(indicators))],
+            featureidkey="id",
+            z=indicators,
+            colorscale=DRIAS_INDICATOR_TO_COLORSCALE[indicator],
+            zmin=min(indicators),
+            zmax=max(indicators),
+            marker_opacity=0.7,
+            marker_line_width=0,
+            colorbar_title=f"{indicator_label} ({unit})",
+            text=[f"{indicator_label}: {value:.2f} {unit}" for value in indicators],  # Add hover text showing the indicator value
+            hoverinfo="text"
+        ))
         fig.update_layout(
             mapbox_style="open-street-map",  # Use OpenStreetMap
+            mapbox_zoom=5,
+            height=900,
             mapbox_center={"lat": 46.6, "lon": 2.0},
             coloraxis_colorbar=dict(title=f"{indicator_label} ({unit})"),  # Add legend
             title=f"{indicator_label} in {year} in France ({model_label}) " # Title
 map_of_france_of_indicator_for_given_year: Plot = {
     "name": "Map of France of an indicator for a given year",
+    "description": "Heatmap on the map of France of the values of an indicator for a given year",
     "params": ["indicator_column", "year", "model"],
     "plot_function": plot_map_of_france_of_indicator_for_given_year,
     "sql_query": indicator_for_given_year_query,
+    "plot_information": map_of_france_of_indicator_for_given_year_informations,
+    'short_name': 'Map of France'
 }
+DRIAS_PLOTS = [
     indicator_evolution_at_location,
     indicator_number_of_days_per_year_at_location,
     distribution_of_indicator_for_given_year,
     map_of_france_of_indicator_for_given_year,
+]

climateqa/engine/talk_to_data/{sql_query.py → drias/queries.py} RENAMED Viewed

@@ -1,37 +1,5 @@
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
 from typing import TypedDict
-import duckdb
-import pandas as pd
-async def execute_sql_query(sql_query: str) -> pd.DataFrame:
-    """Executes a SQL query on the DRIAS database and returns the results.
-    This function connects to the DuckDB database containing DRIAS climate data
-    and executes the provided SQL query. It handles the database connection and
-    returns the results as a pandas DataFrame.
-    Args:
-        sql_query (str): The SQL query to execute
-    Returns:
-        pd.DataFrame: A DataFrame containing the query results
-    Raises:
-        duckdb.Error: If there is an error executing the SQL query
-    """
-    def _execute_query():
-        # Execute the query
-        con = duckdb.connect()
-        results = con.sql(sql_query).fetchdf()
-        # return fetched data
-        return results
-    # Run the query in a thread pool to avoid blocking
-    loop = asyncio.get_event_loop()
-    with ThreadPoolExecutor() as executor:
-        return await loop.run_in_executor(executor, _execute_query)
 class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
     """Parameters for querying an indicator's values over time at a location.
@@ -50,7 +18,6 @@ class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
     longitude: str
     model: str
 def indicator_per_year_at_location_query(
     table: str, params: IndicatorPerYearAtLocationQueryParams
 ) -> str:
@@ -70,7 +37,7 @@ def indicator_per_year_at_location_query(
     if indicator_column is None or latitude is None or longitude is None: # If one parameter is missing, returns an empty query
         return ""
-    table = f"'hf://datasets/timeki/drias_db/{table.lower()}.parquet'"
     sql_query = f"SELECT year, {indicator_column}, model\nFROM {table}\nWHERE latitude = {latitude} \nAnd longitude = {longitude} \nOrder by Year"
@@ -105,10 +72,12 @@ def indicator_for_given_year_query(
     """
     indicator_column = params.get("indicator_column")
     year = params.get('year')
     if year is None or indicator_column is None: # If one parameter is missing, returns an empty query
         return ""
-    table = f"'hf://datasets/timeki/drias_db/{table.lower()}.parquet'"
     sql_query = f"Select {indicator_column}, latitude, longitude, model\nFrom {table}\nWhere year = {year}"
     return sql_query

 from typing import TypedDict
+from climateqa.engine.talk_to_data.config import DRIAS_DATASET_URL
 class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
     """Parameters for querying an indicator's values over time at a location.
     longitude: str
     model: str
 def indicator_per_year_at_location_query(
     table: str, params: IndicatorPerYearAtLocationQueryParams
 ) -> str:
     if indicator_column is None or latitude is None or longitude is None: # If one parameter is missing, returns an empty query
         return ""
+    table = f"'{DRIAS_DATASET_URL}/{table.lower()}.parquet'"
     sql_query = f"SELECT year, {indicator_column}, model\nFROM {table}\nWHERE latitude = {latitude} \nAnd longitude = {longitude} \nOrder by Year"
     """
     indicator_column = params.get("indicator_column")
     year = params.get('year')
+    if year is None:
+        year = 2050
     if year is None or indicator_column is None: # If one parameter is missing, returns an empty query
         return ""
+    table = f"'{DRIAS_DATASET_URL}/{table.lower()}.parquet'"
     sql_query = f"Select {indicator_column}, latitude, longitude, model\nFrom {table}\nWhere year = {year}"
     return sql_query

climateqa/engine/talk_to_data/{utils.py → input_processing.py} RENAMED Viewed

@@ -1,15 +1,17 @@
-import re
-from typing import Annotated, TypedDict
-import duckdb
-from geopy.geocoders import Nominatim
 import ast
-from climateqa.engine.llm import get_llm
-from climateqa.engine.talk_to_data.config import DRIAS_TABLES
-from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from langchain_core.prompts import ChatPromptTemplate
-async def detect_location_with_openai(sentence):
     """
     Detects locations in a sentence using OpenAI's API via LangChain.
     """
@@ -29,63 +31,7 @@ async def detect_location_with_openai(sentence):
     else:
         return ""
-class ArrayOutput(TypedDict):
-    """Represents the output of a function that returns an array.
-    This class is used to type-hint functions that return arrays,
-    ensuring consistent return types across the codebase.
-    Attributes:
-        array (str): A syntactically valid Python array string
-    """
-    array: Annotated[str, "Syntactically valid python array."]
-async def detect_year_with_openai(sentence: str) -> str:
-    """
-    Detects years in a sentence using OpenAI's API via LangChain.
-    """
-    llm = get_llm()
-    prompt = """
-    Extract all years mentioned in the following sentence.
-    Return the result as a Python list. If no year are mentioned, return an empty list.
-    Sentence: "{sentence}"
-    """
-    prompt = ChatPromptTemplate.from_template(prompt)
-    structured_llm = llm.with_structured_output(ArrayOutput)
-    chain = prompt | structured_llm
-    response: ArrayOutput = await chain.ainvoke({"sentence": sentence})
-    years_list = eval(response['array'])
-    if len(years_list) > 0:
-        return years_list[0]
-    else:
-        return ""
-def detectTable(sql_query: str) -> list[str]:
-    """Extracts table names from a SQL query.
-    This function uses regular expressions to find all table names
-    referenced in a SQL query's FROM clause.
-    Args:
-        sql_query (str): The SQL query to analyze
-    Returns:
-        list[str]: A list of table names found in the query
-    Example:
-        >>> detectTable("SELECT * FROM temperature_data WHERE year > 2000")
-        ['temperature_data']
-    """
-    pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
-    matches = re.findall(pattern, sql_query)
-    return matches
-def loc2coords(location: str) -> tuple[float, float]:
     """Converts a location name to geographic coordinates.
     This function uses the Nominatim geocoding service to convert
@@ -104,49 +50,77 @@ def loc2coords(location: str) -> tuple[float, float]:
     coords = geolocator.geocode(location)
     return (coords.latitude, coords.longitude)
-def coords2loc(coords: tuple[float, float]) -> str:
-    """Converts geographic coordinates to a location name.
     This function uses the Nominatim reverse geocoding service to convert
-    latitude and longitude coordinates to a human-readable location name.
     Args:
         coords (tuple[float, float]): A tuple containing (latitude, longitude)
     Returns:
-        str: The address of the location, or "Unknown Location" if not found
-    Example:
-        >>> coords2loc((48.8566, 2.3522))
-        'Paris, France'
     """
-    geolocator = Nominatim(user_agent="coords_to_city")
-    try:
-        location = geolocator.reverse(coords)
-        return location.address
-    except Exception as e:
-        print(f"Error: {e}")
-        return "Unknown Location"
-def nearestNeighbourSQL(location: tuple, table: str) -> tuple[str, str]:
     long = round(location[1], 3)
     lat = round(location[0], 3)
-    table = f"'hf://datasets/timeki/drias_db/{table.lower()}.parquet'"
-    results = duckdb.sql(
-        f"SELECT latitude, longitude FROM {table} WHERE latitude BETWEEN {lat - 0.3} AND {lat + 0.3} AND longitude BETWEEN {long - 0.3} AND {long + 0.3}"
-    ).fetchdf()
     if len(results) == 0:
-        return "", ""
-    # cursor.execute(f"SELECT latitude, longitude FROM {table} WHERE latitude BETWEEN {lat - 0.3} AND {lat + 0.3} AND longitude BETWEEN {long - 0.3} AND {long + 0.3}")
-    return results['latitude'].iloc[0], results['longitude'].iloc[0]
-async def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
     """Identifies relevant tables for a plot based on user input.
     This function uses an LLM to analyze the user's question and the plot
@@ -170,7 +144,6 @@ async def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[st
         ['mean_annual_temperature', 'mean_summer_temperature']
     """
     # Get all table names
-    table_names_list = DRIAS_TABLES
     prompt = (
         f"You are helping to build a plot following this description : {plot['description']}."
@@ -187,95 +160,98 @@ async def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[st
     )
     return table_names
-def replace_coordonates(coords, query, coords_tables):
-    n = query.count(str(coords[0]))
-    for i in range(n):
-        query = query.replace(str(coords[0]), str(coords_tables[i][0]), 1)
-        query = query.replace(str(coords[1]), str(coords_tables[i][1]), 1)
-    return query
-async def detect_relevant_plots(user_question: str, llm):
     plots_description = ""
-    for plot in PLOTS:
         plots_description += "Name: " + plot["name"]
         plots_description += " - Description: " + plot["description"] + "\n"
     prompt = (
-        f"You are helping to answer a quesiton with insightful visualizations."
-        f"You are given an user question and a list of plots with their name and description."
-        f"Based on the descriptions of the plots, which plot is appropriate to answer to this question."
-        f"Write the most relevant tables to use. Answer only a python list of plot name."
         f"### Descriptions of the plots : {plots_description}"
-        f"### User question : {user_question}"
-        f"### Name of the plot : "
     )
-    # prompt = (
-    #     f"You are helping to answer a question with insightful visualizations. "
-    #     f"Given a list of plots with their name and description: "
-    #     f"{plots_description} "
-    #     f"The user question is: {user_question}. "
-    #     f"Choose the most relevant plots to answer the question. "
-    #     f"The answer must be a Python list with the names of the relevant plots, and nothing else. "
-    #     f"Ensure the response is in the exact format: ['PlotName1', 'PlotName2']."
-    # )
     plot_names = ast.literal_eval(
         (await llm.ainvoke(prompt)).content.strip("```python\n").strip()
     )
     return plot_names
-# Next Version
-# class QueryOutput(TypedDict):
-#     """Generated SQL query."""
-#     query: Annotated[str, ..., "Syntactically valid SQL query."]
-# class PlotlyCodeOutput(TypedDict):
-#     """Generated Plotly code"""
-#     code: Annotated[str, ..., "Synatically valid Plotly python code."]
-# def write_sql_query(user_input: str, db: SQLDatabase, relevant_tables: list[str], llm):
-#     """Generate SQL query to fetch information."""
-#     prompt_params = {
-#         "dialect": db.dialect,
-#         "table_info": db.get_table_info(),
-#         "input": user_input,
-#         "relevant_tables": relevant_tables,
-#         "model": "ALADIN63_CNRM-CM5",
-#     }
-#     prompt = ChatPromptTemplate.from_template(query_prompt_template)
-#     structured_llm = llm.with_structured_output(QueryOutput)
-#     chain = prompt | structured_llm
-#     result = chain.invoke(prompt_params)
-#     return result["query"]
-# def fetch_data_from_sql_query(db: str, sql_query: str):
-#     conn = sqlite3.connect(db)
-#     cursor = conn.cursor()
-#     cursor.execute(sql_query)
-#     column_names = [desc[0] for desc in cursor.description]
-#     values = cursor.fetchall()
-#     return {"column_names": column_names, "data": values}
-# def generate_chart_code(user_input: str, sql_query: list[str], llm):
-#     """ "Generate plotly python code for the chart based on the sql query and the user question"""
-#     class PlotlyCodeOutput(TypedDict):
-#         """Generated Plotly code"""
-#         code: Annotated[str, ..., "Synatically valid Plotly python code."]
-#     prompt = ChatPromptTemplate.from_template(plot_prompt_template)
-#     structured_llm = llm.with_structured_output(PlotlyCodeOutput)
-#     chain = prompt | structured_llm
-#     result = chain.invoke({"input": user_input, "sql_query": sql_query})
-#     return result["code"]

+from typing import Any, Literal, Optional, cast
 import ast
 from langchain_core.prompts import ChatPromptTemplate
+from geopy.geocoders import Nominatim
+from climateqa.engine.llm import get_llm
+import duckdb
+import os
+from climateqa.engine.talk_to_data.config import DRIAS_MEAN_ANNUAL_TEMPERATURE_PATH, IPCC_COORDINATES_PATH
+from climateqa.engine.talk_to_data.objects.llm_outputs import ArrayOutput
+from climateqa.engine.talk_to_data.objects.location import Location
+from climateqa.engine.talk_to_data.objects.plot import Plot
+from climateqa.engine.talk_to_data.objects.states import State
+async def detect_location_with_openai(sentence: str) -> str:
     """
     Detects locations in a sentence using OpenAI's API via LangChain.
     """
     else:
         return ""
+def loc_to_coords(location: str) -> tuple[float, float]:
     """Converts a location name to geographic coordinates.
     This function uses the Nominatim geocoding service to convert
     coords = geolocator.geocode(location)
     return (coords.latitude, coords.longitude)
+def coords_to_country(coords: tuple[float, float]) -> tuple[str,str]:
+    """Converts geographic coordinates to a country code and a country name.
     This function uses the Nominatim reverse geocoding service to convert
+    latitude and longitude coordinates to a country name.
     Args:
         coords (tuple[float, float]): A tuple containing (latitude, longitude)
     Returns:
+        tuple[str,str]: A tuple containing (country_code, country_name); the country code is upper-cased
+    Raises:
+        AttributeError: If the reverse geocoder returns no result for the coordinates
+        KeyError: If the returned address has no 'address', 'country_code' or 'country' entry
     """
+    geolocator = Nominatim(user_agent="latlong_to_country")
+    location = geolocator.reverse(coords)
+    # The raw Nominatim payload carries the structured address fields.
+    address = location.raw['address']
+    return address['country_code'].upper(), address['country']
+def nearest_neighbour_sql(location: tuple, mode: Literal['DRIAS', 'IPCC']) -> tuple[str, str, Optional[str]]:
     long = round(location[1], 3)
     lat = round(location[0], 3)
+    conn = duckdb.connect()
+    if mode == 'DRIAS':
+        table_path = f"'{DRIAS_MEAN_ANNUAL_TEMPERATURE_PATH}'"
+        results = conn.sql(
+            f"SELECT latitude, longitude FROM {table_path} WHERE latitude BETWEEN {lat - 0.3} AND {lat + 0.3} AND longitude BETWEEN {long - 0.3} AND {long + 0.3}"
+        ).fetchdf()
+    else:
+        table_path = f"'{IPCC_COORDINATES_PATH}'"
+        results = conn.sql(
+            f"SELECT latitude, longitude, admin1 FROM {table_path} WHERE latitude BETWEEN {lat - 0.5} AND {lat + 0.5} AND longitude BETWEEN {long - 0.5} AND {long + 0.5}"
+        ).fetchdf()
     if len(results) == 0:
+        return "", "", ""
+    if 'admin1' in results.columns:
+        admin1 = results['admin1'].iloc[0]
+    else:
+        admin1 = None
+    return results['latitude'].iloc[0], results['longitude'].iloc[0], admin1
+async def detect_year_with_openai(sentence: str) -> str:
+    """
+    Detects years in a sentence using OpenAI's API via LangChain.
+    """
+    llm = get_llm()
+    prompt = """
+    Extract all years mentioned in the following sentence.
+    Return the result as a Python list. If no year are mentioned, return an empty list.
+    Sentence: "{sentence}"
+    """
+    prompt = ChatPromptTemplate.from_template(prompt)
+    structured_llm = llm.with_structured_output(ArrayOutput)
+    chain = prompt | structured_llm
+    response: ArrayOutput = await chain.ainvoke({"sentence": sentence})
+    years_list = ast.literal_eval(response['array'])
+    if len(years_list) > 0:
+        return years_list[0]
+    else:
+        return ""
+async def detect_relevant_tables(user_question: str, plot: Plot, llm, table_names_list: list[str]) -> list[str]:
     """Identifies relevant tables for a plot based on user input.
     This function uses an LLM to analyze the user's question and the plot
         ['mean_annual_temperature', 'mean_summer_temperature']
     """
     # Get all table names
     prompt = (
         f"You are helping to build a plot following this description : {plot['description']}."
     )
     return table_names
+async def detect_relevant_plots(user_question: str, llm, plot_list: list[Plot]) -> list[str]:
     plots_description = ""
+    for plot in plot_list:
         plots_description += "Name: " + plot["name"]
         plots_description += " - Description: " + plot["description"] + "\n"
     prompt = (
+        "You are helping to answer a question with insightful visualizations.\n"
+        "You are given a user question and a list of plots with their name and description.\n"
+        "Based on the descriptions of the plots, select ALL plots that could provide a useful answer to this question. "
+        "Include any plot that could show relevant information, even if their perspectives (such as time series or spatial distribution) are different.\n"
+        "For example, for a question like 'What will be the total rainfall in China in 2050?', both a time series plot and a spatial map plot could be relevant.\n"
+        "Return only a Python list of plot names sorted from the most relevant one to the less relevant one.\n"
         f"### Descriptions of the plots : {plots_description}"
+        f"### User question : {user_question}\n"
+        f"### Names of the plots : "
     )
     plot_names = ast.literal_eval(
         (await llm.ainvoke(prompt)).content.strip("```python\n").strip()
     )
     return plot_names
+async def find_location(user_input: str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> Location:
+    print(f"---- Find location in user input ----")
+    location = await detect_location_with_openai(user_input)
+    output: Location = {
+        'location' : location,
+        'longitude' : None,
+        'latitude' : None,
+        'country_code' : None,
+        'country_name' : None,
+        'admin1' : None
+        }
+    if location:
+        coords = loc_to_coords(location)
+        country_code, country_name = coords_to_country(coords)
+        neighbour = nearest_neighbour_sql(coords, mode)
+        output.update({
+            "latitude": neighbour[0],
+            "longitude": neighbour[1],
+            "country_code": country_code,
+            "country_name": country_name,
+            "admin1": neighbour[2]
+        })
+    output = cast(Location, output)
+    return output
+async def find_year(user_input: str) -> str| None:
+    """Extracts year information from user input using LLM.
+    This function uses an LLM to identify and extract year information from the
+    user's query, which is used to filter data in subsequent queries.
+    Args:
+        user_input (str): The user's query text
+    Returns:
+        str: The extracted year, or empty string if no year found
+    """
+    print(f"---- Find year ---")
+    year = await detect_year_with_openai(user_input)
+    if year == "":
+        return None
+    return year
+async def find_relevant_plots(state: State, llm, plots: list[Plot]) -> list[str]:
+    print("---- Find relevant plots ----")
+    relevant_plots = await detect_relevant_plots(state['user_input'], llm, plots)
+    return relevant_plots
+async def find_relevant_tables_per_plot(state: State, plot: Plot, llm, tables: list[str]) -> list[str]:
+    print(f"---- Find relevant tables for {plot['name']} ----")
+    relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm, tables)
+    return relevant_tables
+async def find_param(state: State, param_name:str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> dict[str, Optional[str]] | Location | None:
+    """Perform the good method to retrieve the desired parameter
+    Args:
+        state (State): state of the workflow
+        param_name (str): name of the desired parameter
+        table (str): name of the table
+    Returns:
+        dict[str, Any] | None:
+    """
+    if param_name == 'location':
+        location = await find_location(state['user_input'], mode)
+        return location
+    if param_name == 'year':
+        year = await find_year(state['user_input'])
+        return {'year': year}
+    return None

climateqa/engine/talk_to_data/ipcc/config.py ADDED Viewed

	@@ -0,0 +1,98 @@

+from climateqa.engine.talk_to_data.ui_config import PRECIPITATION_COLORSCALE, TEMPERATURE_COLORSCALE
+from climateqa.engine.talk_to_data.config import IPCC_DATASET_URL
+# IPCC_DATASET_URL = "hf://datasets/ekimetrics/ipcc-atlas"
+# Names of the IPCC tables that can be queried.
+IPCC_TABLES = [
+    "mean_temperature",
+    "total_precipitation",
+]
+# Maps each table name to the name of its indicator column.
+IPCC_INDICATOR_COLUMNS_PER_TABLE = {
+    "mean_temperature": "mean_temperature",
+    "total_precipitation": "total_precipitation"
+}
+# Maps each indicator to the unit shown in plots and explanatory texts.
+IPCC_INDICATOR_TO_UNIT = {
+    "mean_temperature": "°C",
+    "total_precipitation": "mm/day"
+}
+# Scenario labels: the historical period followed by the SSP projections.
+IPCC_SCENARIO = [
+    "historical",
+    "ssp126",
+    "ssp245",
+    "ssp370",
+    "ssp585",
+]
+# No model names are listed for IPCC.
+IPCC_MODELS = []
+# Names of the parameters listed for IPCC plots.
+# NOTE(review): presumably the parameters extracted from the user question; confirm against the workflow.
+IPCC_PLOT_PARAMETERS = [
+    'year',
+    'location'
+]
+# Two-letter country codes grouped as "macro" countries.
+# NOTE(review): how this group is used is not visible here; presumably it marks
+# countries that need special handling because of their size. Confirm against the queries.
+MACRO_COUNTRIES = ['JP',
+ 'IN',
+ 'MH',
+ 'PT',
+ 'ID',
+ 'SJ',
+ 'MX',
+ 'CN',
+ 'GL',
+ 'PN',
+ 'AR',
+ 'AQ',
+ 'PF',
+ 'BR',
+ 'SH',
+ 'GS',
+ 'ZA',
+ 'NZ',
+ 'TF',
+]
+# Two-letter country codes grouped as "huge macro" countries.
+# NOTE(review): usage not visible here; confirm against the queries.
+HUGE_MACRO_COUNTRIES = ['CL',
+ 'CA',
+ 'AU',
+ 'US',
+ 'RU'
+]
+# Maps each indicator to the colorscale used when drawing it.
+IPCC_INDICATOR_TO_COLORSCALE = {
+    "mean_temperature": TEMPERATURE_COLORSCALE,
+    "total_precipitation": PRECIPITATION_COLORSCALE
+}
+IPCC_UI_TEXT = """
+Hi, I'm **Talk to IPCC**, designed to answer your questions using [**IPCC - ATLAS**](https://interactive-atlas.ipcc.ch/regional-information#eyJ0eXBlIjoiQVRMQVMiLCJjb21tb25zIjp7ImxhdCI6OTc3MiwibG5nIjo0MDA2OTIsInpvb20iOjQsInByb2oiOiJFUFNHOjU0MDMwIiwibW9kZSI6ImNvbXBsZXRlX2F0bGFzIn0sInByaW1hcnkiOnsic2NlbmFyaW8iOiJzc3A1ODUiLCJwZXJpb2QiOiIyIiwic2Vhc29uIjoieWVhciIsImRhdGFzZXQiOiJDTUlQNiIsInZhcmlhYmxlIjoidGFzIiwidmFsdWVUeXBlIjoiQU5PTUFMWSIsImhhdGNoaW5nIjoiU0lNUExFIiwicmVnaW9uU2V0IjoiYXI2IiwiYmFzZWxpbmUiOiJwcmVJbmR1c3RyaWFsIiwicmVnaW9uc1NlbGVjdGVkIjpbXX0sInBsb3QiOnsiYWN0aXZlVGFiIjoicGx1bWUiLCJtYXNrIjoibm9uZSIsInNjYXR0ZXJZTWFnIjpudWxsLCJzY2F0dGVyWVZhciI6bnVsbCwic2hvd2luZyI6ZmFsc2V9fQ==) data.
+I'll answer by displaying a list of SQL queries, graphs and data most relevant to your question.
+You can ask me anything about these climate indicators: **temperature** or **precipitation**.
+You can specify **location** and/or **year**.
+By default, we take the **mediane of each climate model**.
+Current available charts :
+- Yearly evolution of an indicator at a specific location (historical + SSP Projections)
+- Yearly spatial distribution of an indicator in a specific country
+Current available indicators :
+- Mean temperature
+- Total precipitation
+For example, you can ask:
+- What will the temperature be like in Paris?
+- What will be the total rainfall in the USA in 2030?
+- How will the average temperature evolve in China ?
+⚠️ **Limitations**:
+- You can't ask anything that isn't related to **IPCC - ATLAS** data.
+- You can not ask about **several locations at the same time**.
+- If you specify a year **before 1850 or over 2100**, there will be **no data**.
+- You **cannot compare two models**.
+🛈 **Information**
+Please note that we **log your questions for meta-analysis purposes**, so avoid sharing any sensitive or personal information.
+"""

climateqa/engine/talk_to_data/ipcc/plot_informations.py ADDED Viewed

	@@ -0,0 +1,50 @@

+from climateqa.engine.talk_to_data.ipcc.config import IPCC_INDICATOR_TO_UNIT
+def indicator_evolution_informations(
+        indicator: str,
+        params: dict[str,str],
+) -> str:
+    """Builds the Markdown text explaining the indicator-evolution plot.
+
+    Args:
+        indicator (str): Indicator name; must be a key of IPCC_INDICATOR_TO_UNIT
+        params (dict[str, str]): Plot parameters; must contain "location"
+
+    Returns:
+        str: The explanatory text, with the indicator, location and unit filled in
+
+    Raises:
+        ValueError: If "location" is missing from params
+        KeyError: If the indicator has no unit in IPCC_INDICATOR_TO_UNIT
+    """
+    if "location" not in params:
+        raise ValueError('"location" must be provided in params')
+    location = params["location"]
+    unit = IPCC_INDICATOR_TO_UNIT[indicator]
+    return f"""
+This plot shows how the climate indicator **{indicator}** evolves over time in **{location}**.
+It combines both historical (from 1950 to 2015) observations and future (from 2016 to 2100) projections for the different SSP climate scenarios (SSP126, SSP245, SSP370 and SSP585).
+The x-axis represents the years (from 1950 to 2100), and the y-axis shows the value of the {indicator} ({unit}).
+Each line corresponds to a different scenario, allowing you to compare how {indicator} might change under various future conditions.
+**Data source:**
+- The data comes from the CMIP6 IPCC ATLAS data. The data were initially extracted from [this referenced website](https://digital.csic.es/handle/10261/332744) and then preprocessed to a tabular format and uploaded as parquet in this [Hugging Face dataset](https://huggingface.co/datasets/Ekimetrics/ipcc-atlas).
+- The underlying data is retrieved by aggregating yearly values of {indicator} for the selected location, across all available scenarios. This means the system collects, for each year, the value of {indicator} in {location}, both for the historical period and for each scenario, to build the time series.
+- The coordinates used for {location} correspond to the closest available point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
+"""
+def choropleth_map_informations(
+        indicator: str,
+        params: dict[str, str],
+) -> str:
+    unit = IPCC_INDICATOR_TO_UNIT[indicator]
+    if "location" not in params:
+        raise ValueError('"location" must be provided in params')
+    location = params["location"]
+    country_name = params["country_name"]
+    year = params["year"]
+    if year is None:
+        year = 2050
+    return f"""
+This plot displays a choropleth map showing the spatial distribution of **{indicator}** across all regions of **{location}** country ({country_name}) for the year **{year}** and the chosen scenario.
+Each grid point is colored according to the value of the indicator ({unit}), allowing you to visually compare how {indicator} varies geographically within the country for the selected year and scenario.
+**Data source:**
+- The data come from the CMIP6 IPCC ATLAS data. The data were initially extracted from [this referenced website](https://digital.csic.es/handle/10261/332744) and then preprocessed to a tabular format and uploaded as parquet in this [Hugging Face dataset](https://huggingface.co/datasets/Ekimetrics/ipcc-atlas).
+- For each grid point of {location} country ({country_name}), the value of {indicator} in {year} and for the selected scenario is extracted and mapped to its geographic coordinates.
+- The grid points correspond to 1-degree squares centered on the grid points of the IPCC dataset. Each grid point has been mapped to a country using [**reverse_geocoder**](https://github.com/thampiman/reverse-geocoder).
+- The coordinates used for each region are those of the closest available grid point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
+"""

climateqa/engine/talk_to_data/ipcc/plots.py ADDED Viewed

	@@ -0,0 +1,189 @@

+from typing import Callable
+from plotly.graph_objects import Figure
+import plotly.graph_objects as go
+import pandas as pd
+import geojson
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_INDICATOR_TO_COLORSCALE, IPCC_INDICATOR_TO_UNIT, IPCC_SCENARIO
+from climateqa.engine.talk_to_data.ipcc.plot_informations import choropleth_map_informations, indicator_evolution_informations
+from climateqa.engine.talk_to_data.ipcc.queries import indicator_for_given_year_query, indicator_per_year_at_location_query
+from climateqa.engine.talk_to_data.objects.plot import Plot
+def generate_geojson_polygons(latitudes: list[float], longitudes: list[float], indicators: list[float]) -> geojson.FeatureCollection:
+    features = [
+        geojson.Feature(
+            geometry=geojson.Polygon([[
+                [lon - 0.5, lat - 0.5],
+                [lon + 0.5, lat - 0.5],
+                [lon + 0.5, lat + 0.5],
+                [lon - 0.5, lat + 0.5],
+                [lon - 0.5, lat - 0.5]
+            ]]),
+            properties={"value": val},
+            id=str(idx)
+        )
+        for idx, (lat, lon, val) in enumerate(zip(latitudes, longitudes, indicators))
+    ]
+    geojson_data = geojson.FeatureCollection(features)
+    return geojson_data
+def plot_indicator_evolution_at_location_historical_and_projections(
+    params: dict,
+) -> Callable[[pd.DataFrame], Figure]:
+    """
+    Returns a function that generates a line plot showing the evolution of a climate indicator
+    (e.g., temperature, rainfall) over time at a specific location, including both historical data
+    and future projections for different climate scenarios.
+    Args:
+        params (dict): Dictionary with:
+            - indicator_column (str): Name of the climate indicator column to plot.
+            - location (str): Location (e.g., country, city) for which to plot the indicator.
+    Returns:
+        Callable[[pd.DataFrame], Figure]: Function that takes a DataFrame and returns a Plotly Figure
+        showing the indicator's evolution over time, with scenario lines and historical data.
+    """
+    indicator = params["indicator_column"]
+    location = params["location"]
+    indicator_label = " ".join(word.capitalize() for word in indicator.split("_"))
+    unit = IPCC_INDICATOR_TO_UNIT.get(indicator, "")
+    def plot_data(df: pd.DataFrame) -> Figure:
+        df = df.sort_values(by='year')
+        years = df['year'].astype(int).tolist()
+        indicators = df[indicator].astype(float).tolist()
+        scenarios = df['scenario'].astype(str).tolist()
+        # Find last historical value for continuity
+        last_historical = [(y, v) for y, v, s in zip(years, indicators, scenarios) if s == 'historical']
+        last_historical_year, last_historical_indicator = last_historical[-1] if last_historical else (None, None)
+        fig = go.Figure()
+        for scenario in IPCC_SCENARIO:
+            x = [y for y, s in zip(years, scenarios) if s == scenario]
+            y = [v for v, s in zip(indicators, scenarios) if s == scenario]
+            # Connect historical to scenario
+            if scenario != 'historical' and last_historical_indicator is not None:
+                x = [last_historical_year] + x
+                y = [last_historical_indicator] + y
+            fig.add_trace(go.Scatter(
+                x=x,
+                y=y,
+                mode='lines',
+                name=scenario
+            ))
+        fig.update_layout(
+            title=f'Yearly Evolution of {indicator_label} in {location} (Historical + SSP Scenarios)',
+            xaxis_title='Year',
+            yaxis_title=f'{indicator_label} ({unit})',
+            legend_title='Scenario',
+            height=800,
+        )
+        return fig
+    return plot_data
+# Plot descriptor for the yearly evolution of an indicator at a location.
+# It bundles the display names, the parameter names the plot expects, the SQL
+# query builder, the plot factory and the explanatory-text builder.
+indicator_evolution_at_location_historical_and_projections: Plot = {
+    "name": "Indicator Evolution at Location (Historical + Projections)",
+    "description": (
+        "Shows how a climate indicator (e.g., rainfall, temperature) changes over time at a specific location, "
+        "including historical data and future projections. "
+        "Useful for questions about the value or trend of an indicator at a location for any year, "
+        "such as 'What will be the total rainfall in China in 2050?' or 'How does rainfall evolve in China over time?'. "
+        "Parameters: indicator_column (the climate variable), location (e.g., country, city)."
+    ),
+    "params": ["indicator_column", "location"],
+    "plot_function": plot_indicator_evolution_at_location_historical_and_projections,
+    "sql_query": indicator_per_year_at_location_query,
+    "plot_information": indicator_evolution_informations,
+    "short_name": "Evolution"
+}
+def plot_choropleth_map_of_country_indicator_for_specific_year(
+    params: dict,
+) -> Callable[[pd.DataFrame], Figure]:
+    """
+    Builds the plotting function for a choropleth map of one climate indicator over a country
+    for a single year.
+    The labels (indicator label, unit, year, location) are resolved once here; the returned
+    function only needs the query result to draw the map.
+    Args:
+        params (dict): Dictionary with:
+            - indicator_column (str): Name of the climate indicator column to plot.
+            - year (str or int, optional): Year shown in the title (2050 when absent or None).
+            - country_name (str): Name of the country, shown in the title.
+            - location (str): Location (country or region), shown in the title.
+    Returns:
+        Callable[[pd.DataFrame], Figure]: Function that takes a DataFrame holding the indicator
+        column plus "latitude" and "longitude" columns and returns the Plotly choropleth figure.
+    """
+    indicator = params["indicator_column"]
+    requested_year = params.get('year')
+    year = 2050 if requested_year is None else requested_year
+    country_name = params['country_name']
+    location = params['location']
+    indicator_label = " ".join(part.capitalize() for part in indicator.split("_"))
+    # Indicators without a registered unit get an empty unit string
+    unit = IPCC_INDICATOR_TO_UNIT.get(indicator, "")
+    def plot_data(df: pd.DataFrame) -> Figure:
+        values = df[indicator].astype(float).tolist()
+        lats = df["latitude"].astype(float).tolist()
+        lons = df["longitude"].astype(float).tolist()
+        cells = generate_geojson_polygons(lats, lons, values)
+        # One location id per row, matched against the "id" key of the geojson features
+        cell_ids = [str(position) for position in range(len(values))]
+        # Hover text showing the indicator value
+        hover_labels = [f"{indicator_label}: {value:.2f} {unit}" for value in values]
+        # Centre the map on the middle row of the data, or on (0, 0) when there is none
+        center_lat = lats[len(lats)//2] if lats else 0
+        center_lon = lons[len(lons)//2] if lons else 0
+        choropleth = go.Choroplethmapbox(
+            geojson=cells,
+            locations=cell_ids,
+            featureidkey="id",
+            z=values,
+            colorscale=IPCC_INDICATOR_TO_COLORSCALE[indicator],
+            zmin=min(values),
+            zmax=max(values),
+            marker_opacity=0.7,
+            marker_line_width=0,
+            colorbar_title=f"{indicator_label} ({unit})",
+            text=hover_labels,
+            hoverinfo="text"
+        )
+        fig = go.Figure(choropleth)
+        fig.update_layout(
+            mapbox_style="open-street-map",
+            mapbox_zoom=2,
+            height=800,
+            mapbox_center={"lat": center_lat, "lon": center_lon},
+            coloraxis_colorbar=dict(title=f"{indicator_label} ({unit})"),
+            title=f"{indicator_label} in {year} in {location} ({country_name})"
+        )
+        return fig
+    return plot_data
+# Plot configuration for the "Map" plot: spatial distribution of one indicator across a
+# country for a single year. It bundles the plot builder, the SQL query builder and the
+# plot-information helper used for this plot.
+# NOTE(review): the plot builder also reads params['country_name'], which is not listed
+# in "params" below - confirm the workflow supplies it together with the location.
+choropleth_map_of_country_indicator_for_specific_year: Plot = {
+    "name": "Choropleth Map of a Country's Indicator Distribution for a Specific Year",
+    "description": (
+        "Displays a map showing the spatial distribution of a climate indicator (e.g., rainfall, temperature) "
+        "across all regions of a country for a specific year. "
+        "Can answer questions about the value of an indicator in a country or region for a given year, "
+        "such as 'What will be the total rainfall in China in 2050?' or 'How is rainfall distributed across China in 2050?'. "
+        "Parameters: indicator_column (the climate variable), year, location (country name)."
+    ),
+    "params": ["indicator_column", "year", "location"],
+    "plot_function": plot_choropleth_map_of_country_indicator_for_specific_year,
+    "sql_query": indicator_for_given_year_query,
+    "plot_information": choropleth_map_informations,
+    "short_name": "Map",
+}
+# Registry of the IPCC plot configurations defined in this module.
+IPCC_PLOTS = [
+    indicator_evolution_at_location_historical_and_projections,
+    choropleth_map_of_country_indicator_for_specific_year
+]

climateqa/engine/talk_to_data/ipcc/queries.py ADDED Viewed

	@@ -0,0 +1,143 @@

+from typing import TypedDict, Optional
+from climateqa.engine.talk_to_data.ipcc.config import HUGE_MACRO_COUNTRIES, MACRO_COUNTRIES
+from climateqa.engine.talk_to_data.config import IPCC_DATASET_URL
+class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
+    """
+    Parameters for querying the evolution of an indicator per year at a specific location.
+    Declared with ``total=False``: every key may be absent, so consumers have to check for
+    missing values themselves.
+    Attributes:
+        indicator_column (str): Name of the climate indicator column.
+        latitude (str): Latitude of the location.
+        longitude (str): Longitude of the location.
+        country_code (str): Country code.
+        admin1 (Optional[str]): Administrative region (optional).
+    """
+    indicator_column: str
+    latitude: str
+    longitude: str
+    country_code: str
+    admin1: Optional[str]
+def indicator_per_year_at_location_query(
+    table: str, params: IndicatorPerYearAtLocationQueryParams
+) -> str:
+    """
+    Builds an SQL query to get the evolution of an indicator per year at a specific location.
+    The parquet file and the aggregation depend on the country code:
+    - in MACRO_COUNTRIES: reads `<country_code>_macro.parquet` and averages the indicator
+      per (scenario, year);
+    - in HUGE_MACRO_COUNTRIES: reads `<country_code>_macro.parquet` and selects the indicator
+      as stored, without aggregation;
+    - otherwise: reads `<country_code>.parquet`, takes the median per (scenario, year, month)
+      and averages these monthly medians per (scenario, year).
+    Every variant keeps only years from 1950 onwards and orders rows by year, then scenario.
+    Args:
+        table (str): SQL table of the indicator (lower-cased to build the parquet path).
+        params (IndicatorPerYearAtLocationQueryParams): Dictionary with the required params for the query
+            (indicator_column, latitude, longitude and country_code).
+    Returns:
+        str: The SQL query string, or an empty string if required parameters are missing.
+    """
+    indicator_column = params.get("indicator_column")
+    latitude = params.get("latitude")
+    longitude = params.get("longitude")
+    country_code = params.get("country_code")
+    # Missing (or empty) required values: signal "no query" with an empty string
+    if not all([indicator_column, latitude, longitude, country_code]):
+        return ""
+    if country_code in MACRO_COUNTRIES:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
+        sql_query = f"""
+        SELECT year, scenario, AVG({indicator_column}) as {indicator_column}
+        FROM {table_path}
+        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950
+        GROUP BY scenario, year
+        ORDER BY year, scenario
+        """
+    elif country_code in HUGE_MACRO_COUNTRIES:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
+        # The original text had a dangling comma after the column list and "ORDER year"
+        # (missing BY), which is not valid SQL
+        sql_query = f"""
+        SELECT year, scenario, {indicator_column}
+        FROM {table_path}
+        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950
+        ORDER BY year, scenario
+        """
+    else:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
+        sql_query = f"""
+        WITH medians_per_month AS (
+            SELECT year, scenario, month, MEDIAN({indicator_column}) AS median_value
+            FROM {table_path}
+            WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950
+            GROUP BY scenario, year, month
+        )
+        SELECT year, scenario, AVG(median_value) AS {indicator_column}
+        FROM medians_per_month
+        GROUP BY scenario, year
+        ORDER BY year, scenario
+        """
+    return sql_query.strip()
+class IndicatorForGivenYearQueryParams(TypedDict, total=False):
+    """
+    Parameters for querying an indicator's values across locations for a specific year.
+    Declared with ``total=False``: every key may be absent, so consumers have to check for
+    missing values themselves.
+    Attributes:
+        indicator_column (str): The column name for the climate indicator.
+        year (str): The year to query.
+        country_code (str): The country code.
+    """
+    indicator_column: str
+    year: str
+    country_code: str
+def indicator_for_given_year_query(
+    table: str, params: IndicatorForGivenYearQueryParams
+) -> str:
+    """
+    Builds an SQL query to get the values of an indicator with their latitudes, longitudes,
+    and scenarios for a given year.
+    The parquet file and the aggregation depend on the country code:
+    - in MACRO_COUNTRIES: reads `<country_code>_macro.parquet` and averages the indicator
+      per (latitude, longitude, scenario);
+    - in HUGE_MACRO_COUNTRIES: reads `<country_code>_macro.parquet` and selects the indicator
+      as stored, without aggregation;
+    - otherwise: reads `<country_code>.parquet`, takes the median per
+      (latitude, longitude, scenario, month) and averages these monthly medians.
+    Args:
+        table (str): SQL table of the indicator (lower-cased to build the parquet path).
+        params (IndicatorForGivenYearQueryParams): Dictionary with the required params for the query.
+            A missing or empty `year` falls back to 2050.
+    Returns:
+        str: The SQL query string, or an empty string if `indicator_column` or `country_code`
+        is missing.
+    """
+    indicator_column = params.get("indicator_column")
+    # A missing/empty year falls back to 2050, so it can never fail the check below
+    year = params.get("year") or 2050
+    country_code = params.get("country_code")
+    if not all([indicator_column, country_code]):
+        return ""
+    if country_code in MACRO_COUNTRIES:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
+        sql_query = f"""
+        SELECT latitude, longitude, scenario, AVG({indicator_column}) as {indicator_column}
+        FROM {table_path}
+        WHERE year = {year}
+        GROUP BY latitude, longitude, scenario
+        ORDER BY latitude, longitude, scenario
+        """
+    elif country_code in HUGE_MACRO_COUNTRIES:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
+        # No dangling comma after the column list: keeps the statement standard SQL
+        sql_query = f"""
+        SELECT latitude, longitude, scenario, {indicator_column}
+        FROM {table_path}
+        WHERE year = {year}
+        ORDER BY latitude, longitude, scenario
+        """
+    else:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
+        sql_query = f"""
+        WITH medians_per_month AS (
+            SELECT latitude, longitude, scenario, month, MEDIAN({indicator_column}) AS median_value
+            FROM {table_path}
+            WHERE year = {year}
+            GROUP BY latitude, longitude, scenario, month
+        )
+        SELECT latitude, longitude, scenario, AVG(median_value) AS {indicator_column}
+        FROM medians_per_month
+        GROUP BY latitude, longitude, scenario
+        ORDER BY latitude, longitude, scenario
+        """
+    return sql_query.strip()

climateqa/engine/talk_to_data/main.py CHANGED Viewed

@@ -1,44 +1,70 @@
-from climateqa.engine.talk_to_data.talk_to_drias import drias_workflow
-from climateqa.engine.llm import get_llm
 from climateqa.logging import log_drias_interaction_to_huggingface
-import ast
-llm = get_llm(provider="openai")
-def ask_llm_to_add_table_names(sql_query: str, llm) -> str:
-    """Adds table names to the SQL query result rows using LLM.
-    This function modifies the SQL query to include the source table name in each row
-    of the result set, making it easier to track which data comes from which table.
     Args:
-        sql_query (str): The original SQL query to modify
-        llm: The language model instance to use for generating the modified query
     Returns:
-        str: The modified SQL query with table names included in the result rows
     """
-    sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query}. Just answer the query. The answer should not include ```sql\n").content
-    return sql_with_table_names
-def ask_llm_column_names(sql_query: str, llm) -> list[str]:
-    """Extracts column names from a SQL query using LLM.
-    This function analyzes a SQL query to identify which columns are being selected
-    in the result set.
-    Args:
-        sql_query (str): The SQL query to analyze
-        llm: The language model instance to use for column extraction
-    Returns:
-        list[str]: A list of column names being selected in the query
-    """
-    columns = llm.invoke(f"From the given sql query, list the columns that are being selected. The answer should only be a python list. Just answer the list. The SQL query : {sql_query}").content
-    columns_list = ast.literal_eval(columns.strip("```python\n").strip())
-    return columns_list
-async def ask_drias(query: str, index_state: int = 0, user_id: str = None) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
@@ -61,58 +87,38 @@ async def ask_drias(query: str, index_state: int = 0, user_id: str = None) -> tu
             - table_list (list): List of table names used
             - error (str): Error message if any
     """
-    final_state = await drias_workflow(query)
     sql_queries = []
     result_dataframes = []
     figures = []
-    table_list = []
-    for plot_state in final_state['plot_states'].values():
-        for table_state in plot_state['table_states'].values():
-            if table_state['status'] == 'OK':
-                if 'table_name' in table_state:
-                    table_list.append(' '.join(table_state['table_name'].capitalize().split('_')))
-                if 'sql_query' in table_state and table_state['sql_query'] is not None:
-                    sql_queries.append(table_state['sql_query'])
-                if 'dataframe' in table_state and table_state['dataframe'] is not None:
-                    result_dataframes.append(table_state['dataframe'])
-                    if 'figure' in table_state and table_state['figure'] is not None:
-                        figures.append(table_state['figure'])
     if "error" in final_state and final_state["error"] != "":
-        return None, None, None, [], [], [], 0, final_state["error"]
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
     figure = figures[index_state](dataframe)
     log_drias_interaction_to_huggingface(query, sql_query, user_id)
-    return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, table_list, ""
-# def ask_vanna(vn,db_vanna_path, query):
-#     try :
-#         location = detect_location_with_openai(query)
-#         if location:
-#             coords = loc2coords(location)
-#             user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")
-#             relevant_tables = detect_relevant_tables(db_vanna_path, user_input, llm)
-#             coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]
-#             user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)
-            # sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)
-#             return sql_query, result_dataframe, figure
-#         else :
-#             empty_df = pd.DataFrame()
-#             empty_fig = None
-#             return "", empty_df, empty_fig
-#     except Exception as e:
-#         print(f"Error: {e}")
-#         empty_df = pd.DataFrame()
-#         empty_fig = None
-#         return "", empty_df, empty_fig

+from climateqa.engine.talk_to_data.workflow.drias import drias_workflow
+from climateqa.engine.talk_to_data.workflow.ipcc import ipcc_workflow
 from climateqa.logging import log_drias_interaction_to_huggingface
+async def ask_drias(query: str, index_state: int = 0, user_id: str | None = None) -> tuple:
+    """Main function to process a DRIAS query and return results.
+    This function orchestrates the DRIAS workflow, processing a user query to generate
+    SQL queries, dataframes, and visualizations. It handles multiple results and allows
+    pagination through them.
     Args:
+        query (str): The user's question about climate data
+        index_state (int, optional): The index of the result to return. Defaults to 0.
+        user_id (str | None, optional): Identifier of the user, forwarded to the interaction log.
     Returns:
+        tuple: An 11-element tuple containing:
+            - sql_query (str): The SQL query used for the selected result
+            - dataframe (pd.DataFrame): The resulting data for the selected result
+            - figure: The visualization built by calling the selected figure function on the dataframe
+            - plot_information (str): Information text attached to the selected plot
+            - sql_queries (list): All generated SQL queries
+            - result_dataframes (list): All resulting dataframes
+            - figures (list): All figure generation functions
+            - plot_informations (list): Information texts of all plots
+            - index_state (int): Current result index
+            - plot_title_list (list): Titles of the outputs that have a table
+            - error (str): Error message if any ("" on success)
+        When the workflow reports an error, the tuple keeps the same 11 positions with
+        None / empty lists / index 0 and the error message in the last position.
     """
+    final_state = await drias_workflow(query)
+    # Check the workflow error first: the outputs are irrelevant in that case
+    if "error" in final_state and final_state["error"] != "":
+        # No Sql query, no dataframe, no figure, no plot information, empty sql queries list, empty result dataframes list, empty figures list, empty plot information list, index state = 0, empty table list, error message
+        # The success path returns 11 values, so the four list slots must all be present here
+        return None, None, None, None, [], [], [], [], 0, [], final_state["error"]
+    sql_queries = []
+    result_dataframes = []
+    figures = []
+    plot_title_list = []
+    plot_informations = []
+    for output_title, output in final_state['outputs'].items():
+        if output['status'] == 'OK':
+            if output['table'] is not None:
+                plot_title_list.append(output_title)
+            if output['plot_information'] is not None:
+                plot_informations.append(output['plot_information'])
+            if output['sql_query'] is not None:
+                sql_queries.append(output['sql_query'])
+            if output['dataframe'] is not None:
+                result_dataframes.append(output['dataframe'])
+                if output['figure'] is not None:
+                    figures.append(output['figure'])
+    # NOTE(review): raises IndexError when no output is 'OK' or index_state is out of range,
+    # and the lists can get out of step if an output lacks one of its fields - confirm callers.
+    sql_query = sql_queries[index_state]
+    dataframe = result_dataframes[index_state]
+    figure = figures[index_state](dataframe)
+    plot_information = plot_informations[index_state]
+    log_drias_interaction_to_huggingface(query, sql_query, user_id)
+    return sql_query, dataframe, figure, plot_information, sql_queries, result_dataframes, figures, plot_informations, index_state, plot_title_list, ""
+async def ask_ipcc(query: str, index_state: int = 0, user_id: str | None = None) -> tuple:
+    """Main function to process an IPCC query and return results.
+    This function orchestrates the IPCC workflow, processing a user query to generate
+    SQL queries, dataframes, and visualizations. It handles multiple results and allows
+    pagination through them.
+    Args:
+        query (str): The user's question about climate data
+        index_state (int, optional): The index of the result to return. Defaults to 0.
+        user_id (str | None, optional): Identifier of the user, forwarded to the interaction log.
+    Returns:
+        tuple: An 11-element tuple containing:
+            - sql_query (str): The SQL query used for the selected result
+            - dataframe (pd.DataFrame): The resulting data for the selected result
+            - figure: The visualization built by calling the selected figure function on the dataframe
+            - plot_information (str): Information text attached to the selected plot
+            - sql_queries (list): All generated SQL queries
+            - result_dataframes (list): All resulting dataframes
+            - figures (list): All figure generation functions
+            - plot_informations (list): Information texts of all plots
+            - index_state (int): Current result index
+            - plot_title_list (list): Titles of the outputs that have a table
             - error (str): Error message if any
+        When the workflow reports an error, the tuple keeps the same 11 positions with
+        None / empty lists / index 0 and the error message in the last position.
     """
+    final_state = await ipcc_workflow(query)
+    # Check the workflow error first: the outputs are irrelevant in that case
     if "error" in final_state and final_state["error"] != "":
+        # No Sql query, no dataframe, no figure, no plot information, empty sql queries list, empty result dataframes list, empty figures list, empty plot information list, index state = 0, empty table list, error message
+        # The success path returns 11 values, so the four list slots must all be present here
+        return None, None, None, None, [], [], [], [], 0, [], final_state["error"]
     sql_queries = []
     result_dataframes = []
     figures = []
+    plot_title_list = []
+    plot_informations = []
+    for output_title, output in final_state['outputs'].items():
+        if output['status'] == 'OK':
+            if output['table'] is not None:
+                plot_title_list.append(output_title)
+            if output['plot_information'] is not None:
+                plot_informations.append(output['plot_information'])
+            if output['sql_query'] is not None:
+                sql_queries.append(output['sql_query'])
+            if output['dataframe'] is not None:
+                result_dataframes.append(output['dataframe'])
+                if output['figure'] is not None:
+                    figures.append(output['figure'])
+    # NOTE(review): raises IndexError when no output is 'OK' or index_state is out of range - confirm callers.
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
     figure = figures[index_state](dataframe)
+    plot_information = plot_informations[index_state]
+    # IPCC interactions go through the DRIAS logging helper, the only logger imported here
     log_drias_interaction_to_huggingface(query, sql_query, user_id)
+    return sql_query, dataframe, figure, plot_information, sql_queries, result_dataframes, figures, plot_informations, index_state, plot_title_list, ""

climateqa/engine/talk_to_data/objects/llm_outputs.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from typing import Annotated, TypedDict
+class ArrayOutput(TypedDict):
+    """Represents the output of a function that returns an array.
+    This class is used to type-hint functions that return arrays,
+    ensuring consistent return types across the codebase.
+    The array is carried as text under the single key `array`, not as a Python list.
+    NOTE(review): the Annotated description suggests this is a structured-output schema
+    for an LLM call - confirm against the callers.
+    Attributes:
+        array (str): A syntactically valid Python array string
+    """
+    array: Annotated[str, "Syntactically valid python array."]

climateqa/engine/talk_to_data/objects/location.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from token import OP
+from typing import Optional, TypedDict
+class Location(TypedDict):
+    """Describes a location together with its optional geographic attributes.
+    Only `location` is a plain string; every other field is Optional and may hold None.
+    Attributes:
+        location (str): The location itself (presumably the name found in the user question - confirm)
+        latitude (Optional[str]): Latitude of the location
+        longitude (Optional[str]): Longitude of the location
+        country_code (Optional[str]): Code of the country
+        country_name (Optional[str]): Name of the country
+        admin1 (Optional[str]): Administrative region
+    """
+    location: str
+    latitude: Optional[str]
+    longitude: Optional[str]
+    country_code: Optional[str]
+    country_name: Optional[str]
+    admin1: Optional[str]

climateqa/engine/talk_to_data/objects/plot.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from typing import Callable, TypedDict, Optional
+from plotly.graph_objects import Figure
+class Plot(TypedDict):
+    """Represents a plot configuration in the talk-to-data system (DRIAS and IPCC).
+    This class defines the structure for configuring different types of plots
+    that can be generated from climate data.
+    Attributes:
+        name (str): The name of the plot type
+        description (str): A description of what the plot shows
+        params (list[str]): List of required parameters for the plot
+        plot_function (Callable[..., Callable[..., Figure]]): Factory returning the function that
+            builds the figure
+        sql_query (Callable[..., str]): Function to generate the SQL query for the plot
+        plot_information (Callable[..., str]): Function returning a text attached to the plot
+            (presumably explanatory information shown next to it - confirm)
+        short_name (str): Short label of the plot type
+    """
+    name: str
+    description: str
+    params: list[str]
+    plot_function: Callable[..., Callable[..., Figure]]
+    sql_query: Callable[..., str]
+    plot_information: Callable[..., str]
+    short_name: str

climateqa/engine/talk_to_data/objects/states.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from typing import Any, Callable, Optional, TypedDict
+from plotly.graph_objects import Figure
+import pandas as pd
+from climateqa.engine.talk_to_data.objects.plot import Plot
+class TTDOutput(TypedDict):
+    """One output of a talk-to-data workflow: a plot configuration paired with a table and its results.
+    Attributes:
+        status (str): Processing status of this output (NOTE(review): consumers appear to treat 'OK' as success - confirm)
+        plot (Plot): The plot configuration this output belongs to
+        table (str): The table the output is built from
+        sql_query (Optional[str]): The SQL query, once generated
+        dataframe (Optional[pd.DataFrame]): The query result, once fetched
+        figure (Optional[Callable[..., Figure]]): Function building the figure, once available
+        plot_information (Optional[str]): Text attached to the plot, once available
+    """
+    status: str
+    plot: Plot
+    table: str
+    sql_query: Optional[str]
+    dataframe: Optional[pd.DataFrame]
+    figure: Optional[Callable[..., Figure]]
+    plot_information: Optional[str]
+class State(TypedDict):
+    """State carried through a talk-to-data workflow.
+    Attributes:
+        user_input (str): The user's question
+        plots (list[str]): The plots selected for the question (presumably plot names - confirm)
+        outputs (dict[str, TTDOutput]): The outputs produced so far, by string key
+        error (Optional[str]): Error message, if any
+    """
+    user_input: str
+    plots: list[str]
+    outputs: dict[str, TTDOutput]
+    error: Optional[str]

climateqa/engine/talk_to_data/prompt.py ADDED Viewed

	@@ -0,0 +1,44 @@

+# Prompt template for SQL generation. Placeholders: {relevant_tables}, {dialect}, {model},
+# {table_info} and {input}. The instruction list is numbered consecutively (the original
+# numbering jumped from 7 to 9).
+query_prompt_template = """You are an expert SQL query generator. Given an input question, database schema, SQL dialect and relevant tables to answer the question, generate an optimized and syntactically correct SQL query which can provide useful insights to the question.
+### Instructions:
+1. **Use only relevant tables**: The following tables are relevant to answering the question: {relevant_tables}. Do not use any other tables.
+2. **Relevant columns only**: Never select `*`. Only include necessary columns based on the input question.
+3. **Schema Awareness**:
+   - Use only columns present in the given schema.
+   - **If a column name appears in multiple tables, always use the format `table_name.column_name` to avoid ambiguity.**
+   - Select only the column which are insightful for the question.
+4. **Dialect Compliance**: Follow `{dialect}` syntax rules.
+5. **Ordering**: Order the results by a relevant column if applicable (e.g., timestamp for recent records).
+6. **Valid query**: Make sure the query is syntactically and functionally correct.
+7. **Conditions** : For the common columns, the same condition should be applied to all the tables (e.g. latitude, longitude, model, year...)
+8. **Join tables** : If you need to join table, you should join them with year feature.
+9. **Model** : For each table, you need to add a condition on the model to be equal to {model}
+### Provided Database Schema:
+{table_info}
+### Relevant Tables:
+{relevant_tables}
+**Question:** {input}
+**SQL Query:**"""
+# Prompt template for plot-code generation. Placeholders: {sql_query} (the query whose
+# result is exposed as the DataFrame `df`) and {input} (the user's question). The prompt
+# asks the model to answer with Python Plotly code only.
+plot_prompt_template = """You are a data visualization expert. Given an input question and an SQL Query, generate an insightful plot according to the question.
+### Instructions
+1. **Use only the column names provided**. The data will be provided as a Pandas DataFrame `df` with the columns present in the SELECT.
+2. Generate the Python Plotly code to chart the results using `df` and the column names.
+3. Make as complete a graph as possible to answer the question, and make it as easy to understand as possible.
+4. **Response with only Python code**. Do not answer with any explanations -- just the code.
+5. **Specific cases** :
+- For a question about the evolution of something, it is also relevant to plot the data with also the sliding average for a period of 20 years for example.
+### SQL Query:
+{sql_query}
+**Question:** {input}
+**Python code:**
+"""

climateqa/engine/talk_to_data/query.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+import duckdb
+import pandas as pd
+import os
+def find_indicator_column(table: str, indicator_columns_per_table: dict[str,str]) -> str:
+    """Looks up the indicator column associated with a table.
+    The lookup is a plain dictionary access in the mapping supplied by the caller; a trace
+    line naming the table is printed first.
+    Args:
+        table (str): Name of the table in the database
+        indicator_columns_per_table (dict[str, str]): Mapping from table name to the name of
+            its indicator column
+    Returns:
+        str: Name of the indicator column for the specified table
+    Raises:
+        KeyError: If the table name is not found in the mapping
+    """
+    print(f"---- Find indicator column in table {table} ----")
+    indicator_column = indicator_columns_per_table[table]
+    return indicator_column
+async def execute_sql_query(sql_query: str) -> pd.DataFrame:
+    """Executes a SQL query with DuckDB and returns the results.
+    A fresh DuckDB connection is opened for each call (no database file is passed to
+    `duckdb.connect()`). When the HF_TOKEN environment variable is set, it is registered
+    as a Hugging Face secret on that connection before the query runs. The blocking work
+    is done in a worker thread so the event loop is not blocked, and the connection is
+    always closed afterwards.
+    Args:
+        sql_query (str): The SQL query to execute
+    Returns:
+        pd.DataFrame: A DataFrame containing the query results
+    Raises:
+        duckdb.Error: If there is an error executing the SQL query
+    """
+    def _execute_query() -> pd.DataFrame:
+        con = duckdb.connect()
+        try:
+            hf_token = os.getenv("HF_TOKEN")
+            # Only register a secret when a token exists: interpolating a missing token
+            # would store the literal string 'None' as the credential
+            if hf_token:
+                # Double single quotes so the token cannot break out of the SQL literal
+                escaped_token = hf_token.replace("'", "''")
+                con.execute(f"""CREATE SECRET hf_token (
+                    TYPE huggingface,
+                    TOKEN '{escaped_token}'
+                );""")
+            return con.execute(sql_query).fetchdf()
+        finally:
+            # Release the connection even when the query fails
+            con.close()
+    # Run the query in a thread pool to avoid blocking
+    loop = asyncio.get_running_loop()
+    with ThreadPoolExecutor() as executor:
+        return await loop.run_in_executor(executor, _execute_query)

climateqa/engine/talk_to_data/talk_to_drias.py DELETED Viewed

@@ -1,317 +0,0 @@
-import os
-from typing import Any, Callable, TypedDict, Optional
-from numpy import sort
-import pandas as pd
-import asyncio
-from plotly.graph_objects import Figure
-from climateqa.engine.llm import get_llm
-from climateqa.engine.talk_to_data import sql_query
-from climateqa.engine.talk_to_data.config import INDICATOR_COLUMNS_PER_TABLE
-from climateqa.engine.talk_to_data.plot import PLOTS, Plot
-from climateqa.engine.talk_to_data.sql_query import execute_sql_query
-from climateqa.engine.talk_to_data.utils import (
-    detect_relevant_plots,
-    detect_year_with_openai,
-    loc2coords,
-    detect_location_with_openai,
-    nearestNeighbourSQL,
-    detect_relevant_tables,
-)
-ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
-class TableState(TypedDict):
-    """Represents the state of a table in the DRIAS workflow.
-    This class defines the structure for tracking the state of a table during the
-    data processing workflow, including its name, parameters, SQL query, and results.
-    Attributes:
-        table_name (str): The name of the table in the database
-        params (dict[str, Any]): Parameters used for querying the table
-        sql_query (str, optional): The SQL query used to fetch data
-        dataframe (pd.DataFrame | None, optional): The resulting data
-        figure (Callable[..., Figure], optional): Function to generate visualization
-        status (str): The current status of the table processing ('OK' or 'ERROR')
-    """
-    table_name: str
-    params: dict[str, Any]
-    sql_query: Optional[str]
-    dataframe: Optional[pd.DataFrame | None]
-    figure: Optional[Callable[..., Figure]]
-    status: str
-class PlotState(TypedDict):
-    """Represents the state of a plot in the DRIAS workflow.
-    This class defines the structure for tracking the state of a plot during the
-    data processing workflow, including its name and associated tables.
-    Attributes:
-        plot_name (str): The name of the plot
-        tables (list[str]): List of tables used in the plot
-        table_states (dict[str, TableState]): States of the tables used in the plot
-    """
-    plot_name: str
-    tables: list[str]
-    table_states: dict[str, TableState]
-class State(TypedDict):
-    user_input: str
-    plots: list[str]
-    plot_states: dict[str, PlotState]
-    error: Optional[str]
-async def find_relevant_plots(state: State, llm) -> list[str]:
-    print("---- Find relevant plots ----")
-    relevant_plots = await detect_relevant_plots(state['user_input'], llm)
-    return relevant_plots
-async def find_relevant_tables_per_plot(state: State, plot: Plot, llm) -> list[str]:
-    print(f"---- Find relevant tables for {plot['name']} ----")
-    relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm)
-    return relevant_tables
-async def find_param(state: State, param_name:str, table: str) -> dict[str, Any] | None:
-    """Perform the good method to retrieve the desired parameter
-    Args:
-        state (State): state of the workflow
-        param_name (str): name of the desired parameter
-        table (str): name of the table
-    Returns:
-        dict[str, Any] | None:
-    """
-    if param_name == 'location':
-        location = await find_location(state['user_input'], table)
-        return location
-    if param_name == 'year':
-        year = await find_year(state['user_input'])
-        return {'year': year}
-    return None
-class Location(TypedDict):
-    location: str
-    latitude: Optional[str]
-    longitude: Optional[str]
-async def find_location(user_input: str, table: str) -> Location:
-    print(f"---- Find location in table {table} ----")
-    location = await detect_location_with_openai(user_input)
-    output: Location = {'location' : location}
-    if location:
-        coords = loc2coords(location)
-        neighbour = nearestNeighbourSQL(coords, table)
-        output.update({
-            "latitude": neighbour[0],
-            "longitude": neighbour[1],
-        })
-    return output
-async def find_year(user_input: str) -> str:
-    """Extracts year information from user input using LLM.
-    This function uses an LLM to identify and extract year information from the
-    user's query, which is used to filter data in subsequent queries.
-    Args:
-        user_input (str): The user's query text
-    Returns:
-        str: The extracted year, or empty string if no year found
-    """
-    print(f"---- Find year ---")
-    year = await detect_year_with_openai(user_input)
-    return year
-def find_indicator_column(table: str) -> str:
-    """Retrieves the name of the indicator column within a table.
-    This function maps table names to their corresponding indicator columns
-    using the predefined mapping in INDICATOR_COLUMNS_PER_TABLE.
-    Args:
-        table (str): Name of the table in the database
-    Returns:
-        str: Name of the indicator column for the specified table
-    Raises:
-        KeyError: If the table name is not found in the mapping
-    """
-    print(f"---- Find indicator column in table {table} ----")
-    return INDICATOR_COLUMNS_PER_TABLE[table]
-async def process_table(
-    table: str,
-    params: dict[str, Any],
-    plot: Plot,
-) -> TableState:
-    """Processes a table to extract relevant data and generate visualizations.
-    This function retrieves the SQL query for the specified table, executes it,
-    and generates a visualization based on the results.
-    Args:
-        table (str): The name of the table to process
-        params (dict[str, Any]): Parameters used for querying the table
-        plot (Plot): The plot object containing SQL query and visualization function
-    Returns:
-        TableState: The state of the processed table
-    """
-    table_state: TableState = {
-        'table_name': table,
-        'params': params.copy(),
-        'status': 'OK',
-        'dataframe': None,
-        'sql_query': None,
-        'figure': None
-    }
-    table_state['params']['indicator_column'] = find_indicator_column(table)
-    sql_query = plot['sql_query'](table, table_state['params'])
-    if sql_query == "":
-        table_state['status'] = 'ERROR'
-        return table_state
-    table_state['sql_query'] = sql_query
-    df = await execute_sql_query(sql_query)
-    table_state['dataframe'] = df
-    table_state['figure'] = plot['plot_function'](table_state['params'])
-    return table_state
-async def drias_workflow(user_input: str) -> State:
-    """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
-    Args:
-        user_input (str): initial user input
-    Returns:
-        State: Final state with all the results
-    """
-    state: State = {
-        'user_input': user_input,
-        'plots': [],
-        'plot_states': {},
-        'error': ''
-    }
-    llm = get_llm(provider="openai")
-    plots = await find_relevant_plots(state, llm)
-    state['plots'] = plots
-    if len(state['plots']) < 1:
-        state['error'] = 'There is no plot to answer to the question'
-        return state
-    have_relevant_table = False
-    have_sql_query = False
-    have_dataframe = False
-    for plot_name in state['plots']:
-        plot = next((p for p in PLOTS if p['name'] == plot_name), None) # Find the associated plot object
-        if plot is None:
-            continue
-        plot_state: PlotState = {
-            'plot_name': plot_name,
-            'tables': [],
-            'table_states': {}
-        }
-        plot_state['plot_name'] = plot_name
-        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
-        if len(relevant_tables) > 0 :
-            have_relevant_table = True
-        plot_state['tables'] = relevant_tables
-        params = {}
-        for param_name in plot['params']:
-            param = await find_param(state, param_name, relevant_tables[0])
-            if param:
-                params.update(param)
-        tasks = [process_table(table, params, plot) for table in plot_state['tables'][:3]]
-        results = await asyncio.gather(*tasks)
-        # Store results back in plot_state
-        have_dataframe = False
-        have_sql_query = False
-        for table_state in results:
-            if table_state['sql_query']:
-                have_sql_query = True
-            if table_state['dataframe'] is not None and len(table_state['dataframe']) > 0:
-                have_dataframe = True
-            plot_state['table_states'][table_state['table_name']] = table_state
-        state['plot_states'][plot_name] = plot_state
-    if not have_relevant_table:
-        state['error'] = "There is no relevant table in our database to answer your question"
-    elif not have_sql_query:
-        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
-    elif not have_dataframe:
-        state['error'] = "There is no data in our table that can answer to your question"
-    return state
-# def make_write_query_node():
-#     def write_query(state):
-#         print("---- Write query ----")
-#         for table in state["tables"]:
-#             sql_query = QUERIES[state[table]['query_type']](
-#                 table=table,
-#                 indicator_column=state[table]["columns"],
-#                 longitude=state[table]["longitude"],
-#                 latitude=state[table]["latitude"],
-#             )
-#             state[table].update({"sql_query": sql_query})
-#         return state
-#     return write_query
-# def make_fetch_data_node(db_path):
-#     def fetch_data(state):
-#         print("---- Fetch data ----")
-#         for table in state["tables"]:
-#             results = execute_sql_query(db_path, state[table]['sql_query'])
-#             state[table].update(results)
-#         return state
-#     return fetch_data
-## V2
-# def make_fetch_data_node(db_path: str, llm):
-#     def fetch_data(state):
-#         print("---- Fetch data ----")
-#         db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
-#         output = {}
-#         sql_query = write_sql_query(state["query"], db, state["tables"], llm)
-#         # TO DO : Add query checker
-#         print(f"SQL query  : {sql_query}")
-#         output["sql_query"] = sql_query
-#         output.update(fetch_data_from_sql_query(db_path, sql_query))
-#         return output
-#     return fetch_data

climateqa/engine/talk_to_data/ui_config.py ADDED Viewed

	@@ -0,0 +1,27 @@

+# Diverging colour scale for temperature values: a list of [position, colour]
+# stops from 0.0 to 1.0, going from dark blue through near-white at 0.50 to
+# dark red. The stops above 0.60 are not evenly spaced (0.75, 0.85, 0.90).
+# NOTE(review): presumably passed as a Plotly `colorscale` by the plot modules - confirm.
+TEMPERATURE_COLORSCALE = [
+    [0.0, "rgb(5, 48, 97)"],
+    [0.10, "rgb(33, 102, 172)"],
+    [0.20, "rgb(67, 147, 195)"],
+    [0.30, "rgb(146, 197, 222)"],
+    [0.40, "rgb(209, 229, 240)"],
+    [0.50, "rgb(247, 247, 247)"],
+    [0.60, "rgb(253, 219, 199)"],
+    [0.75, "rgb(244, 165, 130)"],
+    [0.85, "rgb(214, 96, 77)"],
+    [0.90, "rgb(178, 24, 43)"],
+    [1.0, "rgb(103, 0, 31)"]
+]
+# Diverging colour scale for precipitation values, using the same stop positions
+# as TEMPERATURE_COLORSCALE: dark brown through near-white at 0.50 to dark
+# teal-green.
+PRECIPITATION_COLORSCALE = [
+    [0.0, "rgb(84, 48, 5)"],
+    [0.10, "rgb(140, 81, 10)"],
+    [0.20, "rgb(191, 129, 45)"],
+    [0.30, "rgb(223, 194, 125)"],
+    [0.40, "rgb(246, 232, 195)"],
+    [0.50, "rgb(245, 245, 245)"],
+    [0.60, "rgb(199, 234, 229)"],
+    [0.75, "rgb(128, 205, 193)"],
+    [0.85, "rgb(53, 151, 143)"],
+    [0.90, "rgb(1, 102, 94)"],
+    [1.0, "rgb(0, 60, 48)"]
+]

climateqa/engine/talk_to_data/{myVanna.py → vanna/myVanna.py} RENAMED Viewed

File without changes

climateqa/engine/talk_to_data/{vanna_class.py → vanna/vanna_class.py} RENAMED Viewed

File without changes

climateqa/engine/talk_to_data/workflow/drias.py ADDED Viewed

	@@ -0,0 +1,163 @@

+import os
+from typing import Any
+import asyncio
+from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data.input_processing import find_param, find_relevant_plots, find_relevant_tables_per_plot
+from climateqa.engine.talk_to_data.query import execute_sql_query, find_indicator_column
+from climateqa.engine.talk_to_data.objects.plot import Plot
+from climateqa.engine.talk_to_data.objects.states import State, TTDOutput
+from climateqa.engine.talk_to_data.drias.config import DRIAS_TABLES, DRIAS_INDICATOR_COLUMNS_PER_TABLE, DRIAS_PLOT_PARAMETERS
+from climateqa.engine.talk_to_data.drias.plots import DRIAS_PLOTS
+ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
+async def process_output(
+    output_title: str,
+    table: str,
+    plot: Plot,
+    params: dict[str, Any]
+) -> tuple[str, TTDOutput, dict[str, bool]]:
+    """
+    Processes one DRIAS table for a given plot and parameters: builds the SQL query,
+    executes it, and builds the corresponding figure and plot information.
+    Args:
+        output_title (str): Title for the output (used as key in outputs dict);
+            returned unchanged so the caller can match results to outputs.
+        table (str): The name of the table to process.
+        plot (Plot): The plot object; its 'sql_query', 'plot_information' and
+            'plot_function' entries are called here.
+        params (dict[str, Any]): Parameters used for querying the table. Mutated in
+            place: 'indicator_column' is added when one is found for the table.
+    Returns:
+        tuple: (output_title, results dict, flags dict).
+            results['status'] is 'OK', 'ERROR' (no SQL query could be built; all
+            other result fields stay None) or 'NO_DATA' (the query returned None or
+            no rows). Despite its name, the third element holds success flags:
+            'have_sql_query' and 'have_dataframe' are True when that stage succeeded.
+    """
+    results: TTDOutput = {
+        'status': 'OK',
+        'plot': plot,
+        'table': table,
+        'sql_query': None,
+        'dataframe': None,
+        'figure': None,
+        'plot_information': None
+    }
+    # Success flags (True once the stage has been reached), aggregated by the caller
+    errors = {
+        'have_sql_query': False,
+        'have_dataframe': False
+    }
+    # Find the indicator column for this table
+    indicator_column = find_indicator_column(table, DRIAS_INDICATOR_COLUMNS_PER_TABLE)
+    if indicator_column:
+        params['indicator_column'] = indicator_column
+    # Build the SQL query
+    sql_query = plot['sql_query'](table, params)
+    if not sql_query:
+        # A falsy query means the builder could not produce one; stop before any execution
+        results['status'] = 'ERROR'
+        return output_title, results, errors
+    results['plot_information'] = plot['plot_information'](table, params)
+    results['sql_query'] = sql_query
+    errors['have_sql_query'] = True
+    # Execute the SQL query
+    df = await execute_sql_query(sql_query)
+    if df is not None and len(df) > 0:
+        results['dataframe'] = df
+        errors['have_dataframe'] = True
+    else:
+        results['status'] = 'NO_DATA'
+    # Generate the figure (always, even if df is empty, for consistency).
+    # plot_function receives only params, not df.
+    # NOTE(review): presumably it returns a callable that is later applied to the dataframe - confirm.
+    results['figure'] = plot['plot_function'](params)
+    return output_title, results, errors
+async def drias_workflow(user_input: str) -> State:
+    """
+    Orchestrates the DRIAS workflow: from user input to SQL queries, dataframes, and figures.
+    Steps: select the relevant plots (at most 2 are kept), select the relevant tables
+    for each plot, extract the query parameters once from the user input, then run
+    process_output concurrently for every (plot, table) pair.
+    Args:
+        user_input (str): The user's question.
+    Returns:
+        State: Final state with all results and error messages if any. 'plots' holds
+            the retained plot names, 'outputs' maps an output title to its table,
+            plot, status, SQL query, dataframe, figure and plot information, and
+            'error' is '' unless no plot, table, SQL query or data was found.
+    """
+    state: State = {
+        'user_input': user_input,
+        'plots': [],
+        'outputs': {},
+        'error': ''
+    }
+    llm = get_llm(provider="openai")
+    plots = await find_relevant_plots(state, llm, DRIAS_PLOTS)
+    if not plots:
+        state['error'] = 'There is no plot to answer to the question'
+        return state
+    plots = plots[:2]  # limit to 2 types of plots
+    state['plots'] = plots
+    # Success flags: each one becomes True as soon as one output reaches that stage
+    errors = {
+        'have_relevant_table': False,
+        'have_sql_query': False,
+        'have_dataframe': False
+    }
+    outputs = {}
+    # Find relevant tables for each plot and prepare outputs
+    for plot_name in plots:
+        # Resolve the plot name to its entry in DRIAS_PLOTS; unknown names are skipped silently
+        plot = next((p for p in DRIAS_PLOTS if p['name'] == plot_name), None)
+        if plot is None:
+            continue
+        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm, DRIAS_TABLES)
+        if relevant_tables:
+            errors['have_relevant_table'] = True
+        for table in relevant_tables:
+            # Title = plot short name + table name passed through str.capitalize() with
+            # underscores turned into spaces. It is also the dict key, so two
+            # (plot, table) pairs producing the same title would overwrite each other.
+            output_title = f"{plot['short_name']} - {' '.join(table.capitalize().split('_'))}"
+            outputs[output_title] = {
+                'table': table,
+                'plot': plot,
+                'status': 'OK'
+            }
+    # Gather all required parameters (extracted once and shared by every output;
+    # each find_param call is awaited in turn)
+    params = {}
+    for param_name in DRIAS_PLOT_PARAMETERS:
+        param = await find_param(state, param_name, mode='DRIAS')
+        if param:
+            params.update(param)
+    # Process all outputs in parallel using process_output.
+    # Each task gets its own copy of params because process_output adds
+    # 'indicator_column' to the dict it receives.
+    tasks = [
+        process_output(output_title, output['table'], output['plot'], params.copy())
+        for output_title, output in outputs.items()
+    ]
+    # With asyncio.gather's default settings, an exception raised by any task is raised here
+    results = await asyncio.gather(*tasks)
+    # Update outputs with results and error flags
+    for output_title, task_results, task_errors in results:
+        outputs[output_title]['sql_query'] = task_results['sql_query']
+        outputs[output_title]['dataframe'] = task_results['dataframe']
+        outputs[output_title]['figure'] = task_results['figure']
+        outputs[output_title]['plot_information'] = task_results['plot_information']
+        outputs[output_title]['status'] = task_results['status']
+        errors['have_sql_query'] |= task_errors['have_sql_query']
+        errors['have_dataframe'] |= task_errors['have_dataframe']
+    state['outputs'] = outputs
+    # Set error messages if needed: only the earliest stage that no output reached is reported
+    if not errors['have_relevant_table']:
+        state['error'] = "There is no relevant table in our database to answer your question"
+    elif not errors['have_sql_query']:
+        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
+    elif not errors['have_dataframe']:
+        state['error'] = "There is no data in our table that can answer to your question"
+    return state

climateqa/engine/talk_to_data/workflow/ipcc.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import os
+from typing import Any
+import asyncio
+from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data.input_processing import find_param, find_relevant_plots, find_relevant_tables_per_plot
+from climateqa.engine.talk_to_data.query import execute_sql_query, find_indicator_column
+from climateqa.engine.talk_to_data.objects.plot import Plot
+from climateqa.engine.talk_to_data.objects.states import State, TTDOutput
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_TABLES, IPCC_INDICATOR_COLUMNS_PER_TABLE, IPCC_PLOT_PARAMETERS
+from climateqa.engine.talk_to_data.ipcc.plots import IPCC_PLOTS
+ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
+async def process_output(
+    output_title: str,
+    table: str,
+    plot: Plot,
+    params: dict[str, Any]
+) -> tuple[str, TTDOutput, dict[str, bool]]:
+    """
+    Process one IPCC table for a given plot and parameters: builds the SQL query,
+    executes it, and builds the corresponding figure and plot information.
+    Args:
+        output_title (str): Title for the output (used as key in outputs dict);
+            returned unchanged so the caller can match results to outputs.
+        table (str): The name of the table to process.
+        plot (Plot): The plot object; its 'sql_query', 'plot_information' and
+            'plot_function' entries are called here.
+        params (dict[str, Any]): Parameters used for querying the table. Mutated in
+            place: 'indicator_column' is added when one is found for the table.
+    Returns:
+        tuple: (output_title, results dict, flags dict).
+            results['status'] is 'OK', 'ERROR' (no SQL query could be built; all
+            other result fields stay None) or 'NO_DATA' (the query returned None or
+            an empty dataframe). Despite its name, the third element holds success
+            flags: 'have_sql_query' and 'have_dataframe' are True when that stage
+            succeeded.
+    """
+    results: TTDOutput = {
+        'status': 'OK',
+        'plot': plot,
+        'table': table,
+        'sql_query': None,
+        'dataframe': None,
+        'figure': None,
+        'plot_information': None,
+    }
+    # Success flags (True once the stage has been reached), aggregated by the caller
+    errors = {
+        'have_sql_query': False,
+        'have_dataframe': False
+    }
+    # Find the indicator column for this table
+    indicator_column = find_indicator_column(table, IPCC_INDICATOR_COLUMNS_PER_TABLE)
+    if indicator_column:
+        params['indicator_column'] = indicator_column
+    # Build the SQL query
+    sql_query = plot['sql_query'](table, params)
+    if not sql_query:
+        # A falsy query means the builder could not produce one; stop before any execution
+        results['status'] = 'ERROR'
+        return output_title, results, errors
+    results['plot_information'] = plot['plot_information'](table, params)
+    results['sql_query'] = sql_query
+    errors['have_sql_query'] = True
+    # Execute the SQL query
+    df = await execute_sql_query(sql_query)
+    if df is not None and not df.empty:
+        results['dataframe'] = df
+        errors['have_dataframe'] = True
+    else:
+        results['status'] = 'NO_DATA'
+    # Generate the figure (always, even if df is empty, for consistency).
+    # plot_function receives only params, not df.
+    # NOTE(review): presumably it returns a callable that is later applied to the dataframe - confirm.
+    results['figure'] = plot['plot_function'](params)
+    return output_title, results, errors
+async def ipcc_workflow(user_input: str) -> State:
+    """
+    Performs the complete workflow of Talk To IPCC: from user input to SQL queries, dataframes, and figures.
+    Steps: select the relevant plots (all of them are kept), select the relevant
+    tables for each plot, extract the query parameters once from the user input,
+    then run process_output concurrently for every (plot, table) pair.
+    Args:
+        user_input (str): The user's question.
+    Returns:
+        State: Final state with all the results and error messages if any. 'plots'
+            holds the selected plot names, 'outputs' maps an output title to its
+            table, plot, status, SQL query, dataframe, figure and plot information,
+            and 'error' is '' unless no plot, table, SQL query or data was found.
+    """
+    state: State = {
+        'user_input': user_input,
+        'plots': [],
+        'outputs': {},
+        'error': ''
+    }
+    llm = get_llm(provider="openai")
+    plots = await find_relevant_plots(state, llm, IPCC_PLOTS)
+    # Stored before the emptiness check, so state['plots'] is whatever find_relevant_plots returned
+    state['plots'] = plots
+    if not plots:
+        state['error'] = 'There is no plot to answer to the question'
+        return state
+    # Success flags: each one becomes True as soon as one output reaches that stage
+    errors = {
+        'have_relevant_table': False,
+        'have_sql_query': False,
+        'have_dataframe': False
+    }
+    outputs = {}
+    # Find relevant tables for each plot and prepare outputs
+    for plot_name in plots:
+        # Resolve the plot name to its entry in IPCC_PLOTS; unknown names are skipped silently
+        plot = next((p for p in IPCC_PLOTS if p['name'] == plot_name), None)
+        if plot is None:
+            continue
+        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm, IPCC_TABLES)
+        if relevant_tables:
+            errors['have_relevant_table'] = True
+        for table in relevant_tables:
+            # Title = plot short name + table name passed through str.capitalize() with
+            # underscores turned into spaces. It is also the dict key, so two
+            # (plot, table) pairs producing the same title would overwrite each other.
+            output_title = f"{plot['short_name']} - {' '.join(table.capitalize().split('_'))}"
+            outputs[output_title] = {
+                'table': table,
+                'plot': plot,
+                'status': 'OK'
+            }
+    # Gather all required parameters (extracted once and shared by every output;
+    # each find_param call is awaited in turn)
+    params = {}
+    for param_name in IPCC_PLOT_PARAMETERS:
+        param = await find_param(state, param_name, mode='IPCC')
+        if param:
+            params.update(param)
+    # Process all outputs in parallel using process_output.
+    # Each task gets its own copy of params because process_output adds
+    # 'indicator_column' to the dict it receives.
+    tasks = [
+        process_output(output_title, output['table'], output['plot'], params.copy())
+        for output_title, output in outputs.items()
+    ]
+    # With asyncio.gather's default settings, an exception raised by any task is raised here
+    results = await asyncio.gather(*tasks)
+    # Update outputs with results and error flags
+    for output_title, task_results, task_errors in results:
+        outputs[output_title]['sql_query'] = task_results['sql_query']
+        outputs[output_title]['dataframe'] = task_results['dataframe']
+        outputs[output_title]['figure'] = task_results['figure']
+        outputs[output_title]['plot_information'] = task_results['plot_information']
+        outputs[output_title]['status'] = task_results['status']
+        errors['have_sql_query'] |= task_errors['have_sql_query']
+        errors['have_dataframe'] |= task_errors['have_dataframe']
+    state['outputs'] = outputs
+    # Set error messages if needed: only the earliest stage that no output reached is reported
+    if not errors['have_relevant_table']:
+        state['error'] = "There is no relevant table in our database to answer your question"
+    elif not errors['have_sql_query']:
+        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
+    elif not errors['have_dataframe']:
+        state['error'] = "There is no data in our table that can answer to your question"
+    return state

front/tabs/tab_drias.py CHANGED Viewed

@@ -4,26 +4,25 @@ import os
 import pandas as pd
 from climateqa.engine.talk_to_data.main import ask_drias
-from climateqa.engine.talk_to_data.config import DRIAS_MODELS, DRIAS_UI_TEXT
 class DriasUIElements(TypedDict):
     tab: gr.Tab
     details_accordion: gr.Accordion
     examples_hidden: gr.Textbox
     examples: gr.Examples
     drias_direct_question: gr.Textbox
     result_text: gr.Textbox
-    table_names_display: gr.DataFrame
     query_accordion: gr.Accordion
     drias_sql_query: gr.Textbox
     chart_accordion: gr.Accordion
     model_selection: gr.Dropdown
     drias_display: gr.Plot
     table_accordion: gr.Accordion
     drias_table: gr.DataFrame
-    pagination_display: gr.Markdown
-    prev_button: gr.Button
-    next_button: gr.Button
 async def ask_drias_query(query: str, index_state: int, user_id: str):
@@ -31,7 +30,7 @@ async def ask_drias_query(query: str, index_state: int, user_id: str):
     return result
-def show_results(sql_queries_state, dataframes_state, plots_state):
     if not sql_queries_state or not dataframes_state or not plots_state:
         # If all results are empty, show "No result"
         return (
@@ -40,9 +39,6 @@ def show_results(sql_queries_state, dataframes_state, plots_state):
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
         )
     else:
         # Show the appropriate components with their data
@@ -51,10 +47,7 @@ def show_results(sql_queries_state, dataframes_state, plots_state):
             gr.update(visible=True),
             gr.update(visible=True),
             gr.update(visible=True),
-            gr.update(visible=True),
-            gr.update(visible=True),
-            gr.update(visible=True),
-            gr.update(visible=True),
         )
@@ -72,44 +65,14 @@ def filter_by_model(dataframes, figures, index_state, model_selection):
     return df, figure
-def update_pagination(index, sql_queries):
-    pagination = f"{index + 1}/{len(sql_queries)}" if sql_queries else ""
-    return pagination
-def show_previous(index, sql_queries, dataframes, plots):
-    if index > 0:
-        index -= 1
-    return (
-        sql_queries[index],
-        dataframes[index],
-        plots[index](dataframes[index]),
-        index,
-    )
-def show_next(index, sql_queries, dataframes, plots):
-    if index < len(sql_queries) - 1:
-        index += 1
-    return (
-        sql_queries[index],
-        dataframes[index],
-        plots[index](dataframes[index]),
-        index,
-    )
-def display_table_names(table_names):
-    return [table_names]
-def on_table_click(evt: gr.SelectData, table_names, sql_queries, dataframes, plots):
-    index = evt.index[1]
     figure = plots[index](dataframes[index])
     return (
         sql_queries[index],
         dataframes[index],
         figure,
         index,
     )
@@ -117,7 +80,7 @@ def on_table_click(evt: gr.SelectData, table_names, sql_queries, dataframes, plo
 def create_drias_ui() -> DriasUIElements:
     """Create and return all UI elements for the DRIAS tab."""
     with gr.Tab("France - Talk to DRIAS", elem_id="tab-vanna", id=6) as tab:
-        with gr.Accordion(label="Details") as details_accordion:
             gr.Markdown(DRIAS_UI_TEXT)
         # Add examples for common questions
@@ -141,24 +104,43 @@ def create_drias_ui() -> DriasUIElements:
                 elem_id="direct-question",
                 interactive=True,
             )
         result_text = gr.Textbox(
             label="", elem_id="no-result-label", interactive=False, visible=True
         )
-        table_names_display = gr.DataFrame(
-            [], label="List of relevant indicators", headers=None, interactive=False, elem_id="table-names", visible=False
-        )
-        with gr.Accordion(label="SQL Query Used", visible=False) as query_accordion:
-            drias_sql_query = gr.Textbox(
-                label="", elem_id="sql-query", interactive=False
             )
         with gr.Accordion(label="Chart", visible=False) as chart_accordion:
-            model_selection = gr.Dropdown(
-                label="Model", choices=DRIAS_MODELS, value="ALL", interactive=True
-            )
             drias_display = gr.Plot(elem_id="vanna-plot")
         with gr.Accordion(
@@ -166,32 +148,23 @@ def create_drias_ui() -> DriasUIElements:
         ) as table_accordion:
             drias_table = gr.DataFrame([], elem_id="vanna-table")
-        pagination_display = gr.Markdown(
-            value="", visible=False, elem_id="pagination-display"
-        )
-        with gr.Row():
-            prev_button = gr.Button("Previous", visible=False)
-            next_button = gr.Button("Next", visible=False)
         return DriasUIElements(
             tab=tab,
             details_accordion=details_accordion,
             examples_hidden=examples_hidden,
             examples=examples,
             drias_direct_question=drias_direct_question,
             result_text=result_text,
             table_names_display=table_names_display,
             query_accordion=query_accordion,
             drias_sql_query=drias_sql_query,
             chart_accordion=chart_accordion,
             model_selection=model_selection,
             drias_display=drias_display,
             table_accordion=table_accordion,
             drias_table=drias_table,
-            pagination_display=pagination_display,
-            prev_button=prev_button,
-            next_button=next_button
         )
@@ -202,94 +175,56 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
     sql_queries_state = gr.State([])
     dataframes_state = gr.State([])
     plots_state = gr.State([])
     index_state = gr.State(0)
     table_names_list = gr.State([])
     user_id = gr.State(user_id)
     # Handle example selection
     ui_elements["examples_hidden"].change(
         lambda x: (gr.Accordion(open=False), gr.Textbox(value=x)),
         inputs=[ui_elements["examples_hidden"]],
         outputs=[ui_elements["details_accordion"], ui_elements["drias_direct_question"]]
     ).then(
-        ask_drias_query,
-        inputs=[ui_elements["examples_hidden"], index_state, user_id],
-        outputs=[
-            ui_elements["drias_sql_query"],
-            ui_elements["drias_table"],
-            ui_elements["drias_display"],
-            sql_queries_state,
-            dataframes_state,
-            plots_state,
-            index_state,
-            table_names_list,
-            ui_elements["result_text"],
-        ],
-    ).then(
-        show_results,
-        inputs=[sql_queries_state, dataframes_state, plots_state],
-        outputs=[
-            ui_elements["result_text"],
-            ui_elements["query_accordion"],
-            ui_elements["table_accordion"],
-            ui_elements["chart_accordion"],
-            ui_elements["prev_button"],
-            ui_elements["next_button"],
-            ui_elements["pagination_display"],
-            ui_elements["table_names_display"],
-        ],
-    ).then(
-        update_pagination,
-        inputs=[index_state, sql_queries_state],
-        outputs=[ui_elements["pagination_display"]],
-    ).then(
-        display_table_names,
-        inputs=[table_names_list],
-        outputs=[ui_elements["table_names_display"]],
-    )
-    # Handle direct question submission
-    ui_elements["drias_direct_question"].submit(
-        lambda: gr.Accordion(open=False),
         inputs=None,
-        outputs=[ui_elements["details_accordion"]]
     ).then(
         ask_drias_query,
-        inputs=[ui_elements["drias_direct_question"], index_state, user_id],
         outputs=[
             ui_elements["drias_sql_query"],
             ui_elements["drias_table"],
             ui_elements["drias_display"],
             sql_queries_state,
             dataframes_state,
             plots_state,
             index_state,
             table_names_list,
             ui_elements["result_text"],
         ],
     ).then(
         show_results,
-        inputs=[sql_queries_state, dataframes_state, plots_state],
         outputs=[
             ui_elements["result_text"],
             ui_elements["query_accordion"],
             ui_elements["table_accordion"],
             ui_elements["chart_accordion"],
-            ui_elements["prev_button"],
-            ui_elements["next_button"],
-            ui_elements["pagination_display"],
             ui_elements["table_names_display"],
         ],
-    ).then(
-        update_pagination,
-        inputs=[index_state, sql_queries_state],
-        outputs=[ui_elements["pagination_display"]],
-    ).then(
-        display_table_names,
-        inputs=[table_names_list],
-        outputs=[ui_elements["table_names_display"]],
     )
     # Handle model selection change
     ui_elements["model_selection"].change(
         filter_by_model,
@@ -297,36 +232,12 @@ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=
         outputs=[ui_elements["drias_table"], ui_elements["drias_display"]],
     )
-    # Handle pagination buttons
-    ui_elements["prev_button"].click(
-        show_previous,
-        inputs=[index_state, sql_queries_state, dataframes_state, plots_state],
-        outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], index_state],
-    ).then(
-        update_pagination,
-        inputs=[index_state, sql_queries_state],
-        outputs=[ui_elements["pagination_display"]],
-    )
-    ui_elements["next_button"].click(
-        show_next,
-        inputs=[index_state, sql_queries_state, dataframes_state, plots_state],
-        outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], index_state],
-    ).then(
-        update_pagination,
-        inputs=[index_state, sql_queries_state],
-        outputs=[ui_elements["pagination_display"]],
-    )
     # Handle table selection
-    ui_elements["table_names_display"].select(
         fn=on_table_click,
-        inputs=[table_names_list, sql_queries_state, dataframes_state, plots_state],
-        outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], index_state],
-    ).then(
-        update_pagination,
-        inputs=[index_state, sql_queries_state],
-        outputs=[ui_elements["pagination_display"]],
     )
 def create_drias_tab(share_client=None, user_id=None):

 import pandas as pd
 from climateqa.engine.talk_to_data.main import ask_drias
+from climateqa.engine.talk_to_data.drias.config import DRIAS_MODELS, DRIAS_UI_TEXT
 class DriasUIElements(TypedDict):
     tab: gr.Tab
     details_accordion: gr.Accordion
     examples_hidden: gr.Textbox
     examples: gr.Examples
+    image_examples: gr.Row
     drias_direct_question: gr.Textbox
     result_text: gr.Textbox
+    table_names_display: gr.Radio
     query_accordion: gr.Accordion
     drias_sql_query: gr.Textbox
     chart_accordion: gr.Accordion
+    plot_information: gr.Markdown
     model_selection: gr.Dropdown
     drias_display: gr.Plot
     table_accordion: gr.Accordion
     drias_table: gr.DataFrame
 async def ask_drias_query(query: str, index_state: int, user_id: str):
     return result
+def show_results(sql_queries_state, dataframes_state, plots_state, table_names):
     if not sql_queries_state or not dataframes_state or not plots_state:
         # If all results are empty, show "No result"
         return (
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
         )
     else:
         # Show the appropriate components with their data
             gr.update(visible=True),
             gr.update(visible=True),
             gr.update(visible=True),
+            gr.update(choices=table_names, value=table_names[0], visible=True),
         )
     return df, figure
+def on_table_click(selected_label, table_names, sql_queries, dataframes, plot_informations, plots):
+    index = table_names.index(selected_label)
     figure = plots[index](dataframes[index])
     return (
         sql_queries[index],
         dataframes[index],
         figure,
+        plot_informations[index],
         index,
     )
 def create_drias_ui() -> DriasUIElements:
     """Create and return all UI elements for the DRIAS tab."""
     with gr.Tab("France - Talk to DRIAS", elem_id="tab-vanna", id=6) as tab:
+        with gr.Accordion(label="❓ How to use?", elem_id="details") as details_accordion:
             gr.Markdown(DRIAS_UI_TEXT)
         # Add examples for common questions
                 elem_id="direct-question",
                 interactive=True,
             )
+        with gr.Row(visible=True, elem_id="example-img-container") as image_examples:
+            gr.Markdown("### Examples of possible visualizations")
+            with gr.Row():
+                gr.Image("./front/assets/talk_to_drias_winter_temp_paris_example.png", label="Evolution of Mean Winter Temperature in Paris", elem_classes=["example-img"])
+                gr.Image("./front/assets/talk_to_drias_annual_temperature_france_example.png", label="Mean Annual Temperature in 2030 in France", elem_classes=["example-img"])
+                gr.Image("./front/assets/talk_to_drias_frequency_remarkable_precipitation_lyon_example.png", label="Frequency of Remarkable Daily Precipitation in Lyon", elem_classes=["example-img"])
         result_text = gr.Textbox(
             label="", elem_id="no-result-label", interactive=False, visible=True
         )
+        with gr.Row():
+            table_names_display = gr.Radio(
+                choices=[],
+                label="Relevant figures created",
+                interactive=True,
+                elem_id="table-names",
+                visible=False
             )
+            with gr.Accordion(label="SQL Query Used", visible=False) as query_accordion:
+                drias_sql_query = gr.Textbox(
+                    label="", elem_id="sql-query", interactive=False
+                )
         with gr.Accordion(label="Chart", visible=False) as chart_accordion:
+            with gr.Row():
+                model_selection = gr.Dropdown(
+                    label="Model", choices=DRIAS_MODELS, value="ALL", interactive=True
+                )
+                with gr.Accordion(label="Informations about the plot", open=False):
+                    plot_information = gr.Markdown(value = "")
             drias_display = gr.Plot(elem_id="vanna-plot")
         with gr.Accordion(
         ) as table_accordion:
             drias_table = gr.DataFrame([], elem_id="vanna-table")
         return DriasUIElements(
             tab=tab,
             details_accordion=details_accordion,
             examples_hidden=examples_hidden,
             examples=examples,
+            image_examples=image_examples,
             drias_direct_question=drias_direct_question,
             result_text=result_text,
             table_names_display=table_names_display,
             query_accordion=query_accordion,
             drias_sql_query=drias_sql_query,
             chart_accordion=chart_accordion,
+            plot_information=plot_information,
             model_selection=model_selection,
             drias_display=drias_display,
             table_accordion=table_accordion,
             drias_table=drias_table,
         )
     sql_queries_state = gr.State([])
     dataframes_state = gr.State([])
     plots_state = gr.State([])
+    plot_informations_state = gr.State([])
     index_state = gr.State(0)
     table_names_list = gr.State([])
     user_id = gr.State(user_id)
+    # Handle direct question submission - trigger the same workflow by setting examples_hidden
+    ui_elements["drias_direct_question"].submit(
+        lambda x: gr.update(value=x),
+        inputs=[ui_elements["drias_direct_question"]],
+        outputs=[ui_elements["examples_hidden"]],
+    )
     # Handle example selection
     ui_elements["examples_hidden"].change(
         lambda x: (gr.Accordion(open=False), gr.Textbox(value=x)),
         inputs=[ui_elements["examples_hidden"]],
         outputs=[ui_elements["details_accordion"], ui_elements["drias_direct_question"]]
     ).then(
+        lambda : gr.update(visible=False),
         inputs=None,
+        outputs=ui_elements["image_examples"]
     ).then(
         ask_drias_query,
+        inputs=[ui_elements["examples_hidden"], index_state, user_id],
         outputs=[
             ui_elements["drias_sql_query"],
             ui_elements["drias_table"],
             ui_elements["drias_display"],
+            ui_elements["plot_information"],
             sql_queries_state,
             dataframes_state,
             plots_state,
+            plot_informations_state,
             index_state,
             table_names_list,
             ui_elements["result_text"],
         ],
     ).then(
         show_results,
+        inputs=[sql_queries_state, dataframes_state, plots_state, table_names_list],
         outputs=[
             ui_elements["result_text"],
             ui_elements["query_accordion"],
             ui_elements["table_accordion"],
             ui_elements["chart_accordion"],
             ui_elements["table_names_display"],
         ],
     )
     # Handle model selection change
     ui_elements["model_selection"].change(
         filter_by_model,
         outputs=[ui_elements["drias_table"], ui_elements["drias_display"]],
     )
     # Handle table selection
+    ui_elements["table_names_display"].change(
         fn=on_table_click,
+        inputs=[ui_elements["table_names_display"], table_names_list, sql_queries_state, dataframes_state, plot_informations_state, plots_state],
+        outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], ui_elements["plot_information"], index_state],
     )
 def create_drias_tab(share_client=None, user_id=None):

front/tabs/tab_ipcc.py ADDED Viewed

	@@ -0,0 +1,300 @@

+from random import choices
+import gradio as gr
+from typing import TypedDict
+from climateqa.engine.talk_to_data.main import ask_ipcc
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_MODELS, IPCC_SCENARIO, IPCC_UI_TEXT
+import uuid
+class ipccUIElements(TypedDict):
+    """Typed mapping of the Gradio components that make up the IPCC tab.
+
+    ``create_ipcc_ui`` builds and returns this mapping; ``setup_ipcc_events``
+    looks the components up by key to wire the event handlers.
+    """
+    # Tab container and the "How to use?" accordion
+    tab: gr.Tab
+    details_accordion: gr.Accordion
+    # Question inputs: hidden textbox fed by the examples, the example list,
+    # the row of example images and the visible free-text question box
+    examples_hidden: gr.Textbox
+    examples: gr.Examples
+    image_examples: gr.Row
+    ipcc_direct_question: gr.Textbox
+    # Result text and the radio group listing the figures that were created
+    result_text: gr.Textbox
+    table_names_display: gr.Radio
+    # SQL query section
+    query_accordion: gr.Accordion
+    ipcc_sql_query: gr.Textbox
+    # Chart section: plot description, scenario dropdown and the plot itself
+    chart_accordion: gr.Accordion
+    plot_information: gr.Markdown
+    scenario_selection: gr.Dropdown
+    ipcc_display: gr.Plot
+    # "Data used" section
+    table_accordion: gr.Accordion
+    ipcc_table: gr.DataFrame
+async def ask_ipcc_query(query: str, index_state: int, user_id: str):
+    """Await ``ask_ipcc`` with the given arguments and return its result unchanged."""
+    return await ask_ipcc(query, index_state, user_id)
+def hide_outputs():
+    """Return five visibility updates: show the first target, hide the other four.
+
+    In order, the updates are meant for: result text, query accordion,
+    table accordion, chart accordion, table names.
+    """
+    visibility = (True, False, False, False, False)
+    # One fresh update object per output component.
+    return tuple(gr.update(visible=flag) for flag in visibility)
+def show_results(sql_queries_state, dataframes_state, plots_state, table_names):
+    """Choose which result components are visible once a query has run.
+
+    Returns a 5-tuple of ``gr.update`` values, in order: result text,
+    query accordion, table accordion, chart accordion, table-name radio.
+    When any of the inputs is empty only the result text is shown;
+    otherwise the result text is hidden, the three accordions are shown
+    and the radio is filled with ``table_names`` with the first preselected.
+    """
+    # table_names is part of the guard because table_names[0] is read below;
+    # an empty list would otherwise raise IndexError.
+    if not (sql_queries_state and dataframes_state and plots_state and table_names):
+        # Nothing to display: keep only the result text visible.
+        return (
+            gr.update(visible=True),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+        )
+    # Show the appropriate components with their data
+    return (
+        gr.update(visible=False),
+        gr.update(visible=True),
+        gr.update(visible=True),
+        gr.update(visible=True),
+        gr.update(choices=table_names, value=table_names[0], visible=True),
+    )
+def show_filter_by_scenario(table_names, index_state, dataframes):
+    """Decide whether the scenario dropdown is shown for the current table.
+
+    The dropdown is shown only when the table name at ``index_state`` starts
+    with "Map" and its dataframe has a non-empty "scenario" column. Its
+    choices are then the sorted unique scenarios, with the first preselected.
+    In every other case an update that hides the dropdown is returned.
+    """
+    # No entry at this index (e.g. empty state lists): nothing to filter.
+    if not table_names or index_state >= min(len(table_names), len(dataframes)):
+        return gr.update(visible=False)
+    if not table_names[index_state].startswith("Map"):
+        return gr.update(visible=False)
+    df = dataframes[index_state]
+    # Same guards as filter_by_scenario: an empty frame would make
+    # scenarios[0] fail, a missing column would raise KeyError.
+    if df.empty or "scenario" not in df.columns:
+        return gr.update(visible=False)
+    scenarios = sorted(df["scenario"].unique())
+    return gr.update(visible=True, choices=scenarios, value=scenarios[0])
+def filter_by_scenario(dataframes, figures, table_names, index_state, scenario):
+    """Return the dataframe and figure for the current table, filtered by scenario.
+
+    Args:
+        dataframes: stored dataframes, one per table.
+        figures: callables, one per table, each turning a dataframe into a figure.
+        table_names: names of the tables; only names starting with "Map" are filtered.
+        index_state: index of the table currently displayed.
+        scenario: value of the "scenario" column to keep.
+
+    Returns:
+        ``(dataframe, figure)``. The figure is ``None`` when a "Map" table has
+        no rows, before or after filtering. ``(None, None)`` is returned when
+        there is no stored dataframe at ``index_state``.
+    """
+    # Without stored results there is nothing to index into; return empty
+    # outputs instead of raising IndexError.
+    if not dataframes or index_state >= len(dataframes):
+        return None, None
+    df = dataframes[index_state]
+    # Only "Map" tables are filtered; anything else is plotted as is.
+    if not table_names[index_state].startswith("Map"):
+        return df, figures[index_state](df)
+    if df.empty:
+        return df, None
+    if "scenario" in df.columns:
+        df = df[df["scenario"] == scenario]
+        if df.empty:
+            return df, None
+    return df, figures[index_state](df)
+def display_table_names(table_names, index_state):
+    """Wrap every table name in its own one-element list; ``index_state`` is not used."""
+    return [[label] for label in table_names]
+def on_table_click(selected_label, table_names, sql_queries, dataframes, plot_informations, plots):
+    """Return the stored results that belong to the selected table name.
+
+    The position of ``selected_label`` in ``table_names`` selects the entry
+    from each parallel list; the plot callable at that position is called
+    with the matching dataframe.
+
+    Returns ``(sql_query, dataframe, figure, plot_information, index)``.
+    """
+    position = table_names.index(selected_label)
+    rendered = plots[position](dataframes[position])
+    sql_query, frame, info = sql_queries[position], dataframes[position], plot_informations[position]
+    return sql_query, frame, rendered, info, position
+def create_ipcc_ui() -> ipccUIElements:
+    """Create and return all UI elements for the ipcc tab.
+
+    Components are created inside a single ``gr.Tab`` in the order they are
+    laid out: help accordion, example questions, question box, example
+    images, result text, figure selector with SQL query, chart section and
+    data table. Everything except the help accordion, the examples, the
+    question box, the example images and the result text starts hidden.
+
+    Returns:
+        An ``ipccUIElements`` mapping holding every component that the event
+        handlers need to reference.
+    """
+    with gr.Tab("(Beta) Talk to IPCC", elem_id="tab-vanna", id=7) as tab:
+        with gr.Accordion(label="❓ How to use?", elem_id="details") as details_accordion:
+            gr.Markdown(IPCC_UI_TEXT)
+        # Add examples for common questions
+        # The examples write into an invisible textbox rather than the visible question box.
+        examples_hidden = gr.Textbox(visible=False, elem_id="ipcc-examples-hidden")
+        examples = gr.Examples(
+            examples=[
+                ["What will the temperature be like in Paris?"],
+                ["What will be the total rainfall in the USA in 2030?"],
+                ["How will the average temperature evolve in China?"],
+                ["What will be the average total precipitation in London ?"]
+            ],
+            label="Example Questions",
+            inputs=[examples_hidden],
+            outputs=[examples_hidden],
+        )
+        # Free-text question typed by the user
+        with gr.Row():
+            ipcc_direct_question = gr.Textbox(
+                label="Direct Question",
+                placeholder="You can write direct question here",
+                elem_id="direct-question",
+                interactive=True,
+            )
+        # Static example images, visible until hidden by an event handler
+        with gr.Row(visible=True, elem_id="example-img-container") as image_examples:
+            gr.Markdown("### Examples of possible visualizations")
+            with gr.Row():
+                gr.Image("./front/assets/talk_to_ipcc_france_example.png", label="Total Precipitation in 2030 in France", elem_classes=["example-img"])
+                gr.Image("./front/assets/talk_to_ipcc_new_york_example.png", label="Yearly Evolution of Mean Temperature in New York (Historical + SSP Scenarios)", elem_classes=["example-img"])
+                gr.Image("./front/assets/talk_to_ipcc_china_example.png", label="Mean Temperature in 2050 in China", elem_classes=["example-img"])
+        result_text = gr.Textbox(
+            label="", elem_id="no-result-label", interactive=False, visible=True
+        )
+        # Figure selector (choices are filled in later) next to the SQL query used
+        with gr.Row():
+            table_names_display = gr.Radio(
+                choices=[],
+                label="Relevant figures created",
+                interactive=True,
+                elem_id="table-names",
+                visible=False
+            )
+            with gr.Accordion(label="SQL Query Used", visible=False) as query_accordion:
+                ipcc_sql_query = gr.Textbox(
+                    label="", elem_id="sql-query", interactive=False
+                )
+        # Chart section: scenario dropdown (hidden by default), plot description and the plot
+        with gr.Accordion(label="Chart", visible=False) as chart_accordion:
+            with gr.Row():
+                scenario_selection = gr.Dropdown(
+                    label="Scenario", choices=IPCC_SCENARIO, value=IPCC_SCENARIO[0], interactive=True, visible=False
+                )
+                with gr.Accordion(label="Informations about the plot", open=False):
+                    plot_information = gr.Markdown(value = "")
+            ipcc_display = gr.Plot(elem_id="vanna-plot")
+        # Data behind the displayed figure, collapsed and hidden by default
+        with gr.Accordion(
+            label="Data used", open=False, visible=False
+        ) as table_accordion:
+            ipcc_table = gr.DataFrame([], elem_id="vanna-table")
+        return ipccUIElements(
+            tab=tab,
+            details_accordion=details_accordion,
+            examples_hidden=examples_hidden,
+            examples=examples,
+            image_examples=image_examples,
+            ipcc_direct_question=ipcc_direct_question,
+            result_text=result_text,
+            table_names_display=table_names_display,
+            query_accordion=query_accordion,
+            ipcc_sql_query=ipcc_sql_query,
+            chart_accordion=chart_accordion,
+            plot_information=plot_information,
+            scenario_selection=scenario_selection,
+            ipcc_display=ipcc_display,
+            table_accordion=table_accordion,
+            ipcc_table=ipcc_table,
+        )
+def setup_ipcc_events(ui_elements: ipccUIElements, share_client=None, user_id=None) -> None:
+    """Set up all event handlers for the ipcc tab.
+
+    Args:
+        ui_elements: components returned by ``create_ipcc_ui``.
+        share_client: accepted but not referenced in this function.
+        user_id: wrapped in a ``gr.State`` and passed to ``ask_ipcc_query``.
+
+    Both the direct question box and the example list feed the hidden
+    ``examples_hidden`` textbox; its ``change`` event runs the whole query
+    pipeline.
+    NOTE(review): since the pipeline hangs on ``change``, presumably
+    re-submitting identical text does not re-run the query - confirm.
+    """
+    # Create state variables
+    # The list states are parallel: one entry per table/figure produced by a query.
+    sql_queries_state = gr.State([])
+    dataframes_state = gr.State([])
+    plots_state = gr.State([])
+    plot_informations_state = gr.State([])
+    index_state = gr.State(0)
+    table_names_list = gr.State([])
+    user_id = gr.State(user_id)
+    # Handle direct question submission - trigger the same workflow by setting examples_hidden
+    ui_elements["ipcc_direct_question"].submit(
+        lambda x: gr.update(value=x),
+        inputs=[ui_elements["ipcc_direct_question"]],
+        outputs=[ui_elements["examples_hidden"]],
+    )
+    # Handle example selection
+    # Steps: close the help accordion and echo the question, hide the example
+    # images, hide previous outputs, run the query, show the results, then
+    # configure and apply the scenario filter.
+    ui_elements["examples_hidden"].change(
+        lambda x: (gr.Accordion(open=False), gr.Textbox(value=x)),
+        inputs=[ui_elements["examples_hidden"]],
+        outputs=[ui_elements["details_accordion"], ui_elements["ipcc_direct_question"]]
+    ).then(
+        lambda : gr.update(visible=False),
+        inputs=None,
+        outputs=ui_elements["image_examples"]
+    ).then(
+        hide_outputs,
+        inputs=None,
+        outputs=[
+            ui_elements["result_text"],
+            ui_elements["query_accordion"],
+            ui_elements["table_accordion"],
+            ui_elements["chart_accordion"],
+            ui_elements["table_names_display"],
+        ]
+    ).then(
+        ask_ipcc_query,
+        inputs=[ui_elements["examples_hidden"], index_state, user_id],
+        outputs=[
+            ui_elements["ipcc_sql_query"],
+            ui_elements["ipcc_table"],
+            ui_elements["ipcc_display"],
+            ui_elements["plot_information"],
+            sql_queries_state,
+            dataframes_state,
+            plots_state,
+            plot_informations_state,
+            index_state,
+            table_names_list,
+            ui_elements["result_text"],
+        ],
+    ).then(
+        show_results,
+        inputs=[sql_queries_state, dataframes_state, plots_state, table_names_list],
+        outputs=[
+            ui_elements["result_text"],
+            ui_elements["query_accordion"],
+            ui_elements["table_accordion"],
+            ui_elements["chart_accordion"],
+            ui_elements["table_names_display"],
+        ],
+    ).then(
+        show_filter_by_scenario,
+        inputs=[table_names_list, index_state, dataframes_state],
+        outputs=[ui_elements["scenario_selection"]],
+    ).then(
+        filter_by_scenario,
+        inputs=[dataframes_state, plots_state, table_names_list, index_state, ui_elements["scenario_selection"]],
+        outputs=[ui_elements["ipcc_table"], ui_elements["ipcc_display"]],
+    )
+    # Handle scenario selection change
+    ui_elements["scenario_selection"].change(
+        filter_by_scenario,
+        inputs=[dataframes_state, plots_state, table_names_list, index_state, ui_elements["scenario_selection"]],
+        outputs=[ui_elements["ipcc_table"], ui_elements["ipcc_display"]],
+    )
+    # Handle table selection
+    # After switching table, the scenario dropdown is reconfigured and the filter re-applied.
+    ui_elements["table_names_display"].change(
+        fn=on_table_click,
+        inputs=[ui_elements["table_names_display"], table_names_list, sql_queries_state, dataframes_state, plot_informations_state, plots_state],
+        outputs=[ui_elements["ipcc_sql_query"], ui_elements["ipcc_table"], ui_elements["ipcc_display"], ui_elements["plot_information"], index_state],
+    ).then(
+        show_filter_by_scenario,
+        inputs=[table_names_list, index_state, dataframes_state],
+        outputs=[ui_elements["scenario_selection"]],
+    ).then(
+        filter_by_scenario,
+        inputs=[dataframes_state, plots_state, table_names_list, index_state, ui_elements["scenario_selection"]],
+        outputs=[ui_elements["ipcc_table"], ui_elements["ipcc_display"]],
+    )
+def create_ipcc_tab(share_client=None, user_id=None):
+    """Build the ipcc tab: create its UI components, then attach the event handlers."""
+    elements = create_ipcc_ui()
+    setup_ipcc_events(elements, share_client=share_client, user_id=user_id)

requirements.txt CHANGED Viewed

@@ -25,4 +25,5 @@ geopy==2.4.1
 duckdb==1.2.1
 openai==1.61.1
 pydantic==2.9.2
-pydantic-settings==2.2.1

 duckdb==1.2.1
 openai==1.61.1
 pydantic==2.9.2
+pydantic-settings==2.2.1
+geojson==3.2.0

style.css CHANGED Viewed

@@ -656,12 +656,11 @@ a {
     /* overflow-y: scroll; */
 }
 #sql-query{
-    max-height: 300px;
-    overflow-y:scroll;
 }
 #sql-query textarea{
-    min-height: 100px !important;
 }
 #sql-query span{
@@ -671,8 +670,11 @@ div#tab-vanna{
     max-height: 100¨vh;
     overflow-y: hidden;
 }
 #vanna-plot{
-    max-height:500px
 }
 #pagination-display{
@@ -681,13 +683,33 @@ div#tab-vanna{
     font-size: 16px;
 }
-#table-names table{
-    overflow: hidden;
 }
-#table-names thead{
     display: none;
 }
 /* DRIAS Data Table Styles */
 #vanna-table {
     height: 400px !important;
@@ -710,3 +732,13 @@ div#tab-vanna{
     background: white;
     z-index: 1;
 }

     /* overflow-y: scroll; */
 }
 #sql-query{
+    max-height: 100%;
 }
 #sql-query textarea{
+    min-height: 200px !important;
 }
 #sql-query span{
     max-height: 100¨vh;
     overflow-y: hidden;
 }
+/* Bold text for spans inside buttons of the element with id "details"
+   (presumably the header label of the "How to use?" accordion). */
+#details button span{
+    font-weight: bold;
+}
 #vanna-plot{
+    max-height:1000px
 }
 #pagination-display{
     font-size: 16px;
 }
+/* Render each label inside #table-names as a full-width, centered, bordered box. */
+#table-names label {
+    display: block;
+    width: 100%;
+    box-sizing: border-box;
+    padding: 8px 12px;
+    margin-bottom: 4px;
+    border: 1px solid #ccc;
+    border-radius: 6px;
+    background-color: white;
+    cursor: pointer;
+    text-align: center;
 }
+/* Light highlight on hover. */
+#table-names label:hover {
+    background-color: #f0f8ff;
+}
+/* Hide the native radio control. */
+#table-names input[type="radio"] {
     display: none;
 }
+/* Highlight for the selected option.
+   NOTE(review): "input:checked + label" only matches a label that is the next
+   sibling of the checked input - confirm this matches the rendered radio markup. */
+#table-names input[type="radio"]:checked + label {
+    background-color: #d0eaff;
+    border-color: #2196f3;
+}
 /* DRIAS Data Table Styles */
 #vanna-table {
     height: 400px !important;
     background: white;
     z-index: 1;
 }
+/* Fixed-height example images; object-fit: contain scales the picture to fit without cropping. */
+.example-img{
+    height: 250px;
+    object-fit: contain;
+}
+/* Stack the children of the example-image container vertically.
+   NOTE(review): "left" is not a valid value for align-items, so that
+   declaration is ignored by browsers; use flex-start if start alignment
+   is really wanted (it would also stop the children from stretching). */
+#example-img-container {
+    flex-direction: column;
+    align-items: left;
+}