Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

armanddemasson commited on May 21

Commit

ac63459

2 Parent(s): c0fd277 45e1dba

Merged in feature/talk_to_ipcc (pull request #17)

Browse files

Files changed (8) hide show

app.py +3 -2
climateqa/engine/talk_to_data/ipcc/config.py +83 -0
climateqa/engine/talk_to_data/ipcc/plots.py +193 -0
climateqa/engine/talk_to_data/ipcc/queries.py +116 -0
climateqa/engine/talk_to_data/main.py +77 -17
climateqa/engine/talk_to_data/objects/states.py +7 -35
climateqa/engine/talk_to_data/workflow/ipcc.py +157 -0
front/tabs/tab_ipcc.py +289 -0

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ from climateqa.chat import start_chat, chat_stream, finish_chat
 from front.tabs import create_config_modal, cqa_tab, create_about_tab
 from front.tabs import MainTabPanel, ConfigPanel
 from front.tabs.tab_drias import create_drias_tab
 from front.utils import process_figures
 from gradio_modal import Modal
@@ -532,8 +533,8 @@ def main_ui():
         with gr.Tabs():
             cqa_components = cqa_tab(tab_name="ClimateQ&A")
             local_cqa_components = cqa_tab(tab_name="France - Local Q&A")
-            create_drias_tab(share_client=share_client, user_id=user_id)
             create_about_tab()
         event_handling(cqa_components, config_components, tab_name="ClimateQ&A")

 from front.tabs import create_config_modal, cqa_tab, create_about_tab
 from front.tabs import MainTabPanel, ConfigPanel
 from front.tabs.tab_drias import create_drias_tab
+from front.tabs.tab_ipcc import create_ipcc_tab
 from front.utils import process_figures
 from gradio_modal import Modal
         with gr.Tabs():
             cqa_components = cqa_tab(tab_name="ClimateQ&A")
             local_cqa_components = cqa_tab(tab_name="France - Local Q&A")
+            drias_components = create_drias_tab(share_client=share_client, user_id=user_id)
+            ipcc_components = create_ipcc_tab(share_client=share_client, user_id=user_id)
             create_about_tab()
         event_handling(cqa_components, config_components, tab_name="ClimateQ&A")

climateqa/engine/talk_to_data/ipcc/config.py ADDED Viewed

	@@ -0,0 +1,83 @@

# Base location of the IPCC Atlas parquet files (a Hugging Face `hf://` dataset URL).
IPCC_DATASET_URL = "hf://datasets/ekimetrics/ipcc-atlas"

# Indicator tables that can be queried.
IPCC_TABLES = [
    "mean_temperature",
    "total_precipitation",
]

# Name of the value column to read in each table, keyed by table name.
IPCC_INDICATOR_COLUMNS_PER_TABLE = {
    "mean_temperature": "mean_temperature",
    "total_precipitation": "total_precipitation",
}

# Unit shown next to each indicator, keyed by indicator column.
IPCC_INDICATOR_TO_UNIT = {
    "mean_temperature": "°C",
    "total_precipitation": "mm/day",
}

# Scenario labels found in the `scenario` column; "historical" comes first.
IPCC_SCENARIO = [
    "historical",
    "ssp126",
    "ssp245",
    "ssp370",
    "ssp585",
]

# No model is listed explicitly.
IPCC_MODELS = []

# Parameters looked up in the user's question before the queries are built.
IPCC_PLOT_PARAMETERS = [
    "year",
    "location",
]

# Country codes that the query builders read from `<code>_macro.parquet`
# (keyed by admin1 region) instead of the per-coordinate `<code>.parquet`.
MACRO_COUNTRIES = [
    "JP",
    "IN",
    "MH",
    "PT",
    "ID",
    "SJ",
    "MX",
    "CN",
    "GL",
    "PN",
    "AR",
    "AQ",
    "PF",
    "BR",
    "SH",
    "GS",
    "ZA",
    "NZ",
    "TF",
    "CL",
    "CA",
    "AU",
    "US",
    "RU",
]

# Markdown introduction displayed in the "Talk to IPCC" tab.
IPCC_UI_TEXT = """
Hi, I'm **Talk to IPCC**, designed to answer your questions using [**IPCC - ATLAS**](https://interactive-atlas.ipcc.ch/regional-information#eyJ0eXBlIjoiQVRMQVMiLCJjb21tb25zIjp7ImxhdCI6OTc3MiwibG5nIjo0MDA2OTIsInpvb20iOjQsInByb2oiOiJFUFNHOjU0MDMwIiwibW9kZSI6ImNvbXBsZXRlX2F0bGFzIn0sInByaW1hcnkiOnsic2NlbmFyaW8iOiJzc3A1ODUiLCJwZXJpb2QiOiIyIiwic2Vhc29uIjoieWVhciIsImRhdGFzZXQiOiJDTUlQNiIsInZhcmlhYmxlIjoidGFzIiwidmFsdWVUeXBlIjoiQU5PTUFMWSIsImhhdGNoaW5nIjoiU0lNUExFIiwicmVnaW9uU2V0IjoiYXI2IiwiYmFzZWxpbmUiOiJwcmVJbmR1c3RyaWFsIiwicmVnaW9uc1NlbGVjdGVkIjpbXX0sInBsb3QiOnsiYWN0aXZlVGFiIjoicGx1bWUiLCJtYXNrIjoibm9uZSIsInNjYXR0ZXJZTWFnIjpudWxsLCJzY2F0dGVyWVZhciI6bnVsbCwic2hvd2luZyI6ZmFsc2V9fQ==) data.
I'll answer by displaying a list of SQL queries, graphs and data most relevant to your question.
You can ask me anything about these climate indicators: **temperature** or **precipitation**.
You can specify **location** and/or **year**.
By default, we take the **median of each climate model**.
For example, you can ask:
- What will the temperature be like in Paris?
- What will be the total rainfall in the USA in 2030?
- How will the average temperature evolve in China ?
**Example of indicators in the data**:
- Mean temperature
- Total precipitation
⚠️ **Limitations**:
- You can't ask anything that isn't related to **IPCC - ATLAS** data.
- You can not ask about **several locations at the same time**.
- If you specify a year **before 1850 or over 2100**, there will be **no data**.
- You **cannot compare two models**.
🛈 **Information**
Please note that we **log your questions for meta-analysis purposes**, so avoid sharing any sensitive or personal information.
"""

climateqa/engine/talk_to_data/ipcc/plots.py ADDED Viewed

	@@ -0,0 +1,193 @@

+from typing import Callable
+from plotly.graph_objects import Figure
+import plotly.graph_objects as go
+import pandas as pd
+import geojson
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_INDICATOR_TO_UNIT, IPCC_SCENARIO
+from climateqa.engine.talk_to_data.ipcc.queries import indicator_for_given_year_query, indicator_per_year_at_location_query
+from climateqa.engine.talk_to_data.objects.plot import Plot
def plot_indicator_evolution_at_location_historical_and_projections(
    params: dict,
) -> Callable[[pd.DataFrame], Figure]:
    """Build the plotting callback for the indicator-evolution line chart.

    The returned callback draws one line per entry of ``IPCC_SCENARIO`` from a
    DataFrame holding ``year``, ``scenario`` and the indicator column.

    Args:
        params (dict): Must contain:
            - indicator_column (str): Column holding the indicator values.
            - location (str): Location name, used in the chart title only.

    Returns:
        Callable[[pd.DataFrame], Figure]: Function turning a DataFrame into a
        Plotly Figure.
    """
    indicator = params["indicator_column"]
    location = params["location"]
    indicator_label = " ".join(map(str.capitalize, indicator.split("_")))
    unit = IPCC_INDICATOR_TO_UNIT.get(indicator, "")

    def plot_data(df: pd.DataFrame) -> Figure:
        ordered = df.sort_values(by='year')
        year_col = ordered['year'].astype(int)
        value_col = ordered[indicator].astype(float)
        scenario_col = ordered['scenario'].astype(str)

        # The latest historical point is prepended to every projection so the
        # scenario lines start where the historical line ends.
        historical_rows = (scenario_col == 'historical').to_numpy()
        if historical_rows.any():
            anchor_year = year_col[historical_rows].tolist()[-1]
            anchor_value = value_col[historical_rows].tolist()[-1]
        else:
            anchor_year = None
            anchor_value = None

        fig = go.Figure()
        for scenario in IPCC_SCENARIO:
            rows = (scenario_col == scenario).to_numpy()
            xs = year_col[rows].tolist()
            ys = value_col[rows].tolist()
            if scenario != 'historical' and anchor_value is not None:
                xs.insert(0, anchor_year)
                ys.insert(0, anchor_value)
            fig.add_trace(go.Scatter(x=xs, y=ys, mode='lines', name=scenario))

        fig.update_layout(
            title=f'Yearly Evolution of {indicator_label} in {location} (Historical + SSP Scenarios)',
            xaxis_title='Year',
            yaxis_title=f'{indicator_label} ({unit})',
            legend_title='Scenario',
            height=800,
        )
        return fig

    return plot_data
# Descriptor of the line-chart plot: display names, the description text,
# the parameters it needs, and the functions building its SQL query and figure.
indicator_evolution_at_location_historical_and_projections: Plot = {
    "name": "Indicator Evolution at Location (Historical + Projections)",
    "description": " ".join([
        "Shows how a climate indicator (e.g., rainfall, temperature) changes over time at a specific location,",
        "including historical data and future projections.",
        "Useful for questions about the value or trend of an indicator at a location for any year,",
        "such as 'What will be the total rainfall in China in 2050?' or 'How does rainfall evolve in China over time?'.",
        "Parameters: indicator_column (the climate variable), location (e.g., country, city).",
    ]),
    "params": ["indicator_column", "location"],
    "plot_function": plot_indicator_evolution_at_location_historical_and_projections,
    "sql_query": indicator_per_year_at_location_query,
    "short_name": "Indicator Evolution",
}
def plot_choropleth_map_of_country_indicator_for_specific_year(
    params: dict,
) -> Callable[[pd.DataFrame], Figure]:
    """Build the plotting callback for the choropleth map of an indicator.

    The returned callback draws one square cell per row of the DataFrame,
    coloured by the indicator value, on an OpenStreetMap background.

    Args:
        params (dict): Must contain:
            - indicator_column (str): Column holding the indicator values.
            - country_name (str): Country name, used in the title.
            - location (str): Location name, used in the title.
          May contain:
            - year (str or int): Year shown in the title. A missing, ``None``
              or empty value falls back to 2050.

    Returns:
        Callable[[pd.DataFrame], Figure]: Function turning a DataFrame with
        ``latitude``, ``longitude`` and the indicator column into a Plotly Figure.

    Raises:
        KeyError: If ``indicator_column``, ``country_name`` or ``location`` is
            absent from ``params``.
    """
    indicator = params["indicator_column"]
    # `or` rather than a `.get` default: a key present with value None must
    # also fall back to 2050, as indicator_for_given_year_query does.
    year = params.get('year') or 2050
    country_name = params['country_name']
    location = params['location']
    indicator_label = " ".join(word.capitalize() for word in indicator.split("_"))
    unit = IPCC_INDICATOR_TO_UNIT.get(indicator, "")

    def plot_data(df: pd.DataFrame) -> Figure:
        # Diverging blue -> white -> red scale.
        custom_colorscale = [
            [0.0, "rgb(5, 48, 97)"],
            [0.10, "rgb(33, 102, 172)"],
            [0.20, "rgb(67, 147, 195)"],
            [0.30, "rgb(146, 197, 222)"],
            [0.40, "rgb(209, 229, 240)"],
            [0.50, "rgb(247, 247, 247)"],
            [0.60, "rgb(253, 219, 199)"],
            [0.75, "rgb(244, 165, 130)"],
            [0.85, "rgb(214, 96, 77)"],
            [0.90, "rgb(178, 24, 43)"],
            [1.0, "rgb(103, 0, 31)"],
        ]
        indicators = df[indicator].astype(float).tolist()
        latitudes = df["latitude"].astype(float).tolist()
        longitudes = df["longitude"].astype(float).tolist()

        # One feature per row; the id links each polygon to its `z` value.
        feature_ids = [str(idx) for idx in range(len(indicators))]
        features = [
            geojson.Feature(
                # Closed square extending 0.5 on each side of the point.
                geometry=geojson.Polygon([[
                    [lon - 0.5, lat - 0.5],
                    [lon + 0.5, lat - 0.5],
                    [lon + 0.5, lat + 0.5],
                    [lon - 0.5, lat + 0.5],
                    [lon - 0.5, lat - 0.5],
                ]]),
                properties={"value": val},
                id=feature_id,
            )
            for feature_id, lat, lon, val in zip(feature_ids, latitudes, longitudes, indicators)
        ]
        geojson_data = geojson.FeatureCollection(features)

        fig = go.Figure(go.Choroplethmapbox(
            geojson=geojson_data,
            locations=feature_ids,
            featureidkey="id",
            z=indicators,
            colorscale=custom_colorscale,
            # min()/max() raise on an empty sequence; None lets Plotly decide.
            zmin=min(indicators) if indicators else None,
            zmax=max(indicators) if indicators else None,
            marker_opacity=0.7,
            marker_line_width=0,
            colorbar_title=f"{indicator_label} ({unit})",
            # Hover text showing the indicator value of each cell.
            text=[f"{indicator_label}: {value:.2f} {unit}" for value in indicators],
            hoverinfo="text",
        ))
        fig.update_layout(
            mapbox_style="open-street-map",
            mapbox_zoom=3,
            height=800,
            # Centre on the middle row of the data (or 0/0 without data).
            mapbox_center={
                "lat": latitudes[len(latitudes) // 2] if latitudes else 0,
                "lon": longitudes[len(longitudes) // 2] if longitudes else 0,
            },
            coloraxis_colorbar=dict(title=f"{indicator_label} ({unit})"),
            title=f"{indicator_label} in {year} in {location} ({country_name})",
        )
        return fig

    return plot_data
# Descriptor of the choropleth-map plot: display names, the description text,
# the parameters it needs, and the functions building its SQL query and figure.
choropleth_map_of_country_indicator_for_specific_year: Plot = {
    "name": "Choropleth Map of a Country's Indicator Distribution for a Specific Year",
    "description": " ".join([
        "Displays a map showing the spatial distribution of a climate indicator (e.g., rainfall, temperature)",
        "across all regions of a country for a specific year.",
        "Can answer questions about the value of an indicator in a country or region for a given year,",
        "such as 'What will be the total rainfall in China in 2050?' or 'How is rainfall distributed across China in 2050?'.",
        "Parameters: indicator_column (the climate variable), year, location (country name).",
    ]),
    "params": ["indicator_column", "year", "location"],
    "plot_function": plot_choropleth_map_of_country_indicator_for_specific_year,
    "sql_query": indicator_for_given_year_query,
    "short_name": "Choropleth Map",
}

# Every plot offered for IPCC data.
IPCC_PLOTS = [
    indicator_evolution_at_location_historical_and_projections,
    choropleth_map_of_country_indicator_for_specific_year,
]

climateqa/engine/talk_to_data/ipcc/queries.py ADDED Viewed

	@@ -0,0 +1,116 @@

+from typing import TypedDict, Optional
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_DATASET_URL, MACRO_COUNTRIES
class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
    """Inputs accepted by ``indicator_per_year_at_location_query``.

    Declared with ``total=False``, so every key may be omitted at the type level.
    """

    # Name of the climate indicator column.
    indicator_column: str
    # Latitude of the location.
    latitude: str
    # Longitude of the location.
    longitude: str
    # Country code.
    country_code: str
    # Administrative region (optional).
    admin1: Optional[str]
def indicator_per_year_at_location_query(
    table: str, params: IndicatorPerYearAtLocationQueryParams
) -> str:
    """Build the SQL query returning an indicator per year and scenario at a location.

    Countries listed in ``MACRO_COUNTRIES`` are read from ``<code>_macro.parquet``
    and filtered on ``admin1``. Every other country is read from
    ``<code>.parquet``, filtered on the exact latitude/longitude and averaged
    per (scenario, year). Only years from 1950 onwards are returned.

    Args:
        table (str): SQL table of the indicator (lower-cased to build the path).
        params (IndicatorPerYearAtLocationQueryParams): Query parameters.
            ``indicator_column``, ``latitude``, ``longitude`` and
            ``country_code`` are always required. ``admin1`` is additionally
            required for macro countries.

    Returns:
        str: The SQL query string, or an empty string if a required parameter
        is missing.
    """
    indicator_column = params.get("indicator_column")
    latitude = params.get("latitude")
    longitude = params.get("longitude")
    country_code = params.get("country_code")
    admin1 = params.get("admin1")

    # Explicit None/"" test: a numeric 0 coordinate is a valid value and must
    # not be treated as missing.
    required = (indicator_column, latitude, longitude, country_code)
    if any(value is None or value == "" for value in required):
        return ""

    if country_code in MACRO_COUNTRIES:
        # Without a region the filter would compare against the text 'None'.
        if admin1 is None or admin1 == "":
            return ""
        # Double single quotes so names containing an apostrophe stay valid SQL.
        admin1_literal = str(admin1).replace("'", "''")
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
        sql_query = f"""
        SELECT year, scenario, {indicator_column}
        FROM {table_path}
        WHERE admin1 = '{admin1_literal}' AND year >= 1950
        ORDER BY year, scenario
        """
    else:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
        sql_query = f"""
        SELECT year, scenario, AVG({indicator_column}) AS {indicator_column}
        FROM {table_path}
        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950
        GROUP BY scenario, year
        ORDER BY year, scenario
        """
    return sql_query.strip()
class IndicatorForGivenYearQueryParams(TypedDict, total=False):
    """Inputs accepted by ``indicator_for_given_year_query``.

    Declared with ``total=False``, so every key may be omitted at the type level.
    """

    # The column name for the climate indicator.
    indicator_column: str
    # The year to query.
    year: str
    # The country code.
    country_code: str
def indicator_for_given_year_query(
    table: str, params: IndicatorForGivenYearQueryParams
) -> str:
    """Build the SQL query returning an indicator across locations for one year.

    Countries listed in ``MACRO_COUNTRIES`` are read from ``<code>_macro.parquet``
    and joined with ``coordinates.parquet`` on (admin1, country_code) to obtain
    coordinates. Every other country is read from ``<code>.parquet`` and
    averaged per (latitude, longitude, scenario).

    Args:
        table (str): SQL table of the indicator (lower-cased to build the path).
        params (IndicatorForGivenYearQueryParams): Query parameters.
            ``indicator_column`` and ``country_code`` are required. ``year``
            defaults to 2050 when missing or empty.

    Returns:
        str: The SQL query string, or an empty string if a required parameter
        is missing or ``year`` is not an integer value.
    """
    indicator_column = params.get("indicator_column")
    country_code = params.get("country_code")
    if not indicator_column or not country_code:
        return ""

    # The year is inserted unquoted into the SQL text, so it must be a plain
    # integer; anything else would produce a malformed statement.
    try:
        year = int(params.get("year") or 2050)
    except (TypeError, ValueError):
        return ""

    if country_code in MACRO_COUNTRIES:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
        sql_query = f"""
        SELECT {indicator_column}, c.latitude, c.longitude, c.admin1, scenario
        FROM {table_path} AS t
        RIGHT JOIN '{IPCC_DATASET_URL}/coordinates.parquet' AS c
        ON c.admin1 = t.admin1 AND c.country_code = t.country_code
        WHERE year = {year}
        ORDER BY latitude, longitude, scenario
        """
    else:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
        sql_query = f"""
        SELECT AVG({indicator_column}) AS {indicator_column}, latitude, longitude, scenario
        FROM {table_path}
        WHERE year = {year}
        GROUP BY latitude, longitude, scenario
        ORDER BY latitude, longitude, scenario
        """
    return sql_query.strip()

climateqa/engine/talk_to_data/main.py CHANGED Viewed

@@ -1,10 +1,13 @@
 from climateqa.engine.talk_to_data.workflow.drias import drias_workflow
 from climateqa.engine.llm import get_llm
 from climateqa.logging import log_drias_interaction_to_huggingface
 from climateqa.logging import log_drias_interaction_to_huggingface
 import ast
-async def ask_drias(query: str, index_state: int = 0, user_id: str = None) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
@@ -31,23 +34,80 @@ async def ask_drias(query: str, index_state: int = 0, user_id: str = None) -> tu
     sql_queries = []
     result_dataframes = []
     figures = []
-    table_list = []
-    for plot_state in final_state['plot_states'].values():
-        for table_state in plot_state['table_states'].values():
-            if table_state['status'] == 'OK':
-                if 'table_name' in table_state:
-                    table_list.append(' '.join(table_state['table_name'].capitalize().split('_')))
-                if 'sql_query' in table_state and table_state['sql_query'] is not None:
-                    sql_queries.append(table_state['sql_query'])
-                if 'dataframe' in table_state and table_state['dataframe'] is not None:
-                    result_dataframes.append(table_state['dataframe'])
-                    if 'figure' in table_state and table_state['figure'] is not None:
-                        figures.append(table_state['figure'])
     if "error" in final_state and final_state["error"] != "":
-        return None, None, None, [], [], [], 0, final_state["error"]
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
@@ -55,4 +115,4 @@ async def ask_drias(query: str, index_state: int = 0, user_id: str = None) -> tu
     log_drias_interaction_to_huggingface(query, sql_query, user_id)
-    return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, table_list, ""

+from operator import index
+from duckdb import sql
 from climateqa.engine.talk_to_data.workflow.drias import drias_workflow
 from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data.workflow.ipcc import ipcc_workflow
 from climateqa.logging import log_drias_interaction_to_huggingface
 from climateqa.logging import log_drias_interaction_to_huggingface
 import ast
+async def ask_drias(query: str, index_state: int = 0, user_id: str | None = None) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
     sql_queries = []
     result_dataframes = []
     figures = []
+    plot_title_list = []
+    for output_title, output in final_state['outputs'].items():
+        if output['status'] == 'OK':
+            if output['table'] is not None:
+                plot_title_list.append(output_title)
+            if output['sql_query'] is not None:
+                sql_queries.append(output['sql_query'])
+            if output['dataframe'] is not None:
+                result_dataframes.append(output['dataframe'])
+                if output['figure'] is not None:
+                    figures.append(output['figure'])
+    if "error" in final_state and final_state["error"] != "":
+        # No Sql query, no dataframe, no figure, empty sql queries list, empty result dataframes list, empty figures list, index state = 0, empty table list, error message
+        return None, None, None, [], [], [], 0, [], final_state["error"]
+    sql_query = sql_queries[index_state]
+    dataframe = result_dataframes[index_state]
+    figure = figures[index_state](dataframe)
+    log_drias_interaction_to_huggingface(query, sql_query, user_id)
+    return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, plot_title_list, ""
+async def ask_ipcc(query: str, index_state: int = 0, user_id: str | None = None) -> tuple:
+    """Main function to process a DRIAS query and return results.
+    This function orchestrates the DRIAS workflow, processing a user query to generate
+    SQL queries, dataframes, and visualizations. It handles multiple results and allows
+    pagination through them.
+    Args:
+        query (str): The user's question about climate data
+        index_state (int, optional): The index of the result to return. Defaults to 0.
+    Returns:
+        tuple: A tuple containing:
+            - sql_query (str): The SQL query used
+            - dataframe (pd.DataFrame): The resulting data
+            - figure (Callable): Function to generate the visualization
+            - sql_queries (list): All generated SQL queries
+            - result_dataframes (list): All resulting dataframes
+            - figures (list): All figure generation functions
+            - index_state (int): Current result index
+            - table_list (list): List of table names used
+            - error (str): Error message if any
+    """
+    final_state = await ipcc_workflow(query)
+    sql_queries = []
+    result_dataframes = []
+    figures = []
+    plot_title_list = []
+    for output_title, output in final_state['outputs'].items():
+        if output['status'] == 'OK':
+            if output['table'] is not None:
+                plot_title_list.append(output_title)
+            if output['sql_query'] is not None:
+                sql_queries.append(output['sql_query'])
+            if output['dataframe'] is not None:
+                result_dataframes.append(output['dataframe'])
+                if output['figure'] is not None:
+                    figures.append(output['figure'])
     if "error" in final_state and final_state["error"] != "":
+        # No Sql query, no dataframe, no figure, empty sql queries list, empty result dataframes list, empty figures list, index state = 0, empty table list, error message
+        return None, None, None, [], [], [], 0, [], final_state["error"]
     sql_query = sql_queries[index_state]
     dataframe = result_dataframes[index_state]
     log_drias_interaction_to_huggingface(query, sql_query, user_id)
+    return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, plot_title_list, ""

climateqa/engine/talk_to_data/objects/states.py CHANGED Viewed

@@ -1,46 +1,18 @@
 from typing import Any, Callable, Optional, TypedDict
 from plotly.graph_objects import Figure
 import pandas as pd
-class TableState(TypedDict):
-    """Represents the state of a table in the DRIAS workflow.
-    This class defines the structure for tracking the state of a table during the
-    data processing workflow, including its name, parameters, SQL query, and results.
-    Attributes:
-        table_name (str): The name of the table in the database
-        params (dict[str, Any]): Parameters used for querying the table
-        sql_query (str, optional): The SQL query used to fetch data
-        dataframe (pd.DataFrame | None, optional): The resulting data
-        figure (Callable[..., Figure], optional): Function to generate visualization
-        status (str): The current status of the table processing ('OK' or 'ERROR')
-    """
-    table_name: str
-    params: dict[str, Any]
     sql_query: Optional[str]
-    dataframe: Optional[pd.DataFrame | None]
     figure: Optional[Callable[..., Figure]]
-    status: str
-class PlotState(TypedDict):
-    """Represents the state of a plot in the DRIAS workflow.
-    This class defines the structure for tracking the state of a plot during the
-    data processing workflow, including its name and associated tables.
-    Attributes:
-        plot_name (str): The name of the plot
-        tables (list[str]): List of tables used in the plot
-        table_states (dict[str, TableState]): States of the tables used in the plot
-    """
-    plot_name: str
-    tables: list[str]
-    table_states: dict[str, TableState]
 class State(TypedDict):
     user_input: str
     plots: list[str]
-    plot_states: dict[str, PlotState]
     error: Optional[str]

 from typing import Any, Callable, Optional, TypedDict
 from plotly.graph_objects import Figure
 import pandas as pd
+from climateqa.engine.talk_to_data.objects.plot import Plot
class TTDOutput(TypedDict):
    """One result of a talk-to-data workflow: a plot applied to a table."""

    # Processing status; the IPCC workflow sets 'OK', 'ERROR' or 'NO_DATA'.
    status: str
    # Plot descriptor this output was produced with.
    plot: Plot
    # Name of the table that was queried.
    table: str
    # SQL text built for the table, or None when no query could be built.
    sql_query: Optional[str]
    # Query result, or None when no data is available.
    dataframe: Optional[pd.DataFrame]
    # Callable producing the Plotly figure, or None when not generated.
    figure: Optional[Callable[..., Figure]]
 class State(TypedDict):
     user_input: str
     plots: list[str]
+    outputs: dict[str, TTDOutput]
     error: Optional[str]

climateqa/engine/talk_to_data/workflow/ipcc.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import os
+from typing import Any
+import asyncio
+from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data.input_processing import find_param, find_relevant_plots, find_relevant_tables_per_plot
+from climateqa.engine.talk_to_data.query import execute_sql_query, find_indicator_column
+from climateqa.engine.talk_to_data.objects.plot import Plot
+from climateqa.engine.talk_to_data.objects.states import State, TTDOutput
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_TABLES, IPCC_INDICATOR_COLUMNS_PER_TABLE, IPCC_PLOT_PARAMETERS
+from climateqa.engine.talk_to_data.ipcc.plots import IPCC_PLOTS
# Two directory levels above the current working directory
# (os.path.split(p)[0] is how os.path.dirname(p) is defined).
ROOT_PATH = os.path.split(os.path.split(os.getcwd())[0])[0]
async def process_output(
    output_title: str,
    table: str,
    plot: Plot,
    params: dict[str, Any]
) -> tuple[str, TTDOutput, dict[str, bool]]:
    """Build and run the SQL query of one (plot, table) pair and prepare its figure.

    Args:
        output_title (str): Title for the output (used as key in the outputs dict).
        table (str): The name of the table to process.
        plot (Plot): Plot descriptor providing the ``sql_query`` builder and the
            ``plot_function`` factory.
        params (dict[str, Any]): Parameters used for querying the table. The
            dict is not modified.

    Returns:
        tuple: ``(output_title, results, flags)`` where ``results`` is the
        ``TTDOutput`` for this pair and ``flags`` tells whether an SQL query
        (``have_sql_query``) and a non-empty dataframe (``have_dataframe``)
        were obtained.
    """
    # Work on a private copy so the caller's dict is never mutated.
    query_params = dict(params)

    results: TTDOutput = {
        'status': 'OK',
        'plot': plot,
        'table': table,
        'sql_query': None,
        'dataframe': None,
        'figure': None
    }
    flags = {
        'have_sql_query': False,
        'have_dataframe': False
    }

    # Find the indicator column for this table.
    indicator_column = find_indicator_column(table, IPCC_INDICATOR_COLUMNS_PER_TABLE)
    if indicator_column:
        query_params['indicator_column'] = indicator_column

    # Build the SQL query; an empty string means a required parameter is missing.
    sql_query = plot['sql_query'](table, query_params)
    if not sql_query:
        results['status'] = 'ERROR'
        return output_title, results, flags
    results['sql_query'] = sql_query
    flags['have_sql_query'] = True

    # Execute the SQL query.
    df = await execute_sql_query(sql_query)
    if df is not None and not df.empty:
        results['dataframe'] = df
        flags['have_dataframe'] = True
    else:
        results['status'] = 'NO_DATA'

    # The figure factory is created even without data, for consistency.
    results['figure'] = plot['plot_function'](query_params)
    return output_title, results, flags
async def ipcc_workflow(user_input: str) -> State:
    """Run the complete Talk To IPCC workflow for one question.

    Steps: select the relevant plots, select the relevant tables for each
    plot, extract the query parameters from the question, then build and run
    every (plot, table) query concurrently.

    Args:
        user_input (str): The user's question.

    Returns:
        State: Final state holding the outputs keyed by title, and a non-empty
        ``error`` message when no plot, table, SQL query or data was found.
    """
    state: State = {
        'user_input': user_input,
        'plots': [],
        'outputs': {},
        'error': ''
    }
    llm = get_llm(provider="openai")

    plots = await find_relevant_plots(state, llm, IPCC_PLOTS)
    state['plots'] = plots
    if not plots:
        state['error'] = 'There is no plot to answer to the question'
        return state

    # Find relevant tables for each plot and prepare one output per pair.
    outputs: dict[str, TTDOutput] = {}
    for plot_name in plots:
        plot = next((p for p in IPCC_PLOTS if p['name'] == plot_name), None)
        if plot is None:
            continue
        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm, IPCC_TABLES)
        for table in relevant_tables:
            output_title = f"{plot['short_name']} - {' '.join(table.capitalize().split('_'))}"
            outputs[output_title] = {
                'table': table,
                'plot': plot,
                'status': 'OK',
                'sql_query': None,
                'dataframe': None,
                'figure': None
            }

    # Nothing to query: stop before spending parameter-extraction calls.
    if not outputs:
        state['error'] = "There is no relevant table in our database to answer your question"
        return state

    # Gather all required parameters.
    params = {}
    for param_name in IPCC_PLOT_PARAMETERS:
        param = await find_param(state, param_name, mode='IPCC')
        if param:
            params.update(param)

    # Process all outputs concurrently; each task gets its own params copy.
    tasks = [
        process_output(output_title, output['table'], output['plot'], params.copy())
        for output_title, output in outputs.items()
    ]
    results = await asyncio.gather(*tasks)

    # Merge the task results and remember whether any task got a query / data.
    have_sql_query = False
    have_dataframe = False
    for output_title, task_results, task_flags in results:
        output = outputs[output_title]
        output['sql_query'] = task_results['sql_query']
        output['dataframe'] = task_results['dataframe']
        output['figure'] = task_results['figure']
        output['status'] = task_results['status']
        have_sql_query |= task_flags['have_sql_query']
        have_dataframe |= task_flags['have_dataframe']
    state['outputs'] = outputs

    # Set the error message if needed.
    if not have_sql_query:
        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
    elif not have_dataframe:
        state['error'] = "There is no data in our table that can answer to your question"
    return state

front/tabs/tab_ipcc.py ADDED Viewed

	@@ -0,0 +1,289 @@

+from operator import index
+from random import choices
+import gradio as gr
+from typing import TypedDict, List, Optional
+import pandas as pd
+import os
+from climateqa.engine.talk_to_data.main import ask_ipcc
+from climateqa.engine.talk_to_data.ipcc.config import IPCC_MODELS, IPCC_UI_TEXT
+class ipccUIElements(TypedDict):
+    """Typed mapping of the gradio components that make up the IPCC tab.
+
+    The keys mirror the keyword arguments used when ``create_ipcc_ui`` builds
+    its return value; ``setup_ipcc_events`` looks components up by these keys.
+    """
+    # Top-level tab container and the "How to use?" accordion inside it
+    tab: gr.Tab
+    details_accordion: gr.Accordion
+    # Hidden textbox that backs the example questions, and the examples widget
+    examples_hidden: gr.Textbox
+    examples: gr.Examples
+    # Row holding the example visualization images
+    image_examples: gr.Row
+    # Free-text question input
+    ipcc_direct_question: gr.Textbox
+    # Textbox displayed when a query yields nothing to show
+    result_text: gr.Textbox
+    # Radio used to pick one of the generated figures
+    table_names_display: gr.Radio
+    # SQL query accordion and the textbox inside it
+    query_accordion: gr.Accordion
+    ipcc_sql_query: gr.Textbox
+    # Chart accordion with its scenario dropdown and plot
+    chart_accordion: gr.Accordion
+    scenario_selection: gr.Dropdown
+    ipcc_display: gr.Plot
+    # "Data used" accordion and the dataframe inside it
+    table_accordion: gr.Accordion
+    ipcc_table: gr.DataFrame
+async def ask_ipcc_query(query: str, index_state: int, user_id: str):
+    result = await ask_ipcc(query, index_state, user_id)
+    return result
+def show_results(sql_queries_state, dataframes_state, plots_state, table_names):
+    if not sql_queries_state or not dataframes_state or not plots_state:
+        # If all results are empty, show "No result"
+        return (
+            gr.update(visible=True),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+        )
+    else:
+        # Show the appropriate components with their data
+        return (
+            gr.update(visible=False),
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(choices=table_names, value=table_names[0], visible=True),
+        )
+def show_filter_by_scenario(table_names, index_state, dataframes):
+    if table_names[index_state].startswith("Choropleth Map"):
+        df = dataframes[index_state]
+        return gr.update(visible=True, choices=sorted(df["scenario"].unique()), value=df["scenario"].unique()[0])
+    else:
+        return gr.update(visible=False)
+def filter_by_scenario(dataframes, figures, index_state, scenario):
+    df = dataframes[index_state]
+    if df.empty:
+        return df, None
+    if "scenario" not in df.columns:
+        return df, figures[index_state](df)
+    else:
+        df = df[df["scenario"] == scenario]
+        if df.empty:
+            return df, None
+    figure = figures[index_state](df)
+    return df, figure
+def display_table_names(table_names, index_state):
+    return [
+        [name]
+        for name in table_names
+    ]
+def on_table_click(selected_label, table_names, sql_queries, dataframes, plots):
+    index = table_names.index(selected_label)
+    figure = plots[index](dataframes[index])
+    return (
+        sql_queries[index],
+        dataframes[index],
+        figure,
+        index,
+    )
+def create_ipcc_ui() -> ipccUIElements:
+    """Create and return all UI elements for the ipcc tab.
+
+    Builds the "(Beta) Talk to IPCC" tab: a how-to accordion, example
+    questions, the question textbox, example images, and the result widgets
+    (figure selector, SQL query, chart, data table). The result widgets are
+    created with ``visible=False``.
+
+    Returns:
+        An ``ipccUIElements`` mapping holding every component created here.
+    """
+    # NOTE(review): elem ids "tab-vanna", "vanna-plot" and "vanna-table" look
+    # inherited from another tab -- confirm they do not collide elsewhere.
+    with gr.Tab("(Beta) Talk to IPCC", elem_id="tab-vanna", id=7) as tab:
+        with gr.Accordion(label="❓ How to use?", elem_id="details") as details_accordion:
+            gr.Markdown(IPCC_UI_TEXT)
+        # Add examples for common questions; the hidden textbox is both the
+        # input and the output of the examples widget
+        examples_hidden = gr.Textbox(visible=False, elem_id="ipcc-examples-hidden")
+        examples = gr.Examples(
+            examples=[
+                ["What will the temperature be like in Paris?"],
+                ["What will be the total rainfall in the USA in 2030?"],
+                ["How will the average temperature evolve in China?"],
+                ["What will be the average total precipitation in London ?"]
+            ],
+            label="Example Questions",
+            inputs=[examples_hidden],
+            outputs=[examples_hidden],
+        )
+        # Free-text question input
+        with gr.Row():
+            ipcc_direct_question = gr.Textbox(
+                label="Direct Question",
+                placeholder="You can write direct question here",
+                elem_id="direct-question",
+                interactive=True,
+            )
+        # Static gallery of example visualizations
+        with gr.Row(visible=True, elem_id="example-img-container") as image_examples:
+            gr.Markdown("### Examples of possible visualizations")
+            with gr.Row():
+                gr.Image("./front/assets/talk_to_ipcc_france_example.png", label="Total Precipitation in 2030 in France", elem_classes=["example-img"])
+                gr.Image("./front/assets/talk_to_ipcc_new_york_example.png", label="Yearly Evolution of Mean Temperature in New York (Historical + SSP Scenarios)", elem_classes=["example-img"])
+                gr.Image("./front/assets/talk_to_ipcc_china_example.png", label="Mean Temperature in 2050 in China", elem_classes=["example-img"])
+        # Textbox used to report that a query produced no result
+        result_text = gr.Textbox(
+            label="", elem_id="no-result-label", interactive=False, visible=True
+        )
+        # Figure selector next to the SQL query that produced the figure
+        with gr.Row():
+            table_names_display = gr.Radio(
+                choices=[],
+                label="Relevant figures created",
+                interactive=True,
+                elem_id="table-names",
+                visible=False
+            )
+            with gr.Accordion(label="SQL Query Used", visible=False) as query_accordion:
+                ipcc_sql_query = gr.Textbox(
+                    label="", elem_id="sql-query", interactive=False
+                )
+        # Chart area with an optional scenario filter
+        with gr.Accordion(label="Chart", visible=False) as chart_accordion:
+            scenario_selection = gr.Dropdown(
+                label="Scenario", choices=IPCC_MODELS, value="ALL", interactive=True, visible=False
+            )
+            ipcc_display = gr.Plot(elem_id="vanna-plot")
+        # Raw data behind the chart, collapsed by default
+        with gr.Accordion(
+            label="Data used", open=False, visible=False
+        ) as table_accordion:
+            ipcc_table = gr.DataFrame([], elem_id="vanna-table")
+        return ipccUIElements(
+            tab=tab,
+            details_accordion=details_accordion,
+            examples_hidden=examples_hidden,
+            examples=examples,
+            image_examples=image_examples,
+            ipcc_direct_question=ipcc_direct_question,
+            result_text=result_text,
+            table_names_display=table_names_display,
+            query_accordion=query_accordion,
+            ipcc_sql_query=ipcc_sql_query,
+            chart_accordion=chart_accordion,
+            scenario_selection=scenario_selection,
+            ipcc_display=ipcc_display,
+            table_accordion=table_accordion,
+            ipcc_table=ipcc_table,
+        )
+def setup_ipcc_events(ui_elements: ipccUIElements, share_client=None, user_id=None) -> None:
+    """Set up all event handlers for the ipcc tab."""
+    # Create state variables
+    sql_queries_state = gr.State([])
+    dataframes_state = gr.State([])
+    plots_state = gr.State([])
+    index_state = gr.State(0)
+    table_names_list = gr.State([])
+    user_id = gr.State(user_id)
+    # Handle example selection
+    ui_elements["examples_hidden"].change(
+        lambda x: (gr.Accordion(open=False), gr.Textbox(value=x)),
+        inputs=[ui_elements["examples_hidden"]],
+        outputs=[ui_elements["details_accordion"], ui_elements["ipcc_direct_question"]]
+    ).then(
+        lambda : gr.update(visible=False),
+        inputs=None,
+        outputs=ui_elements["image_examples"]
+    ).then(
+        ask_ipcc_query,
+        inputs=[ui_elements["examples_hidden"], index_state, user_id],
+        outputs=[
+            ui_elements["ipcc_sql_query"],
+            ui_elements["ipcc_table"],
+            ui_elements["ipcc_display"],
+            sql_queries_state,
+            dataframes_state,
+            plots_state,
+            index_state,
+            table_names_list,
+            ui_elements["result_text"],
+        ],
+    ).then(
+        show_results,
+        inputs=[sql_queries_state, dataframes_state, plots_state, table_names_list],
+        outputs=[
+            ui_elements["result_text"],
+            ui_elements["query_accordion"],
+            ui_elements["table_accordion"],
+            ui_elements["chart_accordion"],
+            ui_elements["table_names_display"],
+        ],
+    ).then(
+        show_filter_by_scenario,
+        inputs=[table_names_list, index_state, dataframes_state],
+        outputs=[ui_elements["scenario_selection"]],
+    )
+    # Handle direct question submission
+    ui_elements["ipcc_direct_question"].submit(
+        lambda: gr.Accordion(open=False),
+        inputs=None,
+        outputs=[ui_elements["details_accordion"]]
+    ).then(
+        lambda: gr.update(visible=False),
+        inputs=None,
+        outputs=ui_elements["image_examples"]
+    ).then(
+        ask_ipcc_query,
+        inputs=[ui_elements["ipcc_direct_question"], index_state, user_id],
+        outputs=[
+            ui_elements["ipcc_sql_query"],
+            ui_elements["ipcc_table"],
+            ui_elements["ipcc_display"],
+            sql_queries_state,
+            dataframes_state,
+            plots_state,
+            index_state,
+            table_names_list,
+            ui_elements["result_text"],
+        ],
+    ).then(
+        show_results,
+        inputs=[sql_queries_state, dataframes_state, plots_state, table_names_list],
+        outputs=[
+            ui_elements["result_text"],
+            ui_elements["query_accordion"],
+            ui_elements["table_accordion"],
+            ui_elements["chart_accordion"],
+            ui_elements["table_names_display"],
+        ],
+    ).then(
+        show_filter_by_scenario,
+        inputs=[table_names_list, index_state, dataframes_state],
+        outputs=[ui_elements["scenario_selection"]],
+    )
+    # Handle model selection change
+    ui_elements["scenario_selection"].change(
+        filter_by_scenario,
+        inputs=[dataframes_state, plots_state, index_state, ui_elements["scenario_selection"]],
+        outputs=[ui_elements["ipcc_table"], ui_elements["ipcc_display"]],
+    )
+    # Handle table selection
+    ui_elements["table_names_display"].change(
+        fn=on_table_click,
+        inputs=[ui_elements["table_names_display"], table_names_list, sql_queries_state, dataframes_state, plots_state],
+        outputs=[ui_elements["ipcc_sql_query"], ui_elements["ipcc_table"], ui_elements["ipcc_display"], index_state],
+    ).then(
+        show_filter_by_scenario,
+        inputs=[table_names_list, index_state, dataframes_state],
+        outputs=[ui_elements["scenario_selection"]],
+    )
+def create_ipcc_tab(share_client=None, user_id=None):
+    """Create the ipcc tab with all its components and event handlers."""
+    ui_elements = create_ipcc_ui()
+    setup_ipcc_events(ui_elements, share_client=share_client, user_id=user_id)