Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

armanddemasson committed on Apr 1

Commit

4df74e4

1 Parent(s): 3e75ed8

feat: implemented talk to drias v1

Browse files

Files changed (6) hide show

app.py +69 -16
climateqa/engine/talk_to_data/main.py +42 -30
climateqa/engine/talk_to_data/plot.py +172 -0
climateqa/engine/talk_to_data/sql_query.py +64 -0
climateqa/engine/talk_to_data/utils.py +107 -33
climateqa/engine/talk_to_data/workflow.py +233 -0

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from climateqa.engine.reranker import get_reranker
 from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
 from climateqa.engine.chains.retrieve_papers import find_papers
 from climateqa.chat import start_chat, chat_stream, finish_chat
-from climateqa.engine.talk_to_data.main import ask_vanna
 from climateqa.engine.talk_to_data.myVanna import MyVanna
 from front.tabs import (create_config_modal, create_examples_tab, create_papers_tab, create_figures_tab, create_chat_interface, create_about_tab)
@@ -84,8 +84,11 @@ vn = MyVanna(config = {"temperature": 0, "api_key": os.getenv('THEO_API_KEY'), '
 db_vanna_path = os.path.join(os.getcwd(), "data/drias/drias.db")
 vn.connect_to_sqlite(db_vanna_path)
-def ask_vanna_query(query):
-    return ask_vanna(vn, db_vanna_path, query)
 async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
     print("chat cqa - message received")
@@ -122,20 +125,70 @@ def update_sources_number_display(sources_textbox, figures_cards, current_graphs
     return gr.update(label=recommended_content_notif_label), gr.update(label=sources_notif_label), gr.update(label=figures_notif_label), gr.update(label=graphs_notif_label), gr.update(label=papers_notif_label)
 def create_drias_tab():
-    with gr.Tab("Beta - Talk to DRIAS", elem_id="tab-vanna", id=6) as tab_vanna:
-        vanna_direct_question = gr.Textbox(label="Direct Question", placeholder="You can write direct question here",elem_id="direct-question", interactive=True)
-        with gr.Accordion("Details",elem_id = 'vanna-details', open=False) as vanna_details :
-            vanna_sql_query = gr.Textbox(label="SQL Query Used", elem_id="sql-query", interactive=False)
-            show_vanna_table = gr.Button("Show Table", elem_id="show-table")
-            with Modal(visible=False) as vanna_table_modal:
-                vanna_table = gr.DataFrame([], elem_id="vanna-table")
-                close_vanna_modal = gr.Button("Close", elem_id="close-vanna-modal")
-                close_vanna_modal.click(lambda: Modal(visible=False),None, [vanna_table_modal])
-            show_vanna_table.click(lambda: Modal(visible=True),None ,[vanna_table_modal])
-        vanna_display = gr.Plot()
-        vanna_direct_question.submit(ask_vanna_query, [vanna_direct_question], [vanna_sql_query ,vanna_table, vanna_display])
 # # UI Layout Components
 def cqa_tab(tab_name):

 from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
 from climateqa.engine.chains.retrieve_papers import find_papers
 from climateqa.chat import start_chat, chat_stream, finish_chat
+from climateqa.engine.talk_to_data.main import ask_drias
 from climateqa.engine.talk_to_data.myVanna import MyVanna
 from front.tabs import (create_config_modal, create_examples_tab, create_papers_tab, create_figures_tab, create_chat_interface, create_about_tab)
 db_vanna_path = os.path.join(os.getcwd(), "data/drias/drias.db")
 vn.connect_to_sqlite(db_vanna_path)
+# def ask_vanna_query(query):
+#     return ask_vanna(vn, db_vanna_path, query)
+def ask_drias_query(query, index_state):
+    """Gradio callback: forward the question and the history index to ``ask_drias``.
+    The database path comes from the module-level ``db_vanna_path``.
+    """
+    drias_outputs = ask_drias(db_vanna_path, query, index_state)
+    return drias_outputs
 async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
     print("chat cqa - message received")
     return gr.update(label=recommended_content_notif_label), gr.update(label=sources_notif_label), gr.update(label=figures_notif_label), gr.update(label=graphs_notif_label), gr.update(label=papers_notif_label)
+# def create_drias_tab():
+#     with gr.Tab("Beta - Talk to DRIAS", elem_id="tab-vanna", id=6) as tab_vanna:
+#         vanna_direct_question = gr.Textbox(label="Direct Question", placeholder="You can write direct question here",elem_id="direct-question", interactive=True)
+#         with gr.Accordion("Details",elem_id = 'vanna-details', open=False) as vanna_details :
+#             vanna_sql_query = gr.Textbox(label="SQL Query Used", elem_id="sql-query", interactive=False)
+#             show_vanna_table = gr.Button("Show Table", elem_id="show-table")
+#             with Modal(visible=False) as vanna_table_modal:
+#                 vanna_table = gr.DataFrame([], elem_id="vanna-table")
+#                 close_vanna_modal = gr.Button("Close", elem_id="close-vanna-modal")
+#                 close_vanna_modal.click(lambda: Modal(visible=False),None, [vanna_table_modal])
+#             show_vanna_table.click(lambda: Modal(visible=True),None ,[vanna_table_modal])
+#         vanna_display = gr.Plot()
+#         vanna_direct_question.submit(ask_drias_query, [vanna_direct_question], [vanna_sql_query ,vanna_table, vanna_display])
 def create_drias_tab():
+    """Build the "Beta - Talk to DRIAS" tab: question box, result widgets and history navigation.
+    The SQL queries, dataframes and figures returned for the submitted question are kept in
+    ``gr.State`` holders so that the Previous / Next buttons can browse them.
+    """
+    with gr.Tab("Beta - Talk to DRIAS", elem_id="tab-vanna", id=6):
+        drias_direct_question = gr.Textbox(label="Direct Question", placeholder="You can write direct question here", elem_id="direct-question", interactive=True)
+        with gr.Accordion("Details", elem_id="vanna-details", open=False) as drias_details:
+            drias_sql_query = gr.Textbox(label="SQL Query Used", elem_id="sql-query", interactive=False)
+            drias_table = gr.DataFrame([], elem_id="vanna-table")
+            drias_display = gr.Plot()
+            # Navigation buttons
+            prev_button = gr.Button("Previous")
+            next_button = gr.Button("Next")
+            # Data initialisation: the three lists hold one entry per generated result
+            sql_queries_state = gr.State([])
+            dataframes_state = gr.State([])
+            plots_state = gr.State([])
+            index_state = gr.State(0)  # To track the current position
+        # Run the workflow when the question is submitted
+        drias_direct_question.submit(
+            ask_drias_query,
+            inputs=[drias_direct_question, index_state],
+            outputs=[drias_sql_query, drias_table, drias_display, sql_queries_state, dataframes_state, plots_state, index_state]
+        )
+        # Define functions to navigate history
+        def navigate(index, step, sql_queries, dataframes, plots):
+            # Only positions present in all three lists can be displayed
+            size = min(len(sql_queries), len(dataframes), len(plots))
+            if size == 0:
+                # No result yet: leave the widgets untouched instead of raising IndexError
+                return gr.update(), gr.update(), gr.update(), index
+            # Clamp so that the first / last entry is kept when stepping past an end
+            index = min(max(index + step, 0), size - 1)
+            return sql_queries[index], dataframes[index], plots[index], index
+        def show_previous(index, sql_queries, dataframes, plots):
+            return navigate(index, -1, sql_queries, dataframes, plots)
+        def show_next(index, sql_queries, dataframes, plots):
+            return navigate(index, 1, sql_queries, dataframes, plots)
+        prev_button.click(
+            show_previous,
+            inputs=[index_state, sql_queries_state, dataframes_state, plots_state],
+            outputs=[drias_sql_query, drias_table, drias_display, index_state]
+        )
+        next_button.click(
+            show_next,
+            inputs=[index_state, sql_queries_state, dataframes_state, plots_state],
+            outputs=[drias_sql_query, drias_table, drias_display, index_state]
+        )
 # # UI Layout Components
 def cqa_tab(tab_name):

climateqa/engine/talk_to_data/main.py CHANGED Viewed

@@ -1,13 +1,7 @@
-from climateqa.engine.talk_to_data.myVanna import MyVanna
-from climateqa.engine.talk_to_data.utils import loc2coords, detect_location_with_openai, detectTable, nearestNeighbourSQL, detect_relevant_tables, replace_coordonates
-import sqlite3
-import os
-import pandas as pd
 from climateqa.engine.llm import get_llm
 import ast
 llm = get_llm(provider="openai")
 def ask_llm_to_add_table_names(sql_query, llm):
@@ -19,29 +13,47 @@ def ask_llm_column_names(sql_query, llm):
     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
     return columns_list
-def ask_vanna(vn,db_vanna_path, query):
-    try :
-        location = detect_location_with_openai(query)
-        if location:
-            coords = loc2coords(location)
-            user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")
-            relevant_tables = detect_relevant_tables(user_input, llm)
-            coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]
-            user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)
-            sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)
-            return sql_query, result_dataframe, figure
-        else :
-            empty_df = pd.DataFrame()
-            empty_fig = None
-            return "", empty_df, empty_fig
-    except Exception as e:
-        print(f"Error: {e}")
-        empty_df = pd.DataFrame()
-        empty_fig = None
-        return "", empty_df, empty_fig

+from climateqa.engine.talk_to_data.workflow import drias_workflow
 from climateqa.engine.llm import get_llm
 import ast
 llm = get_llm(provider="openai")
 def ask_llm_to_add_table_names(sql_query, llm):
     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
     return columns_list
+def ask_drias(db_drias_path: str, query: str, index_state: int):
+    """Run the Talk to DRIAS workflow and shape its results for the UI.
+    Args:
+        db_drias_path (str): path to the drias database
+        query (str): user question
+        index_state (int): index of the result to display
+    Returns:
+        tuple: displayed sql query, displayed dataframe, displayed figure,
+            all sql queries, all dataframes, all figures, displayed index.
+            When the workflow produced no result, the displayed values are
+            "", None, None and the index is 0.
+    """
+    final_state = drias_workflow(db_drias_path, query)
+    sql_queries = []
+    result_dataframes = []
+    figures = []
+    for plot_state in final_state['plot_states'].values():
+        for table_state in plot_state['table_states'].values():
+            sql_query = table_state.get('sql_query')
+            dataframe = table_state.get('dataframe')
+            figure = table_state.get('figure')
+            # Keep the three lists aligned: a result is stored only when it is complete
+            if sql_query is None or dataframe is None or figure is None:
+                continue
+            sql_queries.append(sql_query)
+            result_dataframes.append(dataframe)
+            # 'figure' is the plotting function: call it with the dataframe to get the figure
+            figures.append(figure(dataframe))
+    if not sql_queries:
+        # Nothing to display: return empty values instead of indexing empty lists
+        return "", None, None, sql_queries, result_dataframes, figures, 0
+    # The index may come from a previous, longer history
+    index_state = min(max(index_state, 0), len(sql_queries) - 1)
+    return sql_queries[index_state], result_dataframes[index_state], figures[index_state], sql_queries, result_dataframes, figures, index_state
+# def ask_vanna(vn,db_vanna_path, query):
+#     try :
+#         location = detect_location_with_openai(query)
+#         if location:
+#             coords = loc2coords(location)
+#             user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")
+#             relevant_tables = detect_relevant_tables(db_vanna_path, user_input, llm)
+#             coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]
+#             user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)
+            # sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)
+#             return sql_query, result_dataframe, figure
+#         else :
+#             empty_df = pd.DataFrame()
+#             empty_fig = None
+#             return "", empty_df, empty_fig
+#     except Exception as e:
+#         print(f"Error: {e}")
+#         empty_df = pd.DataFrame()
+#         empty_fig = None
+#         return "", empty_df, empty_fig

climateqa/engine/talk_to_data/plot.py ADDED Viewed

	@@ -0,0 +1,172 @@

+from typing import Callable, TypedDict
+import pandas as pd
+from plotly.graph_objects import Figure
+import plotly.graph_objects as go
+from climateqa.engine.talk_to_data.sql_query import indicator_per_year_at_location_query
+class Plot(TypedDict):
+    """Declarative description of a plot type available to the workflow."""
+    # Identifier of the plot; the workflow matches the names returned by the LLM against it
+    name: str
+    # Human readable description, inserted in the LLM prompts
+    description: str
+    # Names of the parameters to resolve before building the query and the figure
+    params: list[str]
+    # Called with the params dict; returns the function that builds the figure from a dataframe
+    plot_function: Callable[..., Callable[..., Figure]]
+    # Called with the table name and the params dict; returns the SQL query string
+    sql_query: Callable[..., str]
+def plot_indicator_per_year_at_location(params: dict) -> Callable[..., Figure]:
+    """Generate the function to plot a line plot of an indicator per year at a certain location
+    Args:
+        params (dict): dictionary with the required params : model, indicator_column, location
+    Returns:
+        Callable[..., Figure]: Function which can be called to create the figure with the associated dataframe
+    """
+    indicator = params["indicator_column"]
+    model = params["model"]
+    indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+    # The title must name the plotted series: average over the models or a single model
+    model_label = "Model Average" if model == "ALL" else f"Model : {model}"
+    def plot_data(df: pd.DataFrame) -> Figure:
+        """Generate the figure thanks to the dataframe
+        Args:
+            df (pd.DataFrame): pandas dataframe with the columns year, model and the indicator column
+        Returns:
+            Figure: Plotly figure
+        """
+        if model == "ALL":
+            # Average the indicator over all the models for each year
+            df_plot = df.groupby("year", as_index=False)[indicator].mean()
+        else:
+            df_plot = df[df["model"] == model]
+        # Transform to list to avoid pandas encoding
+        indicators = df_plot[indicator].astype(float).tolist()
+        years = df_plot["year"].astype(int).tolist()
+        # Compute the 10-year rolling average (needs at least 5 values, NaN before)
+        sliding_averages = (
+            df_plot[indicator]
+            .rolling(window=10, min_periods=5)
+            .mean()
+            .astype(float)
+            .tolist()
+        )
+        fig = go.Figure()
+        # Indicator per year plot
+        fig.add_scatter(
+            x=years,
+            y=indicators,
+            name=f"Yearly {indicator_label}",
+            mode="lines",
+        )
+        # Sliding average dashed line
+        fig.add_scatter(
+            x=years,
+            y=sliding_averages,
+            mode="lines",
+            name="10 years rolling average",
+            line=dict(dash="dash"),
+            marker=dict(color="#1f77b4"),
+        )
+        fig.update_layout(
+            title=f"Plot of {indicator_label} in {params['location']} ({model_label})",
+            xaxis_title="Year",
+            yaxis_title=indicator_label,
+            template="plotly_white",
+        )
+        return fig
+    return plot_data
+# Line plot of an indicator over the years at one location
+indicator_per_year_at_location: Plot = {
+    "name": "Indicator per year at location",
+    "description": "Plot an evolution of the indicator at a certain location over the years",
+    "params": ["indicator_column", "location", "model"],
+    "plot_function": plot_indicator_per_year_at_location,
+    "sql_query": indicator_per_year_at_location_query,
+}
+def plot_indicator_number_of_days_per_year_at_location(params) -> Callable[..., Figure]:
+    """Generate the function to plot a bar chart of the number of days per year of an indicator at a certain location
+    Args:
+        params (dict): dictionary with the required params : model, indicator_column, location
+    Returns:
+        Callable[..., Figure]: Function which can be called to create the figure with the associated dataframe
+    """
+    indicator = params["indicator_column"]
+    model = params["model"]
+    indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+    # The title must name the plotted series: average over the models or a single model
+    model_label = "Model Average" if model == "ALL" else f"Model : {model}"
+    def plot_data(df) -> Figure:
+        """Generate the figure thanks to the dataframe
+        Args:
+            df (pd.DataFrame): pandas dataframe with the columns year, model and the indicator column
+        Returns:
+            Figure: Plotly figure
+        """
+        if model == "ALL":
+            # Average the indicator over all the models for each year
+            df_plot = df.groupby("year", as_index=False)[indicator].mean()
+        else:
+            df_plot = df[df["model"] == model]
+        # Transform to list to avoid pandas encoding
+        indicators = df_plot[indicator].astype(float).tolist()
+        years = df_plot["year"].astype(int).tolist()
+        fig = go.Figure()
+        # Bar plot
+        fig.add_trace(
+            go.Bar(
+                x=years,
+                y=indicators,
+                width=0.5,
+                marker=dict(color="#1f77b4"),
+            )
+        )
+        fig.update_layout(
+            title=f"{indicator_label} in {params['location']} ({model_label})",
+            xaxis_title="Year",
+            yaxis_title=indicator,
+            # A year has at most 366 days
+            yaxis=dict(range=[0, 366]),
+            bargap=0.5,
+            template="plotly_white",
+        )
+        return fig
+    return plot_data
+# Bar chart of a number of days per year at one location; shares its SQL query with the line plot
+indicator_number_of_days_per_year_at_location: Plot = {
+    "name": "Indicator number of days per year at location",
+    "description": "Plot a barchart of the number of days per year of a certain indicator at a certain location. It is appropriate for frequency indicator.",
+    "params": ["indicator_column", "location", "model"],
+    "plot_function": plot_indicator_number_of_days_per_year_at_location,
+    "sql_query": indicator_per_year_at_location_query,
+}
+# Plots offered to the LLM when it selects the visualizations for a question
+PLOTS = [indicator_per_year_at_location, indicator_number_of_days_per_year_at_location]

climateqa/engine/talk_to_data/sql_query.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import sqlite3
+from typing import Any, TypedDict
+class SqlQueryOutput(TypedDict):
+    """Result of a SQL query: labels of the selected columns and fetched rows."""
+    labels: list[str]
+    data: list[list[Any]]
+def execute_sql_query(db_path: str, sql_query: str) -> SqlQueryOutput:
+    """Execute the SQL Query on the sqlite database
+    Args:
+        db_path (str): path to the sqlite database
+        sql_query (str): sql query to execute
+    Returns:
+        SqlQueryOutput: labels of the selected column and fetched data
+    Raises:
+        sqlite3.Error: if the query cannot be executed
+    """
+    # Connect to sqlite3 database
+    conn = sqlite3.connect(db_path)
+    try:
+        cursor = conn.cursor()
+        # Execute the query
+        cursor.execute(sql_query)
+        # Fetch labels of selected columns (description is None when the statement returns no rows)
+        labels = [desc[0] for desc in cursor.description or []]
+        # Fetch data
+        data = cursor.fetchall()
+    finally:
+        # Release the connection even when the query fails
+        conn.close()
+    return {
+        "labels": labels,
+        "data": data,
+    }
+class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
+    """Parameters read by ``indicator_per_year_at_location_query`` (all optional)."""
+    table: str
+    # Name of the single column holding the indicator values
+    indicator_column: str
+    latitude: str
+    longitude: str
+def indicator_per_year_at_location_query(
+    table: str, params: IndicatorPerYearAtLocationQueryParams
+) -> str:
+    """SQL Query to get the evolution of an indicator per year at a certain location
+    Args:
+        table (str): sql table of the indicator
+        params (IndicatorPerYearAtLocationQueryParams) : dictionary with the required params for the query
+    Returns:
+        str: the sql query
+    """
+    indicator_column = params.get("indicator_column")
+    latitude = params.get("latitude")
+    longitude = params.get("longitude")
+    # NOTE(review): the values are interpolated as-is; a missing param ends up as the text "None" in the query
+    sql_query = f"SELECT year, {indicator_column}, model FROM {table} WHERE latitude = {latitude} and longitude={longitude} Order by Year"
+    return sql_query

climateqa/engine/talk_to_data/utils.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import re
-import openai
-import pandas as pd
 from geopy.geocoders import Nominatim
 import sqlite3
 import ast
 from climateqa.engine.llm import get_llm
 def detect_location_with_openai(sentence):
     """
@@ -26,67 +28,139 @@ def detect_location_with_openai(sentence):
     else:
         return ""
 def detectTable(sql_query):
     pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
     matches = re.findall(pattern, sql_query)
     return matches
-def loc2coords(location : str):
     geolocator = Nominatim(user_agent="city_to_latlong")
-    location = geolocator.geocode(location)
-    return (location.latitude, location.longitude)
-def coords2loc(coords : tuple):
     geolocator = Nominatim(user_agent="coords_to_city")
     try:
         location = geolocator.reverse(coords)
         return location.address
     except Exception as e:
         print(f"Error: {e}")
-        return "Unknown Location"
-def nearestNeighbourSQL(db: str, location: tuple, table : str):
     conn = sqlite3.connect(db)
     long = round(location[1], 3)
     lat = round(location[0], 3)
-    cursor  = conn.cursor()
-    cursor.execute(f"SELECT lat, lon FROM {table} WHERE lat BETWEEN {lat - 0.3} AND {lat + 0.3} AND lon BETWEEN {long - 0.3} AND {long + 0.3}")
     results = cursor.fetchall()
     return results[0]
-def detect_relevant_tables(user_question, llm):
-    table_names_list = [
-        "Frequency_of_rainy_days_index",
-        "Winter_precipitation_total",
-        "Summer_precipitation_total",
-        "Annual_precipitation_total",
-        # "Remarkable_daily_precipitation_total_(Q99)",
-        "Frequency_of_remarkable_daily_precipitation",
-        "Extreme_precipitation_intensity",
-        "Mean_winter_temperature",
-        "Mean_summer_temperature",
-        "Number_of_tropical_nights",
-        "Maximum_summer_temperature",
-        "Number_of_days_with_Tx_above_30C",
-        "Number_of_days_with_Tx_above_35C",
-        "Drought_index"
-    ]
     prompt = (
-        f"You are helping to build a sql query to retrieve relevant data for a user question."
         f"The different tables are {table_names_list}."
         f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table name."
     )
-    table_names = ast.literal_eval(llm.invoke(prompt).content.strip("```python\n").strip())
     return table_names
 def replace_coordonates(coords, query, coords_tables):
     n = query.count(str(coords[0]))
     for i in range(n):
-        query = query.replace(str(coords[0]), str(coords_tables[i][0]),1)
-        query = query.replace(str(coords[1]), str(coords_tables[i][1]),1)
-    return query

 import re
+from sympy import use
 from geopy.geocoders import Nominatim
 import sqlite3
 import ast
 from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 def detect_location_with_openai(sentence):
     """
     else:
         return ""
 def detectTable(sql_query):
+    """Return the table references that follow each FROM keyword of ``sql_query``.
+    The match is case-insensitive and accepts bare, backtick-, double- or single-quoted
+    names, optionally dot-qualified; quotes are kept in the returned strings.
+    """
     pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
     matches = re.findall(pattern, sql_query)
     return matches
+def loc2coords(location: str):
+    """Geocode a location name with Nominatim.
+    Args:
+        location (str): name of the place to look up
+    Returns:
+        tuple: (latitude, longitude) of the best match
+    Raises:
+        ValueError: if the geocoder finds no match for the location
+    """
     geolocator = Nominatim(user_agent="city_to_latlong")
+    coords = geolocator.geocode(location)
+    if coords is None:
+        # geocode() returns None when nothing matches: fail with an explicit message
+        raise ValueError(f"Unable to geocode location: {location!r}")
+    return (coords.latitude, coords.longitude)
+def coords2loc(coords: tuple):
+    """Reverse-geocode coordinates into an address with Nominatim.
+    Returns the address of the match, or "Unknown Location" when the lookup raises
+    (the error is printed).
+    """
     geolocator = Nominatim(user_agent="coords_to_city")
     try:
         location = geolocator.reverse(coords)
         return location.address
     except Exception as e:
         print(f"Error: {e}")
+        return "Unknown Location"
+def nearestNeighbourSQL(db: str, location: tuple, table: str):
+    """Find the grid point of ``table`` closest to ``location``.
+    Args:
+        db (str): path to the sqlite database
+        location (tuple): (latitude, longitude) to look around
+        table (str): table with latitude and longitude columns
+    Returns:
+        tuple: (latitude, longitude) of the closest point within +/- 0.3 degree
+    Raises:
+        IndexError: if the table has no point within +/- 0.3 degree of the location
+    """
     long = round(location[1], 3)
     lat = round(location[0], 3)
+    conn = sqlite3.connect(db)
+    try:
+        cursor = conn.cursor()
+        # The table name cannot be bound as a parameter; the coordinates are.
+        # Candidates are ordered by squared distance so the first row is the nearest one.
+        cursor.execute(
+            f"SELECT latitude, longitude FROM {table} WHERE latitude BETWEEN ? AND ? AND longitude BETWEEN ? AND ? "
+            "ORDER BY (latitude - ?) * (latitude - ?) + (longitude - ?) * (longitude - ?) LIMIT 1",
+            (lat - 0.3, lat + 0.3, long - 0.3, long + 0.3, lat, lat, long, long),
+        )
+        result = cursor.fetchone()
+    finally:
+        # Release the connection even when the query fails
+        conn.close()
+    if result is None:
+        raise IndexError(f"No point of table {table} around latitude {lat}, longitude {long}")
+    return result
+def detect_relevant_tables(db: str, user_question: str, plot: Plot, llm) -> list[str]:
+    """Ask the LLM which tables of the database suit the plot and the user question.
+    Args:
+        db (str): path to the sqlite database
+        user_question (str): question asked by the user
+        plot (Plot): plot for which tables are needed (its description goes in the prompt)
+        llm: chat model exposing ``invoke(prompt).content``
+    Returns:
+        list[str]: names of the relevant tables, restricted to tables present in the database
+    """
+    conn = sqlite3.connect(db)
+    try:
+        cursor = conn.cursor()
+        # Get all table names
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        # fetchall() yields 1-tuples: keep the bare names for the prompt
+        table_names_list = [row[0] for row in cursor.fetchall()]
+    finally:
+        # Release the connection even when the query fails
+        conn.close()
     prompt = (
+        f"You are helping to build a plot following this description : {plot['description']}."
+        f"Based on the description of the plot, which table are appropriate for that kind of plot."
         f"The different tables are {table_names_list}."
         f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table name."
     )
+    # strip() removes the characters of a possible markdown code fence around the list
+    table_names = ast.literal_eval(
+        llm.invoke(prompt).content.strip("```python\n").strip()
+    )
+    # Drop names the model may have invented: they would fail later in the SQL query
+    return [name for name in table_names if name in table_names_list]
 def replace_coordonates(coords, query, coords_tables):
+    """Replace the coordinates written in ``query`` by the coordinates of ``coords_tables``.
+    At iteration i, the first remaining occurrence of ``coords[0]`` and of ``coords[1]``
+    is replaced by ``coords_tables[i][0]`` and ``coords_tables[i][1]``.
+    """
+    # Number of iterations = occurrences of the first coordinate in the incoming query
     n = query.count(str(coords[0]))
     for i in range(n):
+        # count=1: a single occurrence is replaced at each step
+        query = query.replace(str(coords[0]), str(coords_tables[i][0]), 1)
+        query = query.replace(str(coords[1]), str(coords_tables[i][1]), 1)
+    return query
+def detect_relevant_plots(user_question: str, llm):
+    """Ask the LLM which plots of ``PLOTS`` are relevant to answer the user question.
+    Args:
+        user_question (str): question asked by the user
+        llm: chat model exposing ``invoke(prompt).content``
+    Returns:
+        list: names of the relevant plots, as written by the LLM
+    Raises:
+        ValueError, SyntaxError: if the answer is not a valid Python literal
+    """
+    plots_description = "".join(
+        "Name: " + plot["name"] + " - Description: " + plot["description"] + "\n"
+        for plot in PLOTS
+    )
+    prompt = (
+        f"You are helping to answer a question with insightful visualizations. "
+        f"Given a list of plots with their name and description: "
+        f"{plots_description} "
+        f"The user question is: {user_question}. "
+        f"Choose the most relevant plots to answer the question. "
+        f"The answer must be a Python list with the names of the relevant plots, and nothing else. "
+        f"Ensure the response is in the exact format: ['PlotName1', 'PlotName2']."
+    )
+    response = llm.invoke(prompt).content.strip()
+    # Remove a possible markdown code fence around the list
+    response = response.removeprefix("```python").removeprefix("```").removesuffix("```").strip()
+    # literal_eval only parses literals: unlike eval, it cannot run code coming from the model
+    return ast.literal_eval(response)
+# Next Version
+# class QueryOutput(TypedDict):
+#     """Generated SQL query."""
+#     query: Annotated[str, ..., "Syntactically valid SQL query."]
+# class PlotlyCodeOutput(TypedDict):
+#     """Generated Plotly code"""
+#     code: Annotated[str, ..., "Synatically valid Plotly python code."]
+# def write_sql_query(user_input: str, db: SQLDatabase, relevant_tables: list[str], llm):
+#     """Generate SQL query to fetch information."""
+#     prompt_params = {
+#         "dialect": db.dialect,
+#         "table_info": db.get_table_info(),
+#         "input": user_input,
+#         "relevant_tables": relevant_tables,
+#         "model": "ALADIN63_CNRM-CM5",
+#     }
+#     prompt = ChatPromptTemplate.from_template(query_prompt_template)
+#     structured_llm = llm.with_structured_output(QueryOutput)
+#     chain = prompt | structured_llm
+#     result = chain.invoke(prompt_params)
+#     return result["query"]
+# def fetch_data_from_sql_query(db: str, sql_query: str):
+#     conn = sqlite3.connect(db)
+#     cursor = conn.cursor()
+#     cursor.execute(sql_query)
+#     column_names = [desc[0] for desc in cursor.description]
+#     values = cursor.fetchall()
+#     return {"column_names": column_names, "data": values}
+# def generate_chart_code(user_input: str, sql_query: list[str], llm):
+#     """ "Generate plotly python code for the chart based on the sql query and the user question"""
+#     class PlotlyCodeOutput(TypedDict):
+#         """Generated Plotly code"""
+#         code: Annotated[str, ..., "Synatically valid Plotly python code."]
+#     prompt = ChatPromptTemplate.from_template(plot_prompt_template)
+#     structured_llm = llm.with_structured_output(PlotlyCodeOutput)
+#     chain = prompt | structured_llm
+#     result = chain.invoke({"input": user_input, "sql_query": sql_query})
+#     return result["code"]

climateqa/engine/talk_to_data/workflow.py ADDED Viewed

	@@ -0,0 +1,233 @@

+import os
+from typing import Any, Callable, NotRequired, TypedDict
+import pandas as pd
+from plotly.graph_objects import Figure
+from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data.plot import PLOTS, Plot
+from climateqa.engine.talk_to_data.sql_query import execute_sql_query
+from climateqa.engine.talk_to_data.utils import (
+    detect_relevant_plots,
+    loc2coords,
+    detect_location_with_openai,
+    nearestNeighbourSQL,
+    detect_relevant_tables,
+)
+# Directory two levels above the current working directory
+ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
+# NOTE(review): not referenced in the visible code (drias_workflow receives the path as an argument) — confirm it is still needed
+DRIAS_DB_PATH = ROOT_PATH + "/data/drias/drias.db"
+class TableState(TypedDict):
+    """Results of the workflow for one table of one plot."""
+    table_name: str
+    # Parameters given to the SQL query builder and to the plot function
+    params: dict[str, Any]
+    sql_query: NotRequired[str]
+    # Rows fetched by the SQL query
+    dataframe: NotRequired[pd.DataFrame | None]
+    # Function returned by the plot's plot_function: call it with the dataframe to get the figure
+    figure: NotRequired[Callable[..., Figure]]
+class PlotState(TypedDict):
+    """Results of the workflow for one plot."""
+    plot_name: str
+    # Names of the tables selected for this plot
+    tables: list[str]
+    # One TableState per table, keyed by table name
+    table_states: dict[str, TableState]
+class State(TypedDict):
+    """Whole state of the Talk To Drias workflow."""
+    user_input: str
+    # Names of the plots selected for the user input
+    plots: list[str]
+    # One PlotState per plot, keyed by plot name
+    plot_states: dict[str, PlotState]
+def drias_workflow(db_drias_path: str, user_input: str) -> State:
+    """Run Talk To Drias end to end: plots, tables, parameters, sql queries, dataframes and figures
+    Args:
+        db_drias_path (str): path to the drias database
+        user_input (str): initial user input
+    Returns:
+        State: Final state with all the results
+    """
+    state: State = {
+        'user_input': user_input,
+        'plots': [],
+        'plot_states': {}
+    }
+    llm = get_llm(provider="openai")
+    state['plots'] = find_relevant_plots(state, llm)
+    # Nothing to build when no plot was selected
+    if not state['plots']:
+        return state
+    for plot_name in state['plots']:
+        # Find the associated plot object; names unknown to PLOTS are ignored
+        plot = next((candidate for candidate in PLOTS if candidate['name'] == plot_name), None)
+        if plot is None:
+            continue
+        plot_state: PlotState = {
+            'plot_name': plot_name,
+            'tables': find_relevant_tables_per_plot(state, plot, db_drias_path, llm),
+            'table_states': {}
+        }
+        for table in plot_state['tables']:
+            # Every table starts with the average over all the models
+            params: dict[str, Any] = {'model': 'ALL'}
+            table_state: TableState = {
+                'table_name': table,
+                'params': params,
+            }
+            for param_name in plot['params']:
+                found = find_param(state, param_name, table, db_drias_path)
+                if found:
+                    params.update(found)
+            # Build and run the query before preparing the plotting function
+            table_state['sql_query'] = plot['sql_query'](table, params)
+            results = execute_sql_query(db_drias_path, table_state['sql_query'])
+            dataframe = pd.DataFrame(results['data'], columns=results['labels'])
+            plot_data = plot['plot_function'](params)
+            table_state['dataframe'] = dataframe
+            table_state['figure'] = plot_data
+            plot_state['table_states'][table] = table_state
+        state['plot_states'][plot_name] = plot_state
+    return state
+def find_relevant_plots(state: State, llm) -> list[str]:
+    """Return the names of the plots the LLM selects for the user input of the state."""
+    print("---- Find relevant plots ----")
+    return detect_relevant_plots(state['user_input'], llm)
+def find_relevant_tables_per_plot(state: State, plot: Plot, db_path: str, llm) -> list[str]:
+    """Return the names of the tables the LLM selects for this plot and the user input of the state."""
+    print(f"---- Find relevant tables for {plot['name']} ----")
+    user_input = state['user_input']
+    return detect_relevant_tables(db_path, user_input, plot, llm)
+def find_param(state: State, param_name:str, table: str, db_path: str) -> dict[str, Any] | None:
+    """Dispatch to the method retrieving the desired parameter
+    Args:
+        state (State): state of the workflow
+        param_name (str): name of the desired parameter
+        table (str): name of the table
+        db_path (str): path to the database
+    Returns:
+        dict[str, Any] | None: values to merge in the params, None for a parameter with no finder
+    """
+    match param_name:
+        case 'location':
+            return find_location(state['user_input'], table, db_path)
+        case 'indicator_column':
+            return {'indicator_column': find_indicator_column(table)}
+        case _:
+            return None
+class Location(TypedDict):
+    """Location detected in the user input and, when one was found, its coordinates."""
+    location: str
+    # Coordinates of the point of the table returned by nearestNeighbourSQL
+    latitude: NotRequired[str]
+    longitude: NotRequired[str]
+def find_location(user_input: str, table: str, db_path: str) -> Location:
+    """Detect the location of the user input and match it to a point of the table
+    Args:
+        user_input (str): initial user input
+        table (str): name of the table
+        db_path (str): path to the database
+    Returns:
+        Location: detected location, with latitude and longitude only when a location was detected
+    """
+    print(f"---- Find location in table {table} ----")
+    place = detect_location_with_openai(user_input)
+    if not place:
+        # No location detected: no coordinates to add
+        return {'location': place}
+    neighbour = nearestNeighbourSQL(db_path, loc2coords(place), table)
+    return {
+        'location': place,
+        "latitude": neighbour[0],
+        "longitude": neighbour[1],
+    }
+def find_indicator_column(table: str) -> str:
+    """Retrieve the name of the indicator column within the table in the database
+    Args:
+        table (str): name of the table
+    Returns:
+        str: name of the indicator column
+    Raises:
+        KeyError: if the table is not a known indicator table
+    """
+    print(f"---- Find indicator column in table {table} ----")
+    indicator_columns_per_table = {
+        "total_winter_precipitation": "total_winter_precipitation",
+        # NOTE(review): misspelled key kept in case the database table carries the same typo — confirm
+        "total_summer_precipiation": "total_summer_precipitation",
+        # Correct spelling accepted too, so a correctly named table does not raise KeyError
+        "total_summer_precipitation": "total_summer_precipitation",
+        "total_annual_precipitation": "total_annual_precipitation",
+        "total_remarkable_daily_precipitation": "total_remarkable_daily_precipitation",
+        "frequency_of_remarkable_daily_precipitation": "frequency_of_remarkable_daily_precipitation",
+        "extreme_precipitation_intensity": "extreme_precipitation_intensity",
+        "mean_winter_temperature": "mean_winter_temperature",
+        "mean_summer_temperature": "mean_summer_temperature",
+        "mean_annual_temperature": "mean_annual_temperature",
+        "number_of_tropical_nights": "number_tropical_nights",
+        "maximum_summer_temperature": "maximum_summer_temperature",
+        "number_of_days_with_TX_above_30": "number_of_days_with_tx_above_30",
+        "number_of_days_with_TX_above_35": "number_of_days_with_tx_above_35",
+        "number_of_days_with_a_dry_ground": "number_of_days_with_dry_ground"
+    }
+    return indicator_columns_per_table[table]
+# def make_write_query_node():
+#     def write_query(state):
+#         print("---- Write query ----")
+#         for table in state["tables"]:
+#             sql_query = QUERIES[state[table]['query_type']](
+#                 table=table,
+#                 indicator_column=state[table]["columns"],
+#                 longitude=state[table]["longitude"],
+#                 latitude=state[table]["latitude"],
+#             )
+#             state[table].update({"sql_query": sql_query})
+#         return state
+#     return write_query
+# def make_fetch_data_node(db_path):
+#     def fetch_data(state):
+#         print("---- Fetch data ----")
+#         for table in state["tables"]:
+#             results = execute_sql_query(db_path, state[table]['sql_query'])
+#             state[table].update(results)
+#         return state
+#     return fetch_data
+## V2
+# def make_fetch_data_node(db_path: str, llm):
+#     def fetch_data(state):
+#         print("---- Fetch data ----")
+#         db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
+#         output = {}
+#         sql_query = write_sql_query(state["query"], db, state["tables"], llm)
+#         # TO DO : Add query checker
+#         print(f"SQL query  : {sql_query}")
+#         output["sql_query"] = sql_query
+#         output.update(fetch_data_from_sql_query(db_path, sql_query))
+#         return output
+#     return fetch_data