Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

armanddemasson committed on Apr 18

Commit

705ccec

1 Parent(s): 8c7a7fe

feat: added multithreading to run sql queries in talk to drias

Browse files

Files changed (2) hide show

climateqa/engine/talk_to_data/sql_query.py +3 -2
climateqa/engine/talk_to_data/{workflow.py → talk_to_drias.py} +128 -96

climateqa/engine/talk_to_data/sql_query.py CHANGED Viewed

@@ -22,9 +22,10 @@ async def execute_sql_query(sql_query: str) -> pd.DataFrame:
     """
     def _execute_query():
         # Execute the query
-        results = duckdb.sql(sql_query)
         # return fetched data
-        return results.fetchdf()
     # Run the query in a thread pool to avoid blocking
     loop = asyncio.get_event_loop()

     """
     def _execute_query():
         # Execute the query on a dedicated connection created for this call
+        con = duckdb.connect()
+        try:
+            # fetchdf() materializes the result as a DataFrame, so the
+            # connection can be closed before the data is returned
+            results = con.sql(sql_query).fetchdf()
+        finally:
+            # Release the connection even when the query fails
+            con.close()
         # return fetched data
+        return results
     # Run the query in a thread pool to avoid blocking
     loop = asyncio.get_event_loop()

climateqa/engine/talk_to_data/{workflow.py → talk_to_drias.py} RENAMED Viewed

@@ -1,10 +1,12 @@
 import os
 from typing import Any, Callable, TypedDict, Optional
 import pandas as pd
 from plotly.graph_objects import Figure
 from climateqa.engine.llm import get_llm
 from climateqa.engine.talk_to_data.config import INDICATOR_COLUMNS_PER_TABLE
 from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from climateqa.engine.talk_to_data.sql_query import execute_sql_query
@@ -17,6 +19,7 @@ from climateqa.engine.talk_to_data.utils import (
     detect_relevant_tables,
 )
 ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
 class TableState(TypedDict):
@@ -61,101 +64,6 @@ class State(TypedDict):
     plot_states: dict[str, PlotState]
     error: Optional[str]
-async def drias_workflow(user_input: str) -> State:
-    """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
-    Args:
-        user_input (str): initial user input
-    Returns:
-        State: Final state with all the results
-    """
-    state: State = {
-        'user_input': user_input,
-        'plots': [],
-        'plot_states': {}
-    }
-    llm = get_llm(provider="openai")
-    plots = await find_relevant_plots(state, llm)
-    state['plots'] = plots
-    if not state['plots']:
-        state['error'] = 'There is no plot to answer to the question'
-        return state
-    have_relevant_table = False
-    have_sql_query = False
-    have_dataframe = False
-    for plot_name in state['plots']:
-        plot = next((p for p in PLOTS if p['name'] == plot_name), None) # Find the associated plot object
-        if plot is None:
-            continue
-        plot_state: PlotState = {
-            'plot_name': plot_name,
-            'tables': [],
-            'table_states': {}
-        }
-        plot_state['plot_name'] = plot_name
-        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
-        if len(relevant_tables) > 0 :
-            have_relevant_table = True
-        plot_state['tables'] = relevant_tables
-        params = {}
-        for param_name in plot['params']:
-            param = await find_param(state, param_name, relevant_tables[0])
-            if param:
-                params.update(param)
-        for n, table in enumerate(plot_state['tables']):
-            if n > 2:
-                break
-            table_state: TableState = {
-                'table_name': table,
-                'params': params,
-                'status': 'OK'
-            }
-            table_state["params"]['indicator_column'] = find_indicator_column(table)
-            sql_query = plot['sql_query'](table, table_state['params'])
-            if sql_query == "":
-                table_state['status'] = 'ERROR'
-                continue
-            else :
-                have_sql_query = True
-            table_state['sql_query'] = sql_query
-            df = await execute_sql_query(sql_query)
-            if len(df) > 0:
-                have_dataframe = True
-            figure = plot['plot_function'](table_state['params'])
-            table_state['dataframe'] = df
-            table_state['figure'] = figure
-            plot_state['table_states'][table] = table_state
-        state['plot_states'][plot_name] = plot_state
-    if not have_relevant_table:
-        state['error'] = "There is no relevant table in the our database to answer your question"
-    elif not have_sql_query:
-        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
-    elif not have_dataframe:
-        state['error'] = "There is no data in our table that can answer to your question"
-    return state
 async def find_relevant_plots(state: State, llm) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm)
@@ -238,6 +146,130 @@ def find_indicator_column(table: str) -> str:
     return INDICATOR_COLUMNS_PER_TABLE[table]
 # def make_write_query_node():
 #     def write_query(state):

 import os
 from typing import Any, Callable, TypedDict, Optional
+from numpy import sort
 import pandas as pd
+import asyncio
 from plotly.graph_objects import Figure
 from climateqa.engine.llm import get_llm
+from climateqa.engine.talk_to_data import sql_query
 from climateqa.engine.talk_to_data.config import INDICATOR_COLUMNS_PER_TABLE
 from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from climateqa.engine.talk_to_data.sql_query import execute_sql_query
     detect_relevant_tables,
 )
 ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
 class TableState(TypedDict):
     plot_states: dict[str, PlotState]
     error: Optional[str]
 async def find_relevant_plots(state: State, llm) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm)
     return INDICATOR_COLUMNS_PER_TABLE[table]
+async def process_table(
+    table: str,
+    params: dict[str, Any],
+    plot: Plot,
+) -> TableState:
+    """Builds the SQL query of a plot for one table, runs it and creates the figure.
+    The caller's params are copied before the table's indicator column is added,
+    so the dict passed in is never modified.
+    Args:
+        table (str): The name of the table to process
+        params (dict[str, Any]): Parameters shared by the tables of the plot
+        plot (Plot): The plot entry providing the 'sql_query' and 'plot_function' callables
+    Returns:
+        TableState: State of the table. When the plot produces an empty query string,
+            'status' is 'ERROR' and 'sql_query', 'dataframe' and 'figure' stay None
+    """
+    # Work on a private copy so that the other tables of the plot keep their own params
+    table_params = params.copy()
+    table_params['indicator_column'] = find_indicator_column(table)
+    outcome: TableState = {
+        'table_name': table,
+        'params': table_params,
+        'status': 'OK',
+        'dataframe': None,
+        'sql_query': None,
+        'figure': None
+    }
+    query = plot['sql_query'](table, table_params)
+    if query != "":
+        outcome['sql_query'] = query
+        print(query)
+        outcome['dataframe'] = await execute_sql_query(query)
+        # The figure is created from the parameters, not from the dataframe
+        outcome['figure'] = plot['plot_function'](table_params)
+    else:
+        # An empty string means that no query could be built for this table
+        outcome['status'] = 'ERROR'
+    return outcome
+async def drias_workflow(user_input: str) -> State:
+    """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
+    The relevant plots are detected first. Then, for each plot, the relevant tables and the plot
+    parameters are resolved and at most the first three tables are processed concurrently with process_table.
+    Args:
+        user_input (str): initial user input
+    Returns:
+        State: Final state with all the results. state['error'] holds a message when no plot, no relevant
+            table, no sql query or no non-empty dataframe was found for any plot, and stays '' otherwise
+    """
+    state: State = {
+        'user_input': user_input,
+        'plots': [],
+        'plot_states': {},
+        'error': ''
+    }
+    llm = get_llm(provider="openai")
+    plots = await find_relevant_plots(state, llm)
+    state['plots'] = plots
+    if not state['plots']:
+        state['error'] = 'There is no plot to answer to the question'
+        return state
+    # These flags accumulate over ALL plots and must never be reset inside the loop:
+    # an error is reported only when no plot at all produced a table, a query or data
+    have_relevant_table = False
+    have_sql_query = False
+    have_dataframe = False
+    for plot_name in state['plots']:
+        plot = next((p for p in PLOTS if p['name'] == plot_name), None) # Find the associated plot object
+        if plot is None:
+            continue
+        plot_state: PlotState = {
+            'plot_name': plot_name,
+            'tables': [],
+            'table_states': {}
+        }
+        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
+        plot_state['tables'] = relevant_tables
+        if not relevant_tables:
+            # Without any table there is nothing to query, and relevant_tables[0] below would raise
+            state['plot_states'][plot_name] = plot_state
+            continue
+        have_relevant_table = True
+        # Parameters are resolved once per plot, against the first relevant table
+        params = {}
+        for param_name in plot['params']:
+            param = await find_param(state, param_name, relevant_tables[0])
+            if param:
+                params.update(param)
+        # Only the first three tables are queried; the queries run concurrently
+        tasks = [process_table(table, params, plot) for table in plot_state['tables'][:3]]
+        results = await asyncio.gather(*tasks)
+        # Store results back in plot_state
+        for table_state in results:
+            print(table_state)
+            if table_state['sql_query']:
+                have_sql_query = True
+            if table_state['dataframe'] is not None and len(table_state['dataframe']) > 0:
+                have_dataframe = True
+            plot_state['table_states'][table_state['table_name']] = table_state
+        state['plot_states'][plot_name] = plot_state
+    if not have_relevant_table:
+        state['error'] = "There is no relevant table in our database to answer your question"
+    elif not have_sql_query:
+        state['error'] = "There is no relevant sql query on our database that can help to answer your question"
+    elif not have_dataframe:
+        state['error'] = "There is no data in our table that can answer to your question"
+    return state
 # def make_write_query_node():
 #     def write_query(state):