Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Community

timeki committed on Apr 7

Commit

5fe1543

1 Parent(s): 45a9320

make ask drias asynchronous

Browse files

Files changed (5) hide show

climateqa/engine/talk_to_data/main.py +2 -2
climateqa/engine/talk_to_data/sql_query.py +12 -5
climateqa/engine/talk_to_data/utils.py +8 -8
climateqa/engine/talk_to_data/workflow.py +16 -23
front/tabs/tab_drias.py +2 -2

climateqa/engine/talk_to_data/main.py CHANGED Viewed

@@ -37,7 +37,7 @@ def ask_llm_column_names(sql_query: str, llm) -> list[str]:
     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
     return columns_list
-def ask_drias(query: str, index_state: int = 0) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
@@ -60,7 +60,7 @@ def ask_drias(query: str, index_state: int = 0) -> tuple:
             - table_list (list): List of table names used
             - error (str): Error message if any
     """
-    final_state = drias_workflow(query)
     sql_queries = []
     result_dataframes = []
     figures = []

     columns_list = ast.literal_eval(columns.strip("```python\n").strip())
     return columns_list
+async def ask_drias(query: str, index_state: int = 0) -> tuple:
     """Main function to process a DRIAS query and return results.
     This function orchestrates the DRIAS workflow, processing a user query to generate
             - table_list (list): List of table names used
             - error (str): Error message if any
     """
+    final_state = await drias_workflow(query)
     sql_queries = []
     result_dataframes = []
     figures = []

climateqa/engine/talk_to_data/sql_query.py CHANGED Viewed

@@ -1,8 +1,10 @@
 from typing import TypedDict
 import duckdb
 import pandas as pd
-def execute_sql_query(sql_query: str) -> pd.DataFrame:
     """Executes a SQL query on the DRIAS database and returns the results.
     This function connects to the DuckDB database containing DRIAS climate data
@@ -18,11 +20,16 @@ def execute_sql_query(sql_query: str) -> pd.DataFrame:
     Raises:
         duckdb.Error: If there is an error executing the SQL query
     """
-    # Execute the query
-    results = duckdb.sql(sql_query)
-    # return fetched data
-    return results.fetchdf()
 class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):

+import asyncio
+from concurrent.futures import ThreadPoolExecutor
 from typing import TypedDict
 import duckdb
 import pandas as pd
+async def execute_sql_query(sql_query: str) -> pd.DataFrame:
     """Executes a SQL query on the DRIAS database and returns the results.
+
+    This function connects to the DuckDB database containing DRIAS climate data
+    and executes the query in a worker thread so the event loop is not blocked
+    while DuckDB is running.
+
+    Args:
+        sql_query (str): The SQL query to execute
+
+    Returns:
+        pd.DataFrame: The rows fetched from the query result
+
     Raises:
         duckdb.Error: If there is an error executing the SQL query
     """
+    def _execute_query():
+        # Execute the query
+        results = duckdb.sql(sql_query)
+        # return fetched data
+        return results.fetchdf()
+    # Run the blocking DuckDB call on the event loop's default executor.
+    # get_running_loop() is the supported way to obtain the loop from inside a
+    # coroutine, and passing None reuses the loop's shared thread pool instead
+    # of creating and shutting down a new ThreadPoolExecutor for every query.
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(None, _execute_query)
 class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):

climateqa/engine/talk_to_data/utils.py CHANGED Viewed

@@ -9,7 +9,7 @@ from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from langchain_core.prompts import ChatPromptTemplate
-def detect_location_with_openai(sentence):
     """
     Detects locations in a sentence using OpenAI's API via LangChain.
     """
@@ -22,7 +22,7 @@ def detect_location_with_openai(sentence):
     Sentence: "{sentence}"
     """
-    response = llm.invoke(prompt)
     location_list = ast.literal_eval(response.content.strip("```python\n").strip())
     if location_list:
         return location_list[0]
@@ -40,7 +40,7 @@ class ArrayOutput(TypedDict):
     """
     array: Annotated[str, "Syntactically valid python array."]
-def detect_year_with_openai(sentence: str) -> str:
     """
     Detects years in a sentence using OpenAI's API via LangChain.
     """
@@ -56,7 +56,7 @@ def detect_year_with_openai(sentence: str) -> str:
     prompt = ChatPromptTemplate.from_template(prompt)
     structured_llm = llm.with_structured_output(ArrayOutput)
     chain = prompt | structured_llm
-    response: ArrayOutput = chain.invoke({"sentence": sentence})
     years_list = eval(response['array'])
     if len(years_list) > 0:
         return years_list[0]
@@ -146,7 +146,7 @@ def nearestNeighbourSQL(location: tuple, table: str) -> tuple[str, str]:
     return results['latitude'].iloc[0], results['longitude'].iloc[0]
-def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
     """Identifies relevant tables for a plot based on user input.
     This function uses an LLM to analyze the user's question and the plot
@@ -183,7 +183,7 @@ def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
     )
     table_names = ast.literal_eval(
-        llm.invoke(prompt).content.strip("```python\n").strip()
     )
     return table_names
@@ -197,7 +197,7 @@ def replace_coordonates(coords, query, coords_tables):
     return query
-def detect_relevant_plots(user_question: str, llm):
     plots_description = ""
     for plot in PLOTS:
         plots_description += "Name: " + plot["name"]
@@ -223,7 +223,7 @@ def detect_relevant_plots(user_question: str, llm):
     # )
     plot_names = ast.literal_eval(
-        llm.invoke(prompt).content.strip("```python\n").strip()
     )
     return plot_names

 from langchain_core.prompts import ChatPromptTemplate
+async def detect_location_with_openai(sentence):
     """
     Detects locations in a sentence using OpenAI's API via LangChain.
     """
     Sentence: "{sentence}"
     """
+    response = await llm.ainvoke(prompt)
     location_list = ast.literal_eval(response.content.strip("```python\n").strip())
     if location_list:
         return location_list[0]
     """
     array: Annotated[str, "Syntactically valid python array."]
+async def detect_year_with_openai(sentence: str) -> str:
     """
     Detects years in a sentence using OpenAI's API via LangChain.
     """
     prompt = ChatPromptTemplate.from_template(prompt)
     structured_llm = llm.with_structured_output(ArrayOutput)
     chain = prompt | structured_llm
+    response: ArrayOutput = await chain.ainvoke({"sentence": sentence})
     years_list = eval(response['array'])
     if len(years_list) > 0:
         return years_list[0]
     return results['latitude'].iloc[0], results['longitude'].iloc[0]
+async def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
     """Identifies relevant tables for a plot based on user input.
     This function uses an LLM to analyze the user's question and the plot
     )
     table_names = ast.literal_eval(
+        (await llm.ainvoke(prompt)).content.strip("```python\n").strip()
     )
     return table_names
     return query
+async def detect_relevant_plots(user_question: str, llm):
     plots_description = ""
     for plot in PLOTS:
         plots_description += "Name: " + plot["name"]
     # )
     plot_names = ast.literal_eval(
+        (await llm.ainvoke(prompt)).content.strip("```python\n").strip()
     )
     return plot_names

climateqa/engine/talk_to_data/workflow.py CHANGED Viewed

@@ -61,7 +61,7 @@ class State(TypedDict):
     plot_states: dict[str, PlotState]
     error: NotRequired[str]
-def drias_workflow(user_input: str) -> State:
     """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
     Args:
@@ -78,7 +78,7 @@ def drias_workflow(user_input: str) -> State:
     llm = get_llm(provider="openai")
-    plots = find_relevant_plots(state, llm)
     state['plots'] = plots
     if not state['plots']:
@@ -102,7 +102,7 @@ def drias_workflow(user_input: str) -> State:
         plot_state['plot_name'] = plot_name
-        relevant_tables = find_relevant_tables_per_plot(state, plot, llm)
         if len(relevant_tables) > 0 :
             have_relevant_table = True
@@ -110,7 +110,7 @@ def drias_workflow(user_input: str) -> State:
         params = {}
         for param_name in plot['params']:
-            param = find_param(state, param_name, relevant_tables[0])
             if param:
                 params.update(param)
@@ -135,7 +135,7 @@ def drias_workflow(user_input: str) -> State:
                 have_sql_query = True
             table_state['sql_query'] = sql_query
-            df = execute_sql_query(sql_query)
             if len(df) > 0:
                 have_dataframe = True
@@ -154,22 +154,19 @@ def drias_workflow(user_input: str) -> State:
     elif not have_dataframe:
         state['error'] = "There is no data in our table that can answer to your question"
     return state
-def find_relevant_plots(state: State, llm) -> list[str]:
     print("---- Find relevant plots ----")
-    relevant_plots = detect_relevant_plots(state['user_input'], llm)
     return relevant_plots
-def find_relevant_tables_per_plot(state: State, plot: Plot, llm) -> list[str]:
     print(f"---- Find relevant tables for {plot['name']} ----")
-    relevant_tables = detect_relevant_tables(state['user_input'], plot, llm)
     return relevant_tables
-def find_param(state: State, param_name:str, table: str) -> dict[str, Any] | None:
     """Perform the good method to retrieve the desired parameter
     Args:
@@ -181,25 +178,21 @@ def find_param(state: State, param_name:str, table: str) -> dict[str, Any] | Non
         dict[str, Any] | None:
     """
     if param_name == 'location':
-        location = find_location(state['user_input'], table)
         return location
-    # if param_name == 'indicator_column':
-    #     indicator_column = find_indicator_column(table)
-    #     return {'indicator_column': indicator_column}
     if param_name == 'year':
-        year = find_year(state['user_input'])
         return {'year': year}
     return None
 class Location(TypedDict):
     location: str
     latitude: NotRequired[str]
     longitude: NotRequired[str]
-def find_location(user_input: str, table: str) -> Location:
     print(f"---- Find location in table {table} ----")
-    location = detect_location_with_openai(user_input)
     output: Location = {'location' : location}
     if location:
         coords = loc2coords(location)
@@ -210,7 +203,7 @@ def find_location(user_input: str, table: str) -> Location:
         })
     return output
-def find_year(user_input: str) -> str:
     """Extracts year information from user input using LLM.
     This function uses an LLM to identify and extract year information from the
@@ -223,7 +216,7 @@ def find_year(user_input: str) -> str:
         str: The extracted year, or empty string if no year found
     """
     print(f"---- Find year ---")
-    year = detect_year_with_openai(user_input)
     return year
 def find_indicator_column(table: str) -> str:

     plot_states: dict[str, PlotState]
     error: NotRequired[str]
+async def drias_workflow(user_input: str) -> State:
     """Performs the complete workflow of Talk To Drias : from user input to sql queries, dataframes and figures generated
     Args:
     llm = get_llm(provider="openai")
+    plots = await find_relevant_plots(state, llm)
     state['plots'] = plots
     if not state['plots']:
         plot_state['plot_name'] = plot_name
+        relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
         if len(relevant_tables) > 0 :
             have_relevant_table = True
         params = {}
         for param_name in plot['params']:
+            param = await find_param(state, param_name, relevant_tables[0])
             if param:
                 params.update(param)
                 have_sql_query = True
             table_state['sql_query'] = sql_query
+            df = await execute_sql_query(sql_query)
             if len(df) > 0:
                 have_dataframe = True
     elif not have_dataframe:
         state['error'] = "There is no data in our table that can answer to your question"
     return state
+async def find_relevant_plots(state: State, llm) -> list[str]:
+    """Finds the plots that are relevant to the user's question.
+
+    Prints a progress banner, then awaits detect_relevant_plots with the
+    question stored in the workflow state.
+
+    Args:
+        state (State): Workflow state; only state['user_input'] is read here
+        llm: LLM client, forwarded unchanged to detect_relevant_plots
+
+    Returns:
+        list[str]: The value returned by detect_relevant_plots
+    """
     print("---- Find relevant plots ----")
+    relevant_plots = await detect_relevant_plots(state['user_input'], llm)
     return relevant_plots
+async def find_relevant_tables_per_plot(state: State, plot: Plot, llm) -> list[str]:
+    """Finds the tables that are relevant to one plot for the user's question.
+
+    Prints a progress banner naming the plot, then awaits
+    detect_relevant_tables with the question stored in the workflow state.
+
+    Args:
+        state (State): Workflow state; only state['user_input'] is read here
+        plot (Plot): Plot definition; plot['name'] is shown in the banner and
+            the whole plot is forwarded to detect_relevant_tables
+        llm: LLM client, forwarded unchanged to detect_relevant_tables
+
+    Returns:
+        list[str]: The value returned by detect_relevant_tables
+    """
     print(f"---- Find relevant tables for {plot['name']} ----")
+    relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm)
     return relevant_tables
+async def find_param(state: State, param_name:str, table: str) -> dict[str, Any] | None:
+    """Dispatches to the helper that extracts the requested parameter.
+
     Args:
+        state (State): Workflow state; only state['user_input'] is read here
+        param_name (str): Name of the parameter to extract; 'location' and
+            'year' are the only names handled
+        table (str): Table name, forwarded to find_location for 'location'
+
+    Returns:
         dict[str, Any] | None:
+            The dict returned by find_location when param_name is 'location',
+            {'year': <value returned by find_year>} when it is 'year', and
+            None for any other parameter name
     """
     if param_name == 'location':
+        location = await find_location(state['user_input'], table)
         return location
     if param_name == 'year':
+        year = await find_year(state['user_input'])
         return {'year': year}
     return None
 class Location(TypedDict):
+    """Location parameter dict built by find_location.
+
+    'location' is a required key; 'latitude' and 'longitude' are optional
+    keys (NotRequired).
+    """
+    # Value returned by detect_location_with_openai for the user's input
     location: str
+    # NOTE(review): presumably filled in by find_location after
+    # loc2coords(location); that part of its body is not visible here -- confirm
     latitude: NotRequired[str]
     longitude: NotRequired[str]
+async def find_location(user_input: str, table: str) -> Location:
     print(f"---- Find location in table {table} ----")
+    location = await detect_location_with_openai(user_input)
     output: Location = {'location' : location}
     if location:
         coords = loc2coords(location)
         })
     return output
+async def find_year(user_input: str) -> str:
     """Extracts year information from user input using LLM.
+
     This function uses an LLM to identify and extract year information from the
+    user's input by awaiting detect_year_with_openai.
+
+    Args:
+        user_input (str): The user's query text
+
+    Returns:
         str: The extracted year, or empty string if no year found
     """
     print(f"---- Find year ---")
+    year = await detect_year_with_openai(user_input)
     return year
 def find_indicator_column(table: str) -> str:

front/tabs/tab_drias.py CHANGED Viewed

@@ -4,8 +4,8 @@ from climateqa.engine.talk_to_data.main import ask_drias
 from climateqa.engine.talk_to_data.config import DRIAS_MODELS, DRIAS_UI_TEXT
-def ask_drias_query(query: str, index_state: int):
-    return ask_drias(query, index_state)
 def show_results(sql_queries_state, dataframes_state, plots_state):

 from climateqa.engine.talk_to_data.config import DRIAS_MODELS, DRIAS_UI_TEXT
+async def ask_drias_query(query: str, index_state: int):
+    """Awaits ask_drias for the given query and returns its result unchanged.
+
+    Args:
+        query (str): The user's question, forwarded to ask_drias
+        index_state (int): Index value, forwarded to ask_drias
+    """
+    return await ask_drias(query, index_state)
 def show_results(sql_queries_state, dataframes_state, plots_state):