timeki committed on
Commit 6af9e98 · 2 Parent(s): 3e75ed8 bc43b45

Merged in dev (pull request #4)

app.py CHANGED
@@ -9,13 +9,13 @@ from climateqa.engine.embeddings import get_embeddings_function
 from climateqa.engine.llm import get_llm
 from climateqa.engine.vectorstore import get_pinecone_vectorstore
 from climateqa.engine.reranker import get_reranker
-from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
+from climateqa.engine.graph import make_graph_agent, make_graph_agent_poc
 from climateqa.engine.chains.retrieve_papers import find_papers
 from climateqa.chat import start_chat, chat_stream, finish_chat
-from climateqa.engine.talk_to_data.main import ask_vanna
-from climateqa.engine.talk_to_data.myVanna import MyVanna
 
-from front.tabs import (create_config_modal, create_examples_tab, create_papers_tab, create_figures_tab, create_chat_interface, create_about_tab)
+from front.tabs import create_config_modal, cqa_tab, create_about_tab
+from front.tabs import MainTabPanel, ConfigPanel
+from front.tabs.tab_drias import create_drias_tab
 from front.utils import process_figures
 from gradio_modal import Modal
 
@@ -24,14 +24,14 @@ from utils import create_user_id
 import logging
 
 logging.basicConfig(level=logging.WARNING)
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppresses INFO and WARNING logs
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Suppresses INFO and WARNING logs
 logging.getLogger().setLevel(logging.WARNING)
 
 
-
 # Load environment variables in local mode
 try:
     from dotenv import load_dotenv
+
    load_dotenv()
 except Exception as e:
     pass
@@ -62,39 +62,94 @@ share_client = service.get_share_client(file_share_name)
 user_id = create_user_id()
 
 
-
 # Create vectorstore and retriever
 embeddings_function = get_embeddings_function()
-vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX"))
-vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
-vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_LOCAL_V2"))
+vectorstore = get_pinecone_vectorstore(
+    embeddings_function, index_name=os.getenv("PINECONE_API_INDEX")
+)
+vectorstore_graphs = get_pinecone_vectorstore(
+    embeddings_function,
+    index_name=os.getenv("PINECONE_API_INDEX_OWID"),
+    text_key="description",
+)
+vectorstore_region = get_pinecone_vectorstore(
+    embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_LOCAL_V2")
+)
 
-llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
+llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
 if os.environ["GRADIO_ENV"] == "local":
     reranker = get_reranker("nano")
-else :
+else:
     reranker = get_reranker("large")
 
-agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0.2)
-agent_poc = make_graph_agent_poc(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0, version="v4")#TODO put back default 0.2
-
-#Vanna object
-
-vn = MyVanna(config = {"temperature": 0, "api_key": os.getenv('THEO_API_KEY'), 'model': os.getenv('VANNA_MODEL'), 'pc_api_key': os.getenv('VANNA_PINECONE_API_KEY'), 'index_name': os.getenv('VANNA_INDEX_NAME'), "top_k" : 4})
-db_vanna_path = os.path.join(os.getcwd(), "data/drias/drias.db")
-vn.connect_to_sqlite(db_vanna_path)
-
-def ask_vanna_query(query):
-    return ask_vanna(vn, db_vanna_path, query)
-
-async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+agent = make_graph_agent(
+    llm=llm,
+    vectorstore_ipcc=vectorstore,
+    vectorstore_graphs=vectorstore_graphs,
+    vectorstore_region=vectorstore_region,
+    reranker=reranker,
+    threshold_docs=0.2,
+)
+agent_poc = make_graph_agent_poc(
+    llm=llm,
+    vectorstore_ipcc=vectorstore,
+    vectorstore_graphs=vectorstore_graphs,
+    vectorstore_region=vectorstore_region,
+    reranker=reranker,
+    threshold_docs=0,
+    version="v4",
+)  # TODO put back default 0.2
+
+
+
+
+async def chat(
+    query,
+    history,
+    audience,
+    sources,
+    reports,
+    relevant_content_sources_selection,
+    search_only,
+):
     print("chat cqa - message received")
-    async for event in chat_stream(agent, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id):
+    async for event in chat_stream(
+        agent,
+        query,
+        history,
+        audience,
+        sources,
+        reports,
+        relevant_content_sources_selection,
+        search_only,
+        share_client,
+        user_id,
+    ):
         yield event
-
-async def chat_poc(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+
+
+async def chat_poc(
+    query,
+    history,
+    audience,
+    sources,
+    reports,
+    relevant_content_sources_selection,
+    search_only,
+):
     print("chat poc - message received")
-    async for event in chat_stream(agent_poc, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id):
+    async for event in chat_stream(
+        agent_poc,
+        query,
+        history,
+        audience,
+        sources,
+        reports,
+        relevant_content_sources_selection,
+        search_only,
+        share_client,
+        user_id,
+    ):
         yield event
 
 
@@ -102,14 +157,17 @@ async def chat_poc(query, history, audience, sources, reports, relevant_content_
 # Gradio
 # --------------------------------------------------------------------
 
+
 # Function to update modal visibility
 def update_config_modal_visibility(config_open):
     print(config_open)
     new_config_visibility_status = not config_open
     return Modal(visible=new_config_visibility_status), new_config_visibility_status
-
 
-def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
+
+def update_sources_number_display(
+    sources_textbox, figures_cards, current_graphs, papers_html
+):
     sources_number = sources_textbox.count("<h2>")
     figures_number = figures_cards.count("<h2>")
     graphs_number = current_graphs.count("<iframe")
@@ -118,229 +176,368 @@ def update_sources_number_display(sources_textbox, figures_cards, current_graphs
     figures_notif_label = f"Figures ({figures_number})"
     graphs_notif_label = f"Graphs ({graphs_number})"
     papers_notif_label = f"Papers ({papers_number})"
-    recommended_content_notif_label = f"Recommended content ({figures_number + graphs_number + papers_number})"
-
-    return gr.update(label=recommended_content_notif_label), gr.update(label=sources_notif_label), gr.update(label=figures_notif_label), gr.update(label=graphs_notif_label), gr.update(label=papers_notif_label)
-
-def create_drias_tab():
-    with gr.Tab("Beta - Talk to DRIAS", elem_id="tab-vanna", id=6) as tab_vanna:
-        vanna_direct_question = gr.Textbox(label="Direct Question", placeholder="You can write direct question here",elem_id="direct-question", interactive=True)
-        with gr.Accordion("Details",elem_id = 'vanna-details', open=False) as vanna_details :
-            vanna_sql_query = gr.Textbox(label="SQL Query Used", elem_id="sql-query", interactive=False)
-            show_vanna_table = gr.Button("Show Table", elem_id="show-table")
-            with Modal(visible=False) as vanna_table_modal:
-                vanna_table = gr.DataFrame([], elem_id="vanna-table")
-                close_vanna_modal = gr.Button("Close", elem_id="close-vanna-modal")
-                close_vanna_modal.click(lambda: Modal(visible=False),None, [vanna_table_modal])
-            show_vanna_table.click(lambda: Modal(visible=True),None ,[vanna_table_modal])
-
-        vanna_display = gr.Plot()
-        vanna_direct_question.submit(ask_vanna_query, [vanna_direct_question], [vanna_sql_query ,vanna_table, vanna_display])
-
-# # UI Layout Components
-def cqa_tab(tab_name):
-    # State variables
-    current_graphs = gr.State([])
-    with gr.Tab(tab_name):
-        with gr.Row(elem_id="chatbot-row"):
-            # Left column - Chat interface
-            with gr.Column(scale=2):
-                chatbot, textbox, config_button = create_chat_interface(tab_name)
-
-            # Right column - Content panels
-            with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
-                with gr.Tabs(elem_id="right_panel_tab") as tabs:
-                    # Examples tab
-                    with gr.TabItem("Examples", elem_id="tab-examples", id=0):
-                        examples_hidden = create_examples_tab(tab_name)
-
-                    # Sources tab
-                    with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
-                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
-
-
-                    # Recommended content tab
-                    with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
-                        with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
-                            # Figures subtab
-                            with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
-                                sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal = create_figures_tab()
-
-                            # Papers subtab
-                            with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
-                                papers_direct_search, papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
-
-                            # Graphs subtab
-                            with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
-                                graphs_container = gr.HTML(
-                                    "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
-                                    elem_id="graphs-container"
-                                )
-
-
-    return {
-        "chatbot": chatbot,
-        "textbox": textbox,
-        "tabs": tabs,
-        "sources_raw": sources_raw,
-        "new_figures": new_figures,
-        "current_graphs": current_graphs,
-        "examples_hidden": examples_hidden,
-        "sources_textbox": sources_textbox,
-        "figures_cards": figures_cards,
-        "gallery_component": gallery_component,
-        "config_button": config_button,
-        "papers_direct_search" : papers_direct_search,
-        "papers_html": papers_html,
-        "citations_network": citations_network,
-        "papers_summary": papers_summary,
-        "tab_recommended_content": tab_recommended_content,
-        "tab_sources": tab_sources,
-        "tab_figures": tab_figures,
-        "tab_graphs": tab_graphs,
-        "tab_papers": tab_papers,
-        "graph_container": graphs_container,
-        # "vanna_sql_query": vanna_sql_query,
-        # "vanna_table" : vanna_table,
-        # "vanna_display": vanna_display
-    }
-
-def config_event_handling(main_tabs_components : list[dict], config_componenets : dict):
-    config_open = config_componenets["config_open"]
-    config_modal = config_componenets["config_modal"]
-    close_config_modal = config_componenets["close_config_modal_button"]
-
-    for button in [close_config_modal] + [main_tab_component["config_button"] for main_tab_component in main_tabs_components]:
+    recommended_content_notif_label = (
+        f"Recommended content ({figures_number + graphs_number + papers_number})"
+    )
+
+    return (
+        gr.update(label=recommended_content_notif_label),
+        gr.update(label=sources_notif_label),
+        gr.update(label=figures_notif_label),
+        gr.update(label=graphs_notif_label),
+        gr.update(label=papers_notif_label),
+    )
+
+
+def config_event_handling(
+    main_tabs_components: list[MainTabPanel], config_componenets: ConfigPanel
+):
+    config_open = config_componenets.config_open
+    config_modal = config_componenets.config_modal
+    close_config_modal = config_componenets.close_config_modal_button
+
+    for button in [close_config_modal] + [
+        main_tab_component.config_button for main_tab_component in main_tabs_components
+    ]:
        button.click(
            fn=update_config_modal_visibility,
            inputs=[config_open],
-            outputs=[config_modal, config_open]
-        )
-
+            outputs=[config_modal, config_open],
+        )
+
+
 def event_handling(
-    main_tab_components,
-    config_components,
-    tab_name="ClimateQ&A"
+    main_tab_components: MainTabPanel,
+    config_components: ConfigPanel,
+    tab_name="ClimateQ&A",
 ):
-    chatbot = main_tab_components["chatbot"]
-    textbox = main_tab_components["textbox"]
-    tabs = main_tab_components["tabs"]
-    sources_raw = main_tab_components["sources_raw"]
-    new_figures = main_tab_components["new_figures"]
-    current_graphs = main_tab_components["current_graphs"]
-    examples_hidden = main_tab_components["examples_hidden"]
-    sources_textbox = main_tab_components["sources_textbox"]
-    figures_cards = main_tab_components["figures_cards"]
-    gallery_component = main_tab_components["gallery_component"]
-    # config_button = main_tab_components["config_button"]
-    papers_direct_search = main_tab_components["papers_direct_search"]
-    papers_html = main_tab_components["papers_html"]
-    citations_network = main_tab_components["citations_network"]
-    papers_summary = main_tab_components["papers_summary"]
-    tab_recommended_content = main_tab_components["tab_recommended_content"]
-    tab_sources = main_tab_components["tab_sources"]
-    tab_figures = main_tab_components["tab_figures"]
-    tab_graphs = main_tab_components["tab_graphs"]
-    tab_papers = main_tab_components["tab_papers"]
-    graphs_container = main_tab_components["graph_container"]
-    # vanna_sql_query = main_tab_components["vanna_sql_query"]
-    # vanna_table = main_tab_components["vanna_table"]
-    # vanna_display = main_tab_components["vanna_display"]
-
-
-    # config_open = config_components["config_open"]
-    # config_modal = config_components["config_modal"]
-    dropdown_sources = config_components["dropdown_sources"]
-    dropdown_reports = config_components["dropdown_reports"]
-    dropdown_external_sources = config_components["dropdown_external_sources"]
-    search_only = config_components["search_only"]
-    dropdown_audience = config_components["dropdown_audience"]
-    after = config_components["after"]
-    output_query = config_components["output_query"]
-    output_language = config_components["output_language"]
-    # close_config_modal = config_components["close_config_modal_button"]
-
+    chatbot = main_tab_components.chatbot
+    textbox = main_tab_components.textbox
+    tabs = main_tab_components.tabs
+    sources_raw = main_tab_components.sources_raw
+    new_figures = main_tab_components.new_figures
+    current_graphs = main_tab_components.current_graphs
+    examples_hidden = main_tab_components.examples_hidden
+    sources_textbox = main_tab_components.sources_textbox
+    figures_cards = main_tab_components.figures_cards
+    gallery_component = main_tab_components.gallery_component
+    papers_direct_search = main_tab_components.papers_direct_search
+    papers_html = main_tab_components.papers_html
+    citations_network = main_tab_components.citations_network
+    papers_summary = main_tab_components.papers_summary
+    tab_recommended_content = main_tab_components.tab_recommended_content
+    tab_sources = main_tab_components.tab_sources
+    tab_figures = main_tab_components.tab_figures
+    tab_graphs = main_tab_components.tab_graphs
+    tab_papers = main_tab_components.tab_papers
+    graphs_container = main_tab_components.graph_container
+    follow_up_examples = main_tab_components.follow_up_examples
+    follow_up_examples_hidden = main_tab_components.follow_up_examples_hidden
+
+    dropdown_sources = config_components.dropdown_sources
+    dropdown_reports = config_components.dropdown_reports
+    dropdown_external_sources = config_components.dropdown_external_sources
+    search_only = config_components.search_only
+    dropdown_audience = config_components.dropdown_audience
+    after = config_components.after
+    output_query = config_components.output_query
+    output_language = config_components.output_language
+
     new_sources_hmtl = gr.State([])
     ttd_data = gr.State([])
 
-
-    # for button in [config_button, close_config_modal]:
-    #     button.click(
-    #         fn=update_config_modal_visibility,
-    #         inputs=[config_open],
-    #         outputs=[config_modal, config_open]
-    #     )
-
    if tab_name == "ClimateQ&A":
        print("chat cqa - message sent")
 
        # Event for textbox
-        (textbox
-            .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
-            .then(chat, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
-            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
+        (
+            textbox.submit(
+                start_chat,
+                [textbox, chatbot, search_only],
+                [textbox, tabs, chatbot, sources_raw],
+                queue=False,
+                api_name=f"start_chat_{textbox.elem_id}",
+            )
+            .then(
+                chat,
+                [
+                    textbox,
+                    chatbot,
+                    dropdown_audience,
+                    dropdown_sources,
+                    dropdown_reports,
+                    dropdown_external_sources,
+                    search_only,
+                ],
+                [
+                    chatbot,
+                    new_sources_hmtl,
+                    output_query,
+                    output_language,
+                    new_figures,
+                    current_graphs,
+                    follow_up_examples.dataset,
+                ],
+                concurrency_limit=8,
+                api_name=f"chat_{textbox.elem_id}",
+            )
+            .then(
+                finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}"
+            )
        )
        # Event for examples_hidden
-        (examples_hidden
-            .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
-            .then(chat, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
-            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{examples_hidden.elem_id}")
+        (
+            examples_hidden.change(
+                start_chat,
+                [examples_hidden, chatbot, search_only],
+                [examples_hidden, tabs, chatbot, sources_raw],
+                queue=False,
+                api_name=f"start_chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                chat,
+                [
+                    examples_hidden,
+                    chatbot,
+                    dropdown_audience,
+                    dropdown_sources,
+                    dropdown_reports,
+                    dropdown_external_sources,
+                    search_only,
+                ],
+                [
+                    chatbot,
+                    new_sources_hmtl,
+                    output_query,
+                    output_language,
+                    new_figures,
+                    current_graphs,
+                    follow_up_examples.dataset,
+                ],
+                concurrency_limit=8,
+                api_name=f"chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                finish_chat,
+                None,
+                [textbox],
+                api_name=f"finish_chat_{examples_hidden.elem_id}",
+            )
+        )
+        (
+            follow_up_examples_hidden.change(
+                start_chat,
+                [follow_up_examples_hidden, chatbot, search_only],
+                [follow_up_examples_hidden, tabs, chatbot, sources_raw],
+                queue=False,
+                api_name=f"start_chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                chat,
+                [
+                    follow_up_examples_hidden,
+                    chatbot,
+                    dropdown_audience,
+                    dropdown_sources,
+                    dropdown_reports,
+                    dropdown_external_sources,
+                    search_only,
+                ],
+                [
+                    chatbot,
+                    new_sources_hmtl,
+                    output_query,
+                    output_language,
+                    new_figures,
+                    current_graphs,
+                    follow_up_examples.dataset,
+                ],
+                concurrency_limit=8,
+                api_name=f"chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                finish_chat,
+                None,
+                [textbox],
+                api_name=f"finish_chat_{follow_up_examples_hidden.elem_id}",
+            )
        )
-
+
    elif tab_name == "Beta - POC Adapt'Action":
        print("chat poc - message sent")
        # Event for textbox
-        (textbox
-            .submit(start_chat, [textbox, chatbot, search_only], [textbox, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{textbox.elem_id}")
-            .then(chat_poc, [textbox, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{textbox.elem_id}")
-            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}")
+        (
+            textbox.submit(
+                start_chat,
+                [textbox, chatbot, search_only],
+                [textbox, tabs, chatbot, sources_raw],
+                queue=False,
+                api_name=f"start_chat_{textbox.elem_id}",
+            )
+            .then(
+                chat_poc,
+                [
+                    textbox,
+                    chatbot,
+                    dropdown_audience,
+                    dropdown_sources,
+                    dropdown_reports,
+                    dropdown_external_sources,
+                    search_only,
+                ],
+                [
+                    chatbot,
+                    new_sources_hmtl,
+                    output_query,
+                    output_language,
+                    new_figures,
+                    current_graphs,
+                ],
+                concurrency_limit=8,
+                api_name=f"chat_{textbox.elem_id}",
+            )
+            .then(
+                finish_chat, None, [textbox], api_name=f"finish_chat_{textbox.elem_id}"
+            )
        )
        # Event for examples_hidden
-        (examples_hidden
-            .change(start_chat, [examples_hidden, chatbot, search_only], [examples_hidden, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{examples_hidden.elem_id}")
-            .then(chat_poc, [examples_hidden, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{examples_hidden.elem_id}")
-            .then(finish_chat, None, [textbox], api_name=f"finish_chat_{examples_hidden.elem_id}")
+        (
+            examples_hidden.change(
+                start_chat,
+                [examples_hidden, chatbot, search_only],
+                [examples_hidden, tabs, chatbot, sources_raw],
+                queue=False,
+                api_name=f"start_chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                chat_poc,
+                [
+                    examples_hidden,
+                    chatbot,
+                    dropdown_audience,
+                    dropdown_sources,
+                    dropdown_reports,
+                    dropdown_external_sources,
+                    search_only,
+                ],
+                [
+                    chatbot,
+                    new_sources_hmtl,
+                    output_query,
+                    output_language,
+                    new_figures,
+                    current_graphs,
+                ],
+                concurrency_limit=8,
+                api_name=f"chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                finish_chat,
+                None,
+                [textbox],
+                api_name=f"finish_chat_{examples_hidden.elem_id}",
+            )
+        )
+        (
+            follow_up_examples_hidden.change(
+                start_chat,
+                [follow_up_examples_hidden, chatbot, search_only],
+                [follow_up_examples_hidden, tabs, chatbot, sources_raw],
+                queue=False,
+                api_name=f"start_chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                chat,
+                [
+                    follow_up_examples_hidden,
+                    chatbot,
+                    dropdown_audience,
+                    dropdown_sources,
+                    dropdown_reports,
+                    dropdown_external_sources,
+                    search_only,
+                ],
+                [
+                    chatbot,
+                    new_sources_hmtl,
+                    output_query,
+                    output_language,
+                    new_figures,
+                    current_graphs,
+                    follow_up_examples.dataset,
+                ],
+                concurrency_limit=8,
+                api_name=f"chat_{examples_hidden.elem_id}",
+            )
+            .then(
+                finish_chat,
+                None,
+                [textbox],
+                api_name=f"finish_chat_{follow_up_examples_hidden.elem_id}",
+            )
        )
-
-
-    new_sources_hmtl.change(lambda x : x, inputs = [new_sources_hmtl], outputs = [sources_textbox])
-    current_graphs.change(lambda x: x, inputs=[current_graphs], outputs=[graphs_container])
-    new_figures.change(process_figures, inputs=[sources_raw, new_figures], outputs=[sources_raw, figures_cards, gallery_component])
+
+    new_sources_hmtl.change(
+        lambda x: x, inputs=[new_sources_hmtl], outputs=[sources_textbox]
+    )
+    current_graphs.change(
+        lambda x: x, inputs=[current_graphs], outputs=[graphs_container]
+    )
+    new_figures.change(
+        process_figures,
+        inputs=[sources_raw, new_figures],
+        outputs=[sources_raw, figures_cards, gallery_component],
+    )
 
    # Update sources numbers
    for component in [sources_textbox, figures_cards, current_graphs, papers_html]:
-        component.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs, papers_html], [tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-
+        component.change(
+            update_sources_number_display,
+            [sources_textbox, figures_cards, current_graphs, papers_html],
+            [tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers],
+        )
+
    # Search for papers
    for component in [textbox, examples_hidden, papers_direct_search]:
-        component.submit(find_papers, [component, after, dropdown_external_sources], [papers_html, citations_network, papers_summary])
-
+        component.submit(
+            find_papers,
+            [component, after, dropdown_external_sources],
+            [papers_html, citations_network, papers_summary],
+        )
 
    # if tab_name == "Beta - POC Adapt'Action": # Not untill results are good enough
    #     # Drias search
    #     textbox.submit(ask_vanna, [textbox], [vanna_sql_query ,vanna_table, vanna_display])
 
+
 def main_ui():
    # config_open = gr.State(True)
-    with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme, elem_id="main-component") as demo:
-        config_components = create_config_modal()
-
+    with gr.Blocks(
+        title="Climate Q&A",
+        css_paths=os.getcwd() + "/style.css",
+        theme=theme,
+        elem_id="main-component",
+    ) as demo:
+        config_components = create_config_modal()
+
        with gr.Tabs():
-            cqa_components = cqa_tab(tab_name = "ClimateQ&A")
-            local_cqa_components = cqa_tab(tab_name = "Beta - POC Adapt'Action")
-            create_drias_tab()
-
+            cqa_components = cqa_tab(tab_name="ClimateQ&A")
+            local_cqa_components = cqa_tab(tab_name="Beta - POC Adapt'Action")
+            create_drias_tab(share_client=share_client, user_id=user_id)
+
            create_about_tab()
-
-        event_handling(cqa_components, config_components, tab_name = 'ClimateQ&A')
-        event_handling(local_cqa_components, config_components, tab_name = "Beta - POC Adapt'Action")
-
-        config_event_handling([cqa_components,local_cqa_components] ,config_components)
-
+
+        event_handling(cqa_components, config_components, tab_name="ClimateQ&A")
+        event_handling(
+            local_cqa_components, config_components, tab_name="Beta - POC Adapt'Action"
+        )
+
+        config_event_handling([cqa_components, local_cqa_components], config_components)
+
    demo.queue()
-
+
    return demo
 
-
+
 demo = main_ui()
 demo.launch(ssr_mode=False)
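Note on the wiring above: the chained submit(...).then(...).then(...) pattern is what sequences start_chat, the streaming chat step, and finish_chat on a single textbox. A minimal self-contained sketch of the same pattern, with hypothetical stub handlers standing in for ClimateQ&A's real ones:

import gradio as gr

def start_chat(message, history):
    # Clear the textbox and append the user turn before streaming starts
    return "", history + [(message, None)]

def answer(history):
    # Stand-in for the streaming chat step; fills in the assistant turn
    history[-1] = (history[-1][0], f"Echo: {history[-1][0]}")
    return history

def finish_chat():
    # Re-enable the textbox once the chain has finished
    return gr.update(interactive=True)

with gr.Blocks() as demo_sketch:
    chatbot = gr.Chatbot()
    textbox = gr.Textbox()
    (
        textbox.submit(start_chat, [textbox, chatbot], [textbox, chatbot], queue=False)
        .then(answer, [chatbot], [chatbot], concurrency_limit=8)
        .then(finish_chat, None, [textbox])
    )

demo_sketch.launch()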
climateqa/chat.py CHANGED
@@ -61,6 +61,27 @@ def handle_numerical_data(event):
        return numerical_data, sql_query
    return None, None
 
+def log_drias_interaction_to_azure(query, sql_query, data, share_client, user_id):
+    try:
+        # Log interaction to Azure if not in local environment
+        if os.getenv("GRADIO_ENV") != "local":
+            timestamp = str(datetime.now().timestamp())
+            logs = {
+                "user_id": str(user_id),
+                "query": query,
+                "sql_query": sql_query,
+                # "data": data.to_dict() if data is not None else None,
+                "time": timestamp,
+            }
+            log_on_azure(f"drias_{timestamp}.json", logs, share_client)
+            print(f"Logged Drias interaction to Azure Blob Storage: {logs}")
+        else:
+            print("share_client or user_id is None, or GRADIO_ENV is local")
+    except Exception as e:
+        print(f"Error logging Drias interaction on Azure Blob Storage: {e}")
+        error_msg = f"Drias Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+        raise gr.Error(error_msg)
+
 # Main chat function
 async def chat_stream(
    agent : CompiledStateGraph,
@@ -101,6 +122,7 @@
    audience_prompt = init_audience(audience)
    sources = sources or ["IPCC", "IPBES"]
    reports = reports or []
+    relevant_history_discussion = history[-2:] if len(history) > 1 else []
 
    # Prepare inputs for agent
    inputs = {
@@ -109,7 +131,8 @@
        "sources_input": sources,
        "relevant_content_sources_selection": relevant_content_sources_selection,
        "search_only": search_only,
-        "reports": reports
+        "reports": reports,
+        "chat_history": relevant_history_discussion,
    }
 
    # Get streaming events from agent
@@ -129,6 +152,7 @@
    retrieved_contents = []
    answer_message_content = ""
    vanna_data = {}
+    follow_up_examples = gr.Dataset(samples=[])
 
    # Define processing steps
    steps_display = {
@@ -200,7 +224,12 @@
                    sub_questions = [q["question"] + "-> relevant sources : " + str(q["sources"]) for q in event["data"]["output"]["questions_list"]]
                    history[-1].content += "Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions)
 
-            yield history, docs_html, output_query, output_language, related_contents, graphs_html#, vanna_data
+            # Handle follow up questions
+            if event["name"] == "generate_follow_up" and event["event"] == "on_chain_end":
+                follow_up_examples = event["data"]["output"].get("follow_up_questions", [])
+                follow_up_examples = gr.Dataset(samples= [ [question] for question in follow_up_examples ])
+
+            yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples#, vanna_data
 
        except Exception as e:
            print(f"Event {event} has failed")
@@ -211,4 +240,4 @@
    # Call the function to log interaction
    log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id)
 
-    yield history, docs_html, output_query, output_language, related_contents, graphs_html#, vanna_data
+    yield history, docs_html, output_query, output_language, related_contents, graphs_html, follow_up_examples#, vanna_data
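The streaming yields above now carry a seventh value, the gr.Dataset update for follow-up questions. gr.Dataset expects one row per sample, with one cell per underlying component, which is why each question is wrapped in its own list. A small sketch of that shaping step (the question list here is illustrative):

import gradio as gr

follow_up_questions = ["Details the first point", "And what about France?"]

# One single-cell row per question — hence the [question] wrapping used in chat_stream
dataset_update = gr.Dataset(samples=[[q] for q in follow_up_questions])
print(dataset_update.samples)  # [['Details the first point'], ['And what about France?']]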
climateqa/engine/chains/answer_rag.py CHANGED
@@ -65,6 +65,7 @@ def make_rag_node(llm,with_docs = True):
    async def answer_rag(state,config):
        print("---- Answer RAG ----")
        start_time = time.time()
+        chat_history = state.get("chat_history",[])
        print("Sources used : " + "\n".join([x.metadata["short_name"] + " - page " + str(x.metadata["page_number"]) for x in state["documents"]]))
 
        answer = await rag_chain.ainvoke(state,config)
@@ -73,9 +74,10 @@ def make_rag_node(llm,with_docs = True):
        elapsed_time = end_time - start_time
        print("RAG elapsed time: ", elapsed_time)
        print("Answer size : ", len(answer))
-        # print(f"\n\nAnswer:\n{answer}")
 
-        return {"answer":answer}
+        chat_history.append({"question":state["query"],"answer":answer})
+
+        return {"answer":answer,"chat_history": chat_history}
 
    return answer_rag
 
climateqa/engine/chains/follow_up.py ADDED
@@ -0,0 +1,33 @@
+from typing import List
+from langchain.prompts import ChatPromptTemplate
+
+
+FOLLOW_UP_TEMPLATE = """Based on the previous question and answer, generate 2-3 relevant follow-up questions that would help explore the topic further.
+
+Previous Question: {user_input}
+Previous Answer: {answer}
+
+Generate short, concise, focused follow-up questions
+You don't need a full question as it will be reformulated later as a standalone question with the context. Eg. "Details the first point"
+"""
+
+def make_follow_up_node(llm):
+    prompt = ChatPromptTemplate.from_template(FOLLOW_UP_TEMPLATE)
+
+    def generate_follow_up(state):
+        print("---- Generate_follow_up ----")
+        if not state.get("answer"):
+            return state
+
+        response = llm.invoke(prompt.format(
+            user_input=state["user_input"],
+            answer=state["answer"]
+        ))
+
+        # Extract questions from response
+        follow_ups = [q.strip() for q in response.content.split("\n") if q.strip()]
+        state["follow_up_questions"] = follow_ups
+
+        return state
+
+    return generate_follow_up
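make_follow_up_node only needs an object whose invoke method returns something with a .content string, so the node can be exercised without a real model. A sketch under that assumption (the stub below is hypothetical, not part of the commit):

from climateqa.engine.chains.follow_up import make_follow_up_node

class StubLLM:
    def invoke(self, prompt):
        # Pretend the model returned two newline-separated follow-ups
        class Response:
            content = "What regions are most affected?\nDetails the first point"
        return Response()

node = make_follow_up_node(StubLLM())
state = {"user_input": "What is climate change?", "answer": "Climate change refers to ..."}
state = node(state)
print(state["follow_up_questions"])
# ['What regions are most affected?', 'Details the first point']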
climateqa/engine/chains/intent_categorization.py CHANGED
@@ -1,4 +1,3 @@
-
 from langchain_core.pydantic_v1 import BaseModel, Field
 from typing import List
 from typing import Literal
@@ -44,7 +43,7 @@ def make_intent_categorization_chain(llm):
    llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
 
    prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a helpful assistant, you will analyze, translate and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
+        ("system", "You are a helpful assistant, you will analyze, detect the language, and categorize the user input message using the function provided. You MUST detect and return the language of the input message. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
        ("user", "input: {input}")
    ])
 
@@ -58,11 +57,19 @@ def make_intent_categorization_node(llm):
 
    def categorize_message(state):
        print("---- Categorize_message ----")
+        print(f"Input state: {state}")
 
        output = categorization_chain.invoke({"input": state["user_input"]})
-        print(f"\n\nOutput intent categorization: {output}\n")
-        if "language" not in output: output["language"] = "English"
+        print(f"\n\nRaw output from categorization: {output}\n")
+
+        if "language" not in output:
+            print("WARNING: Language field missing from output, setting default to English")
+            output["language"] = "English"
+        else:
+            print(f"Language detected: {output['language']}")
+
        output["query"] = state["user_input"]
+        print(f"Final output: {output}")
        return output
 
    return categorize_message
climateqa/engine/chains/retrieve_documents.py CHANGED
@@ -621,10 +621,7 @@ def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_
 
 def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
 
-    async def retrieve_POC_docs_node(state, config):
-        if "POC region" not in state["relevant_content_sources_selection"] :
-            return {}
-
+    async def retrieve_POC_docs_node(state, config):
        source_type = "POC"
        POC_questions_index = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
 
@@ -665,10 +662,7 @@ def make_POC_by_ToC_retriever_node(
        k_summary=5,
    ):
 
-    async def retrieve_POC_docs_node(state, config):
-        if "POC region" not in state["relevant_content_sources_selection"] :
-            return {}
-
+    async def retrieve_POC_docs_node(state, config):
        search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
        search_only = state["search_only"]
        search_only = state["search_only"]
climateqa/engine/chains/standalone_question.py ADDED
@@ -0,0 +1,42 @@
+from langchain.prompts import ChatPromptTemplate
+
+def make_standalone_question_chain(llm):
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", """You are a helpful assistant that transforms user questions into standalone questions
+        by incorporating context from the chat history if needed. The output should be a self-contained
+        question that can be understood without any additional context.
+
+        Examples:
+        Chat History: "Let's talk about renewable energy"
+        User Input: "What about solar?"
+        Output: "What are the key aspects of solar energy as a renewable energy source?"
+
+        Chat History: "What causes global warming?"
+        User Input: "And what are its effects?"
+        Output: "What are the effects of global warming on the environment and society?"
+        """),
+        ("user", """Chat History: {chat_history}
+        User Question: {question}
+
+        Transform this into a standalone question:
+        Make sure to keep the original language of the question.""")
+    ])
+
+    chain = prompt | llm
+    return chain
+
+def make_standalone_question_node(llm):
+    standalone_chain = make_standalone_question_chain(llm)
+
+    def transform_to_standalone(state):
+        chat_history = state.get("chat_history", "")
+        if chat_history == "":
+            return {}
+        output = standalone_chain.invoke({
+            "chat_history": chat_history,
+            "question": state["user_input"]
+        })
+        state["user_input"] = output.content
+        return state
+
+    return transform_to_standalone
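Because the chain is built as prompt | llm, anything LCEL can coerce to a runnable (including a plain callable) can stand in for the model when trying the node out. A sketch under that assumption:

from climateqa.engine.chains.standalone_question import make_standalone_question_node

class StubResponse:
    content = "What are the effects of global warming on the environment and society?"

def stub_llm(prompt_value):
    # Hypothetical stand-in; any LangChain chat model would slot in here
    return StubResponse()

node = make_standalone_question_node(stub_llm)

# With history, user_input is rewritten in place into a standalone question
state = {
    "user_input": "And what are its effects?",
    "chat_history": [{"question": "What causes global warming?", "answer": "..."}],
}
print(node(state)["user_input"])

# An empty chat_history short-circuits and leaves the state untouched
print(node({"user_input": "Hello", "chat_history": ""}))  # {}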
climateqa/engine/graph.py CHANGED
@@ -23,13 +23,15 @@ from .chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriev
23
  from .chains.answer_rag import make_rag_node
24
  from .chains.graph_retriever import make_graph_retriever_node
25
  from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
26
- # from .chains.set_defaults import set_defaults
 
27
 
28
  class GraphState(TypedDict):
29
  """
30
  Represents the state of our graph.
31
  """
32
  user_input : str
 
33
  language : str
34
  intent : str
35
  search_graphs_chitchat : bool
@@ -49,6 +51,7 @@ class GraphState(TypedDict):
49
  recommended_content : List[Document] # OWID Graphs # TODO merge with related_contents
50
  search_only : bool = False
51
  reports : List[str] = []
 
52
 
53
  def dummy(state):
54
  return
@@ -100,15 +103,6 @@ def route_continue_retrieve_documents(state):
100
  else:
101
  return "retrieve_documents"
102
 
103
- def route_continue_retrieve_local_documents(state):
104
- index_question_poc = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
105
- questions_poc_finished = all(elem in state["handled_questions_index"] for elem in index_question_poc)
106
- # if questions_poc_finished and state["search_only"]:
107
- # return END
108
- if questions_poc_finished or ("POC region" not in state["relevant_content_sources_selection"]):
109
- return "end_retrieve_local_documents"
110
- else:
111
- return "retrieve_local_data"
112
 
113
  def route_retrieve_documents(state):
114
  sources_to_retrieve = []
@@ -120,6 +114,11 @@ def route_retrieve_documents(state):
120
  return END
121
  return sources_to_retrieve
122
 
 
 
 
 
 
123
  def make_id_dict(values):
124
  return {k:k for k in values}
125
 
@@ -128,6 +127,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
128
  workflow = StateGraph(GraphState)
129
 
130
  # Define the node functions
 
131
  categorize_intent = make_intent_categorization_node(llm)
132
  transform_query = make_query_transform_node(llm)
133
  translate_query = make_translation_node(llm)
@@ -139,9 +139,11 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
139
  answer_rag = make_rag_node(llm, with_docs=True)
140
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
141
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
 
142
 
143
  # Define the nodes
144
  # workflow.add_node("set_defaults", set_defaults)
 
145
  workflow.add_node("categorize_intent", categorize_intent)
146
  workflow.add_node("answer_climate", dummy)
147
  workflow.add_node("answer_search", answer_search)
@@ -155,9 +157,11 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
155
  workflow.add_node("retrieve_documents", retrieve_documents)
156
  workflow.add_node("answer_rag", answer_rag)
157
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
 
 
158
 
159
  # Entry point
160
- workflow.set_entry_point("categorize_intent")
161
 
162
  # CONDITIONAL EDGES
163
  workflow.add_conditional_edges(
@@ -189,20 +193,29 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_regi
189
  make_id_dict(["retrieve_graphs", END])
190
  )
191
 
 
 
 
 
 
 
192
  # Define the edges
 
193
  workflow.add_edge("translate_query", "transform_query")
194
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
195
  # workflow.add_edge("transform_query", "retrieve_local_data")
196
  # workflow.add_edge("transform_query", END) # TODO remove
197
 
198
  workflow.add_edge("retrieve_graphs", END)
199
- workflow.add_edge("answer_rag", END)
200
- workflow.add_edge("answer_rag_no_docs", END)
201
  workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
202
  workflow.add_edge("retrieve_graphs_chitchat", END)
203
 
204
  # workflow.add_edge("retrieve_local_data", "answer_search")
205
  workflow.add_edge("retrieve_documents", "answer_search")
 
 
206
 
207
  # Compile
208
  app = workflow.compile()
@@ -228,6 +241,8 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
228
  workflow = StateGraph(GraphState)
229
 
230
  # Define the node functions
 
 
231
  categorize_intent = make_intent_categorization_node(llm)
232
  transform_query = make_query_transform_node(llm)
233
  translate_query = make_translation_node(llm)
@@ -240,9 +255,11 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
240
  answer_rag = make_rag_node(llm, with_docs=True)
241
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
242
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
 
243
 
244
  # Define the nodes
245
  # workflow.add_node("set_defaults", set_defaults)
 
246
  workflow.add_node("categorize_intent", categorize_intent)
247
  workflow.add_node("answer_climate", dummy)
248
  workflow.add_node("answer_search", answer_search)
@@ -258,9 +275,10 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
258
  workflow.add_node("retrieve_documents", retrieve_documents)
259
  workflow.add_node("answer_rag", answer_rag)
260
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
 
261
 
262
  # Entry point
263
- workflow.set_entry_point("categorize_intent")
264
 
265
  # CONDITIONAL EDGES
266
  workflow.add_conditional_edges(
@@ -293,22 +311,21 @@ def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_
293
  )
294
 
295
  # Define the edges
 
296
  workflow.add_edge("translate_query", "transform_query")
297
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
298
  workflow.add_edge("transform_query", "retrieve_local_data")
299
  # workflow.add_edge("transform_query", END) # TODO remove
300
 
301
  workflow.add_edge("retrieve_graphs", END)
302
- workflow.add_edge("answer_rag", END)
303
- workflow.add_edge("answer_rag_no_docs", END)
304
  workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
305
  workflow.add_edge("retrieve_graphs_chitchat", END)
306
 
307
  workflow.add_edge("retrieve_local_data", "answer_search")
308
  workflow.add_edge("retrieve_documents", "answer_search")
309
-
310
- # workflow.add_edge("transform_query", "retrieve_drias_data")
311
- # workflow.add_edge("retrieve_drias_data", END)
312
 
313
 
314
  # Compile
 
23
  from .chains.answer_rag import make_rag_node
24
  from .chains.graph_retriever import make_graph_retriever_node
25
  from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
26
+ from .chains.standalone_question import make_standalone_question_node
27
+ from .chains.follow_up import make_follow_up_node # Add this import
28
 
29
  class GraphState(TypedDict):
30
  """
31
  Represents the state of our graph.
32
  """
33
  user_input : str
34
+ chat_history : str
35
  language : str
36
  intent : str
37
  search_graphs_chitchat : bool
 
51
  recommended_content : List[Document] # OWID Graphs # TODO merge with related_contents
52
  search_only : bool = False
53
  reports : List[str] = []
54
+ follow_up_questions: List[str] = []
55
 
56
  def dummy(state):
57
  return
 
103
  else:
104
  return "retrieve_documents"
105
 
 
 
 
 
 
 
 
 
 
106
 
107
  def route_retrieve_documents(state):
108
  sources_to_retrieve = []
 
114
  return END
115
  return sources_to_retrieve
116
 
117
+ def route_follow_up(state):
118
+ if state["follow_up_questions"]:
119
+ return "process_follow_up"
120
+ return END
121
+
122
  def make_id_dict(values):
123
  return {k:k for k in values}
124
 
 
127
  workflow = StateGraph(GraphState)
128
 
129
  # Define the node functions
130
+ standalone_question_node = make_standalone_question_node(llm)
131
  categorize_intent = make_intent_categorization_node(llm)
132
  transform_query = make_query_transform_node(llm)
133
  translate_query = make_translation_node(llm)
 
139
  answer_rag = make_rag_node(llm, with_docs=True)
140
  answer_rag_no_docs = make_rag_node(llm, with_docs=False)
141
  chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
142
+ generate_follow_up = make_follow_up_node(llm)
143
 
144
  # Define the nodes
145
  # workflow.add_node("set_defaults", set_defaults)
146
+ workflow.add_node("standalone_question", standalone_question_node)
147
  workflow.add_node("categorize_intent", categorize_intent)
148
  workflow.add_node("answer_climate", dummy)
149
  workflow.add_node("answer_search", answer_search)
 
157
  workflow.add_node("retrieve_documents", retrieve_documents)
158
  workflow.add_node("answer_rag", answer_rag)
159
  workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
160
+ workflow.add_node("generate_follow_up", generate_follow_up)
161
+ # workflow.add_node("process_follow_up", standalone_question_node)
162
 
163
  # Entry point
164
+ workflow.set_entry_point("standalone_question")
165
 
166
  # CONDITIONAL EDGES
167
  workflow.add_conditional_edges(
 
193
  make_id_dict(["retrieve_graphs", END])
194
  )
195
 
196
+ # workflow.add_conditional_edges(
197
+ # "generate_follow_up",
198
+ # route_follow_up,
199
+ # make_id_dict(["process_follow_up", END])
200
+ # )
201
+
202
  # Define the edges
203
+ workflow.add_edge("standalone_question", "categorize_intent")
204
  workflow.add_edge("translate_query", "transform_query")
205
  workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
206
  # workflow.add_edge("transform_query", "retrieve_local_data")
207
  # workflow.add_edge("transform_query", END) # TODO remove
208
 
209
  workflow.add_edge("retrieve_graphs", END)
210
+ workflow.add_edge("answer_rag", "generate_follow_up")
211
+ workflow.add_edge("answer_rag_no_docs", "generate_follow_up")
212
  workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
213
  workflow.add_edge("retrieve_graphs_chitchat", END)
214
 
215
  # workflow.add_edge("retrieve_local_data", "answer_search")
216
  workflow.add_edge("retrieve_documents", "answer_search")
217
+ workflow.add_edge("generate_follow_up",END)
218
+ # workflow.add_edge("process_follow_up", "categorize_intent")
219
 
220
  # Compile
221
  app = workflow.compile()
 
      workflow = StateGraph(GraphState)

      # Define the node functions
+     standalone_question_node = make_standalone_question_node(llm)
+
      categorize_intent = make_intent_categorization_node(llm)
      transform_query = make_query_transform_node(llm)
      translate_query = make_translation_node(llm)

      answer_rag = make_rag_node(llm, with_docs=True)
      answer_rag_no_docs = make_rag_node(llm, with_docs=False)
      chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
+     generate_follow_up = make_follow_up_node(llm)

      # Define the nodes
      # workflow.add_node("set_defaults", set_defaults)
+     workflow.add_node("standalone_question", standalone_question_node)
      workflow.add_node("categorize_intent", categorize_intent)
      workflow.add_node("answer_climate", dummy)
      workflow.add_node("answer_search", answer_search)

      workflow.add_node("retrieve_documents", retrieve_documents)
      workflow.add_node("answer_rag", answer_rag)
      workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
+     workflow.add_node("generate_follow_up", generate_follow_up)

      # Entry point
+     workflow.set_entry_point("standalone_question")

      # CONDITIONAL EDGES
      workflow.add_conditional_edges(

      )

      # Define the edges
+     workflow.add_edge("standalone_question", "categorize_intent")
      workflow.add_edge("translate_query", "transform_query")
      workflow.add_edge("transform_query", "retrieve_documents")  # TODO put back
      workflow.add_edge("transform_query", "retrieve_local_data")
      # workflow.add_edge("transform_query", END)  # TODO remove

      workflow.add_edge("retrieve_graphs", END)
+     workflow.add_edge("answer_rag", "generate_follow_up")
+     workflow.add_edge("answer_rag_no_docs", "generate_follow_up")
      workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
      workflow.add_edge("retrieve_graphs_chitchat", END)

      workflow.add_edge("retrieve_local_data", "answer_search")
      workflow.add_edge("retrieve_documents", "answer_search")
+     workflow.add_edge("generate_follow_up", END)


      # Compile
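The new `generate_follow_up` node is produced by `make_follow_up_node(llm)`, whose definition is not part of this diff. A minimal sketch of what such a node factory could look like — the state keys (`query`, `answer`, `follow_up_questions`) are assumptions for illustration, not the repo's actual `GraphState` schema:

```python
# Hypothetical sketch of a follow-up node factory; the real
# make_follow_up_node lives elsewhere in the repo and may differ.
def make_follow_up_node(llm):
    def generate_follow_up(state: dict) -> dict:
        # Build a prompt from the user's question and the generated answer
        prompt = (
            "Based on the question and answer below, suggest 2 or 3 short "
            "follow-up questions the user might ask next.\n"
            f"Question: {state['query']}\n"
            f"Answer: {state.get('answer', '')}"
        )
        response = llm.invoke(prompt)
        # Store the suggestions so the UI can display them as clickable examples
        return {**state, "follow_up_questions": response.content}

    return generate_follow_up
```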
climateqa/engine/talk_to_data/config.py ADDED
@@ -0,0 +1,99 @@
+ DRIAS_TABLES = [
+     "total_winter_precipitation",
+     "total_summer_precipiation",  # (sic) spelling matches the source table name
+     "total_annual_precipitation",
+     "total_remarkable_daily_precipitation",
+     "frequency_of_remarkable_daily_precipitation",
+     "extreme_precipitation_intensity",
+     "mean_winter_temperature",
+     "mean_summer_temperature",
+     "mean_annual_temperature",
+     "number_of_tropical_nights",
+     "maximum_summer_temperature",
+     "number_of_days_with_tx_above_30",
+     "number_of_days_with_tx_above_35",
+     "number_of_days_with_a_dry_ground",
+ ]
+
+ # Mapping between table names and their indicator columns
+ INDICATOR_COLUMNS_PER_TABLE = {
+     "total_winter_precipitation": "total_winter_precipitation",
+     "total_summer_precipiation": "total_summer_precipitation",
+     "total_annual_precipitation": "total_annual_precipitation",
+     "total_remarkable_daily_precipitation": "total_remarkable_daily_precipitation",
+     "frequency_of_remarkable_daily_precipitation": "frequency_of_remarkable_daily_precipitation",
+     "extreme_precipitation_intensity": "extreme_precipitation_intensity",
+     "mean_winter_temperature": "mean_winter_temperature",
+     "mean_summer_temperature": "mean_summer_temperature",
+     "mean_annual_temperature": "mean_annual_temperature",
+     "number_of_tropical_nights": "number_tropical_nights",
+     "maximum_summer_temperature": "maximum_summer_temperature",
+     "number_of_days_with_tx_above_30": "number_of_days_with_tx_above_30",
+     "number_of_days_with_tx_above_35": "number_of_days_with_tx_above_35",
+     "number_of_days_with_a_dry_ground": "number_of_days_with_dry_ground"
+ }
+
+ DRIAS_MODELS = [
+     'ALL',
+     'RegCM4-6_MPI-ESM-LR',
+     'RACMO22E_EC-EARTH',
+     'RegCM4-6_HadGEM2-ES',
+     'HadREM3-GA7_EC-EARTH',
+     'HadREM3-GA7_CNRM-CM5',
+     'REMO2015_NorESM1-M',
+     'SMHI-RCA4_EC-EARTH',
+     'WRF381P_NorESM1-M',
+     'ALADIN63_CNRM-CM5',
+     'CCLM4-8-17_MPI-ESM-LR',
+     'HIRHAM5_IPSL-CM5A-MR',
+     'HadREM3-GA7_HadGEM2-ES',
+     'SMHI-RCA4_IPSL-CM5A-MR',
+     'HIRHAM5_NorESM1-M',
+     'REMO2009_MPI-ESM-LR',
+     'CCLM4-8-17_HadGEM2-ES'
+ ]
+ # Mapping between indicator columns and their units
+ INDICATOR_TO_UNIT = {
+     "total_winter_precipitation": "mm",
+     "total_summer_precipitation": "mm",
+     "total_annual_precipitation": "mm",
+     "total_remarkable_daily_precipitation": "mm",
+     "frequency_of_remarkable_daily_precipitation": "days",
+     "extreme_precipitation_intensity": "mm",
+     "mean_winter_temperature": "°C",
+     "mean_summer_temperature": "°C",
+     "mean_annual_temperature": "°C",
+     "number_tropical_nights": "days",
+     "maximum_summer_temperature": "°C",
+     "number_of_days_with_tx_above_30": "days",
+     "number_of_days_with_tx_above_35": "days",
+     "number_of_days_with_dry_ground": "days"
+ }
+
+ DRIAS_UI_TEXT = """
+ Hi, I'm **Talk to Drias**, designed to answer your questions using [**DRIAS - TRACC 2023**](https://www.drias-climat.fr/accompagnement/sections/401) data.
+ I'll answer by displaying a list of SQL queries, graphs and data most relevant to your question.
+
+ ❓ **How to use?**
+ You can ask me anything about these climate indicators: **temperature**, **precipitation** or **drought**.
+ You can specify **location** and/or **year**.
+ You can choose from a list of climate models. By default, we take the **average across all models**.
+
+ For example, you can ask:
+ - What will the temperature be like in Paris?
+ - What will be the total rainfall in France in 2030?
+ - How frequent will extreme events be in Lyon?
+
+ **Examples of indicators in the data**:
+ - Mean temperature (annual, winter, summer)
+ - Total precipitation (annual, winter, summer)
+ - Number of days with remarkable precipitation, with dry ground, or with temperatures above 30°C
+
+ ⚠️ **Limitations**:
+ - You can't ask anything that isn't related to **DRIAS - TRACC 2023** data.
+ - You can only ask about **locations in France**.
+ - If you specify a year, there may be **no data for that year for some models**.
+ - You **cannot compare two models**.
+
+ 🛈 **Information**
+ Please note that we **log your questions for meta-analysis purposes**, so avoid sharing any sensitive or personal information.
+ """
climateqa/engine/talk_to_data/main.py CHANGED
@@ -1,47 +1,115 @@
- from climateqa.engine.talk_to_data.myVanna import MyVanna
- from climateqa.engine.talk_to_data.utils import loc2coords, detect_location_with_openai, detectTable, nearestNeighbourSQL, detect_relevant_tables, replace_coordonates
- import sqlite3
- import os
- import pandas as pd
  from climateqa.engine.llm import get_llm
  import ast

-
-
  llm = get_llm(provider="openai")

- def ask_llm_to_add_table_names(sql_query, llm):
      sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query}. Just answer the query. The answer should not include ```sql\n").content
      return sql_with_table_names

- def ask_llm_column_names(sql_query, llm):
      columns = llm.invoke(f"From the given sql query, list the columns that are being selected. The answer should only be a python list. Just answer the list. The SQL query : {sql_query}").content
      columns_list = ast.literal_eval(columns.strip("```python\n").strip())
      return columns_list

- def ask_vanna(vn, db_vanna_path, query):

-     try:
-         location = detect_location_with_openai(query)
-         if location:

-             coords = loc2coords(location)
-             user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")

-             relevant_tables = detect_relevant_tables(user_input, llm)
-             coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]
-             user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)
-
-             sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)
-
-             return sql_query, result_dataframe, figure
-
-         else:
-             empty_df = pd.DataFrame()
-             empty_fig = None
-             return "", empty_df, empty_fig
-     except Exception as e:
-         print(f"Error: {e}")
-         empty_df = pd.DataFrame()
-         empty_fig = None
-         return "", empty_df, empty_fig

+ from climateqa.engine.talk_to_data.workflow import drias_workflow
  from climateqa.engine.llm import get_llm
  import ast

  llm = get_llm(provider="openai")

+ def ask_llm_to_add_table_names(sql_query: str, llm) -> str:
+     """Adds table names to the SQL query result rows using LLM.
+
+     This function modifies the SQL query to include the source table name in each row
+     of the result set, making it easier to track which data comes from which table.
+
+     Args:
+         sql_query (str): The original SQL query to modify
+         llm: The language model instance to use for generating the modified query
+
+     Returns:
+         str: The modified SQL query with table names included in the result rows
+     """
      sql_with_table_names = llm.invoke(f"Make the following sql query display the source table in the rows {sql_query}. Just answer the query. The answer should not include ```sql\n").content
      return sql_with_table_names

+ def ask_llm_column_names(sql_query: str, llm) -> list[str]:
+     """Extracts column names from a SQL query using LLM.
+
+     This function analyzes a SQL query to identify which columns are being selected
+     in the result set.
+
+     Args:
+         sql_query (str): The SQL query to analyze
+         llm: The language model instance to use for column extraction
+
+     Returns:
+         list[str]: A list of column names being selected in the query
+     """
      columns = llm.invoke(f"From the given sql query, list the columns that are being selected. The answer should only be a python list. Just answer the list. The SQL query : {sql_query}").content
      columns_list = ast.literal_eval(columns.strip("```python\n").strip())
      return columns_list

+ async def ask_drias(query: str, index_state: int = 0) -> tuple:
+     """Main function to process a DRIAS query and return results.
+
+     This function orchestrates the DRIAS workflow, processing a user query to generate
+     SQL queries, dataframes, and visualizations. It handles multiple results and allows
+     pagination through them.
+
+     Args:
+         query (str): The user's question about climate data
+         index_state (int, optional): The index of the result to return. Defaults to 0.
+
+     Returns:
+         tuple: A tuple containing:
+             - sql_query (str): The SQL query used
+             - dataframe (pd.DataFrame): The resulting data
+             - figure (Callable): Function to generate the visualization
+             - sql_queries (list): All generated SQL queries
+             - result_dataframes (list): All resulting dataframes
+             - figures (list): All figure generation functions
+             - index_state (int): Current result index
+             - table_list (list): List of table names used
+             - error (str): Error message if any
+     """
+     final_state = await drias_workflow(query)
+     sql_queries = []
+     result_dataframes = []
+     figures = []
+     table_list = []
+
+     for plot_state in final_state['plot_states'].values():
+         for table_state in plot_state['table_states'].values():
+             if table_state['status'] == 'OK':
+                 if 'table_name' in table_state:
+                     table_list.append(' '.join(table_state['table_name'].capitalize().split('_')))
+                 if 'sql_query' in table_state and table_state['sql_query'] is not None:
+                     sql_queries.append(table_state['sql_query'])
+
+                 if 'dataframe' in table_state and table_state['dataframe'] is not None:
+                     result_dataframes.append(table_state['dataframe'])
+                 if 'figure' in table_state and table_state['figure'] is not None:
+                     figures.append(table_state['figure'])
+
+     if "error" in final_state and final_state["error"] != "":
+         # Keep the same arity as the success path (9 values)
+         return None, None, None, [], [], [], 0, [], final_state["error"]
+
+     sql_query = sql_queries[index_state]
+     dataframe = result_dataframes[index_state]
+     figure = figures[index_state](dataframe)

+     return sql_query, dataframe, figure, sql_queries, result_dataframes, figures, index_state, table_list, ""


+ # def ask_vanna(vn, db_vanna_path, query):
+
+ #     try:
+ #         location = detect_location_with_openai(query)
+ #         if location:
+
+ #             coords = loc2coords(location)
+ #             user_input = query.lower().replace(location.lower(), f"lat, long : {coords}")

+ #             relevant_tables = detect_relevant_tables(db_vanna_path, user_input, llm)
+ #             coords_tables = [nearestNeighbourSQL(db_vanna_path, coords, relevant_tables[i]) for i in range(len(relevant_tables))]
+ #             user_input_with_coords = replace_coordonates(coords, user_input, coords_tables)
+
+ #             sql_query, result_dataframe, figure = vn.ask(user_input_with_coords, print_results=False, allow_llm_to_see_data=True, auto_train=False)
+
+ #             return sql_query, result_dataframe, figure
+ #         else:
+ #             empty_df = pd.DataFrame()
+ #             empty_fig = None
+ #             return "", empty_df, empty_fig
+ #     except Exception as e:
+ #         print(f"Error: {e}")
+ #         empty_df = pd.DataFrame()
+ #         empty_fig = None
+ #         return "", empty_df, empty_fig
climateqa/engine/talk_to_data/plot.py ADDED
@@ -0,0 +1,402 @@
+ from typing import Callable, TypedDict
+ import pandas as pd
+ from plotly.graph_objects import Figure
+ import plotly.graph_objects as go
+ import plotly.express as px
+
+ from climateqa.engine.talk_to_data.sql_query import (
+     indicator_for_given_year_query,
+     indicator_per_year_at_location_query,
+ )
+ from climateqa.engine.talk_to_data.config import INDICATOR_TO_UNIT
+
+
+ class Plot(TypedDict):
+     """Represents a plot configuration in the DRIAS system.
+
+     This class defines the structure for configuring different types of plots
+     that can be generated from climate data.
+
+     Attributes:
+         name (str): The name of the plot type
+         description (str): A description of what the plot shows
+         params (list[str]): List of required parameters for the plot
+         plot_function (Callable[..., Callable[..., Figure]]): Function to generate the plot
+         sql_query (Callable[..., str]): Function to generate the SQL query for the plot
+     """
+     name: str
+     description: str
+     params: list[str]
+     plot_function: Callable[..., Callable[..., Figure]]
+     sql_query: Callable[..., str]
+
+
+ def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
+     """Generates a function to plot indicator evolution over time at a location.
+
+     This function creates a line plot showing how a climate indicator changes
+     over time at a specific location. It handles temperature, precipitation,
+     and other climate indicators.
+
+     Args:
+         params (dict): Dictionary containing:
+             - indicator_column (str): The column name for the indicator
+             - location (str): The location to plot
+             - model (str): The climate model to use
+
+     Returns:
+         Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
+
+     Example:
+         >>> plot_func = plot_indicator_evolution_at_location({
+         ...     'indicator_column': 'mean_temperature',
+         ...     'location': 'Paris',
+         ...     'model': 'ALL'
+         ... })
+         >>> fig = plot_func(df)
+     """
+     indicator = params["indicator_column"]
+     location = params["location"]
+     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+     unit = INDICATOR_TO_UNIT.get(indicator, "")
+
+     def plot_data(df: pd.DataFrame) -> Figure:
+         """Generates the actual plot from the data.
+
+         Args:
+             df (pd.DataFrame): DataFrame containing the data to plot
+
+         Returns:
+             Figure: A plotly Figure object showing the indicator evolution
+         """
+         fig = go.Figure()
+         if df['model'].nunique() != 1:
+             df_avg = df.groupby("year", as_index=False)[indicator].mean()
+
+             # Transform to list to avoid pandas encoding
+             indicators = df_avg[indicator].astype(float).tolist()
+             years = df_avg["year"].astype(int).tolist()
+
+             # Compute the 10-year rolling average
+             sliding_averages = (
+                 df_avg[indicator]
+                 .rolling(window=10, min_periods=1)
+                 .mean()
+                 .astype(float)
+                 .tolist()
+             )
+             model_label = "Model Average"
+
+         else:
+             df_model = df
+
+             # Transform to list to avoid pandas encoding
+             indicators = df_model[indicator].astype(float).tolist()
+             years = df_model["year"].astype(int).tolist()
+
+             # Compute the 10-year rolling average
+             sliding_averages = (
+                 df_model[indicator]
+                 .rolling(window=10, min_periods=1)
+                 .mean()
+                 .astype(float)
+                 .tolist()
+             )
+             model_label = f"Model : {df['model'].unique()[0]}"
+
+         # Indicator per year plot
+         fig.add_scatter(
+             x=years,
+             y=indicators,
+             name=f"Yearly {indicator_label}",
+             mode="lines",
+             marker=dict(color="#1f77b4"),
+             hovertemplate=f"{indicator_label}: %{{y:.2f}} {unit}<br>Year: %{{x}}<extra></extra>"
+         )
+
+         # Sliding average dashed line
+         fig.add_scatter(
+             x=years,
+             y=sliding_averages,
+             mode="lines",
+             name="10 years rolling average",
+             line=dict(dash="dash"),
+             marker=dict(color="#d62728"),
+             hovertemplate=f"10-year average: %{{y:.2f}} {unit}<br>Year: %{{x}}<extra></extra>"
+         )
+         fig.update_layout(
+             title=f"Plot of {indicator_label} in {location} ({model_label})",
+             xaxis_title="Year",
+             yaxis_title=f"{indicator_label} ({unit})",
+             template="plotly_white",
+         )
+         return fig
+
+     return plot_data
+
+
+ indicator_evolution_at_location: Plot = {
+     "name": "Indicator evolution at location",
+     "description": "Plot the evolution of an indicator at a certain location",
+     "params": ["indicator_column", "location", "model"],
+     "plot_function": plot_indicator_evolution_at_location,
+     "sql_query": indicator_per_year_at_location_query,
+ }
+
+
+ def plot_indicator_number_of_days_per_year_at_location(
+     params: dict,
+ ) -> Callable[..., Figure]:
+     """Generates a function to plot the number of days per year for an indicator.
+
+     This function creates a bar chart showing the frequency of certain climate
+     events (like days above a temperature threshold) per year at a specific location.
+
+     Args:
+         params (dict): Dictionary containing:
+             - indicator_column (str): The column name for the indicator
+             - location (str): The location to plot
+             - model (str): The climate model to use
+
+     Returns:
+         Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
+     """
+     indicator = params["indicator_column"]
+     location = params["location"]
+     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+     unit = INDICATOR_TO_UNIT.get(indicator, "")
+
+     def plot_data(df: pd.DataFrame) -> Figure:
+         """Generates the figure from the dataframe.
+
+         Args:
+             df (pd.DataFrame): pandas dataframe with the required data
+
+         Returns:
+             Figure: Plotly figure
+         """
+         fig = go.Figure()
+         if df['model'].nunique() != 1:
+             df_avg = df.groupby("year", as_index=False)[indicator].mean()
+
+             # Transform to list to avoid pandas encoding
+             indicators = df_avg[indicator].astype(float).tolist()
+             years = df_avg["year"].astype(int).tolist()
+             model_label = "Model Average"
+
+         else:
+             df_model = df
+             # Transform to list to avoid pandas encoding
+             indicators = df_model[indicator].astype(float).tolist()
+             years = df_model["year"].astype(int).tolist()
+             model_label = f"Model : {df['model'].unique()[0]}"
+
+         # Bar plot
+         fig.add_trace(
+             go.Bar(
+                 x=years,
+                 y=indicators,
+                 width=0.5,
+                 marker=dict(color="#1f77b4"),
+                 hovertemplate=f"{indicator_label}: %{{y:.2f}} {unit}<br>Year: %{{x}}<extra></extra>"
+             )
+         )
+
+         fig.update_layout(
+             title=f"{indicator_label} in {location} ({model_label})",
+             xaxis_title="Year",
+             yaxis_title=f"{indicator_label} ({unit})",
+             yaxis=dict(range=[0, max(indicators)]),
+             bargap=0.5,
+             template="plotly_white",
+         )
+
+         return fig
+
+     return plot_data
+
+
+ indicator_number_of_days_per_year_at_location: Plot = {
+     "name": "Indicator number of days per year at location",
+     "description": "Plot a barchart of the number of days per year of a certain indicator at a certain location. It is appropriate for frequency indicators.",
+     "params": ["indicator_column", "location", "model"],
+     "plot_function": plot_indicator_number_of_days_per_year_at_location,
+     "sql_query": indicator_per_year_at_location_query,
+ }
+
+
+ def plot_distribution_of_indicator_for_given_year(
+     params: dict,
+ ) -> Callable[..., Figure]:
+     """Generates a function to plot the distribution of an indicator for a year.
+
+     This function creates a histogram showing the distribution of a climate
+     indicator across different locations for a specific year.
+
+     Args:
+         params (dict): Dictionary containing:
+             - indicator_column (str): The column name for the indicator
+             - year (str): The year to plot
+             - model (str): The climate model to use
+
+     Returns:
+         Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
+     """
+     indicator = params["indicator_column"]
+     year = params["year"]
+     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+     unit = INDICATOR_TO_UNIT.get(indicator, "")
+
+     def plot_data(df: pd.DataFrame) -> Figure:
+         """Generates the figure from the dataframe.
+
+         Args:
+             df (pd.DataFrame): pandas dataframe with the required data
+
+         Returns:
+             Figure: Plotly figure
+         """
+         fig = go.Figure()
+         if df['model'].nunique() != 1:
+             df_avg = df.groupby(["latitude", "longitude"], as_index=False)[
+                 indicator
+             ].mean()
+
+             # Transform to list to avoid pandas encoding
+             indicators = df_avg[indicator].astype(float).tolist()
+             model_label = "Model Average"
+
+         else:
+             df_model = df
+
+             # Transform to list to avoid pandas encoding
+             indicators = df_model[indicator].astype(float).tolist()
+             model_label = f"Model : {df['model'].unique()[0]}"
+
+         fig.add_trace(
+             go.Histogram(
+                 x=indicators,
+                 opacity=0.8,
+                 histnorm="percent",
+                 marker=dict(color="#1f77b4"),
+                 hovertemplate=f"{indicator_label}: %{{x:.2f}} {unit}<br>Frequency: %{{y:.2f}}%<extra></extra>"
+             )
+         )
+
+         fig.update_layout(
+             title=f"Distribution of {indicator_label} in {year} ({model_label})",
+             xaxis_title=f"{indicator_label} ({unit})",
+             yaxis_title="Frequency (%)",
+             plot_bgcolor="rgba(0, 0, 0, 0)",
+             showlegend=False,
+         )
+
+         return fig
+
+     return plot_data
+
+
+ distribution_of_indicator_for_given_year: Plot = {
+     "name": "Distribution of an indicator for a given year",
+     "description": "Plot a histogram of the distribution of the values of an indicator for a given year",
+     "params": ["indicator_column", "model", "year"],
+     "plot_function": plot_distribution_of_indicator_for_given_year,
+     "sql_query": indicator_for_given_year_query,
+ }
+
+
+ def plot_map_of_france_of_indicator_for_given_year(
+     params: dict,
+ ) -> Callable[..., Figure]:
+     """Generates a function to plot a map of France for an indicator.
+
+     This function creates a choropleth map of France showing the spatial
+     distribution of a climate indicator for a specific year.
+
+     Args:
+         params (dict): Dictionary containing:
+             - indicator_column (str): The column name for the indicator
+             - year (str): The year to plot
+             - model (str): The climate model to use
+
+     Returns:
+         Callable[..., Figure]: A function that takes a DataFrame and returns a plotly Figure
+     """
+     indicator = params["indicator_column"]
+     year = params["year"]
+     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
+     unit = INDICATOR_TO_UNIT.get(indicator, "")
+
+     def plot_data(df: pd.DataFrame) -> Figure:
+         fig = go.Figure()
+         if df['model'].nunique() != 1:
+             df_avg = df.groupby(["latitude", "longitude"], as_index=False)[
+                 indicator
+             ].mean()
+
+             indicators = df_avg[indicator].astype(float).tolist()
+             latitudes = df_avg["latitude"].astype(float).tolist()
+             longitudes = df_avg["longitude"].astype(float).tolist()
+             model_label = "Model Average"
+
+         else:
+             df_model = df
+
+             # Transform to list to avoid pandas encoding
+             indicators = df_model[indicator].astype(float).tolist()
+             latitudes = df_model["latitude"].astype(float).tolist()
+             longitudes = df_model["longitude"].astype(float).tolist()
+             model_label = f"Model : {df['model'].unique()[0]}"
+
+         fig.add_trace(
+             go.Scattermapbox(
+                 lat=latitudes,
+                 lon=longitudes,
+                 mode="markers",
+                 marker=dict(
+                     size=10,
+                     color=indicators,       # Color mapped to values
+                     colorscale="Turbo",     # Color scale (can be 'Plasma', 'Jet', etc.)
+                     cmin=min(indicators),   # Minimum color range
+                     cmax=max(indicators),   # Maximum color range
+                     showscale=True,         # Show colorbar
+                 ),
+                 text=[f"{indicator_label}: {value:.2f} {unit}" for value in indicators],  # Add hover text showing the indicator value
+                 hoverinfo="text"  # Only show the custom text on hover
+             )
+         )
+
+         fig.update_layout(
+             mapbox_style="open-street-map",  # Use OpenStreetMap
+             mapbox_zoom=3,
+             mapbox_center={"lat": 46.6, "lon": 2.0},
+             coloraxis_colorbar=dict(title=f"{indicator_label} ({unit})"),  # Add legend
+             title=f"{indicator_label} in {year} in France ({model_label})"  # Title
+         )
+         return fig
+
+     return plot_data
+
+
+ map_of_france_of_indicator_for_given_year: Plot = {
+     "name": "Map of France of an indicator for a given year",
+     "description": "Heatmap on the map of France of the values of an indicator for a given year",
+     "params": ["indicator_column", "year", "model"],
+     "plot_function": plot_map_of_france_of_indicator_for_given_year,
+     "sql_query": indicator_for_given_year_query,
+ }
+
+
+ PLOTS = [
+     indicator_evolution_at_location,
+     indicator_number_of_days_per_year_at_location,
+     distribution_of_indicator_for_given_year,
+     map_of_france_of_indicator_for_given_year,
+ ]
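Each `Plot` entry bundles a SQL builder with a figure factory, so rendering a chart is a three-step lookup. A sketch using the registry above (the parameter values are illustrative, not real grid coordinates):

```python
import asyncio

from climateqa.engine.talk_to_data.plot import PLOTS
from climateqa.engine.talk_to_data.sql_query import execute_sql_query

async def render(plot_name: str, table: str, params: dict):
    plot = next(p for p in PLOTS if p["name"] == plot_name)
    sql = plot["sql_query"](table, params)      # build the query
    df = await execute_sql_query(sql)           # fetch the data
    return plot["plot_function"](params)(df)    # build the figure

fig = asyncio.run(render(
    "Indicator evolution at location",
    "mean_annual_temperature",
    {"indicator_column": "mean_annual_temperature",
     "latitude": "48.86", "longitude": "2.35",
     "location": "Paris", "model": "ALL"},
))
```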
climateqa/engine/talk_to_data/sql_query.py ADDED
@@ -0,0 +1,113 @@
+ import asyncio
+ from concurrent.futures import ThreadPoolExecutor
+ from typing import TypedDict
+ import duckdb
+ import pandas as pd
+
+ async def execute_sql_query(sql_query: str) -> pd.DataFrame:
+     """Executes a SQL query on the DRIAS database and returns the results.
+
+     This function connects to the DuckDB database containing DRIAS climate data
+     and executes the provided SQL query. It handles the database connection and
+     returns the results as a pandas DataFrame.
+
+     Args:
+         sql_query (str): The SQL query to execute
+
+     Returns:
+         pd.DataFrame: A DataFrame containing the query results
+
+     Raises:
+         duckdb.Error: If there is an error executing the SQL query
+     """
+     def _execute_query():
+         # Execute the query
+         results = duckdb.sql(sql_query)
+         # Return the fetched data
+         return results.fetchdf()
+
+     # Run the query in a thread pool to avoid blocking the event loop
+     loop = asyncio.get_running_loop()
+     with ThreadPoolExecutor() as executor:
+         return await loop.run_in_executor(executor, _execute_query)
+
+
+ class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
+     """Parameters for querying an indicator's values over time at a location.
+
+     This class defines the parameters needed to query climate indicator data
+     for a specific location over multiple years.
+
+     Attributes:
+         indicator_column (str): The column name for the climate indicator
+         latitude (str): The latitude coordinate of the location
+         longitude (str): The longitude coordinate of the location
+         model (str): The climate model to use (optional)
+     """
+     indicator_column: str
+     latitude: str
+     longitude: str
+     model: str
+
+
+ def indicator_per_year_at_location_query(
+     table: str, params: IndicatorPerYearAtLocationQueryParams
+ ) -> str:
+     """SQL query to get the evolution of an indicator per year at a certain location.
+
+     Args:
+         table (str): sql table of the indicator
+         params (IndicatorPerYearAtLocationQueryParams): dictionary with the required params for the query
+
+     Returns:
+         str: the sql query
+     """
+     indicator_column = params.get("indicator_column")
+     latitude = params.get("latitude")
+     longitude = params.get("longitude")
+
+     if indicator_column is None or latitude is None or longitude is None:  # If one parameter is missing, return an empty query
+         return ""
+
+     table = f"'hf://datasets/timeki/drias_db/{table.lower()}.parquet'"
+
+     sql_query = f"SELECT year, {indicator_column}, model\nFROM {table}\nWHERE latitude = {latitude}\nAND longitude = {longitude}\nORDER BY year"
+
+     return sql_query
+
+ class IndicatorForGivenYearQueryParams(TypedDict, total=False):
+     """Parameters for querying an indicator's values across locations for a year.
+
+     This class defines the parameters needed to query climate indicator data
+     across different locations for a specific year.
+
+     Attributes:
+         indicator_column (str): The column name for the climate indicator
+         year (str): The year to query
+         model (str): The climate model to use (optional)
+     """
+     indicator_column: str
+     year: str
+     model: str
+
+ def indicator_for_given_year_query(
+     table: str, params: IndicatorForGivenYearQueryParams
+ ) -> str:
+     """SQL query to get the values of an indicator with their latitudes, longitudes and models for a given year.
+
+     Args:
+         table (str): sql table of the indicator
+         params (IndicatorForGivenYearQueryParams): dictionary with the required params for the query
+
+     Returns:
+         str: the sql query
+     """
+     indicator_column = params.get("indicator_column")
+     year = params.get('year')
+     if year is None or indicator_column is None:  # If one parameter is missing, return an empty query
+         return ""
+
+     table = f"'hf://datasets/timeki/drias_db/{table.lower()}.parquet'"
+
+     sql_query = f"SELECT {indicator_column}, latitude, longitude, model\nFROM {table}\nWHERE year = {year}"
+     return sql_query
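For reference, these builders produce plain DuckDB SQL against the Hugging Face-hosted parquet files. For example (coordinates illustrative):

```python
from climateqa.engine.talk_to_data.sql_query import indicator_per_year_at_location_query

print(indicator_per_year_at_location_query(
    "mean_annual_temperature",
    {"indicator_column": "mean_annual_temperature",
     "latitude": "48.86", "longitude": "2.35"},
))
# SELECT year, mean_annual_temperature, model
# FROM 'hf://datasets/timeki/drias_db/mean_annual_temperature.parquet'
# WHERE latitude = 48.86
# AND longitude = 2.35
# ORDER BY year
```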
climateqa/engine/talk_to_data/utils.py CHANGED
@@ -1,12 +1,15 @@
  import re
- import openai
- import pandas as pd
  from geopy.geocoders import Nominatim
- import sqlite3
  import ast
  from climateqa.engine.llm import get_llm

- def detect_location_with_openai(sentence):
      """
      Detects locations in a sentence using OpenAI's API via LangChain.
      """
@@ -19,74 +22,260 @@ def detect_location_with_openai(sentence):
      Sentence: "{sentence}"
      """

-     response = llm.invoke(prompt)
      location_list = ast.literal_eval(response.content.strip("```python\n").strip())
      if location_list:
          return location_list[0]
      else:
          return ""

- def detectTable(sql_query):
      pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
      matches = re.findall(pattern, sql_query)
      return matches


- def loc2coords(location : str):
      geolocator = Nominatim(user_agent="city_to_latlong")
-     location = geolocator.geocode(location)
-     return (location.latitude, location.longitude)


- def coords2loc(coords : tuple):
      geolocator = Nominatim(user_agent="coords_to_city")
      try:
          location = geolocator.reverse(coords)
          return location.address
      except Exception as e:
          print(f"Error: {e}")
-         return "Unknown Location"


- def nearestNeighbourSQL(db: str, location: tuple, table : str):
-     conn = sqlite3.connect(db)
      long = round(location[1], 3)
      lat = round(location[0], 3)
-     cursor = conn.cursor()
-     cursor.execute(f"SELECT lat, lon FROM {table} WHERE lat BETWEEN {lat - 0.3} AND {lat + 0.3} AND lon BETWEEN {long - 0.3} AND {long + 0.3}")
-     results = cursor.fetchall()
-     return results[0]
-
- def detect_relevant_tables(user_question, llm):
-     table_names_list = [
-         "Frequency_of_rainy_days_index",
-         "Winter_precipitation_total",
-         "Summer_precipitation_total",
-         "Annual_precipitation_total",
-         # "Remarkable_daily_precipitation_total_(Q99)",
-         "Frequency_of_remarkable_daily_precipitation",
-         "Extreme_precipitation_intensity",
-         "Mean_winter_temperature",
-         "Mean_summer_temperature",
-         "Number_of_tropical_nights",
-         "Maximum_summer_temperature",
-         "Number_of_days_with_Tx_above_30C",
-         "Number_of_days_with_Tx_above_35C",
-         "Drought_index"
-     ]
      prompt = (
-         f"You are helping to build a sql query to retrieve relevant data for a user question."
-         f"The different tables are {table_names_list}."
-         f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table name."
      )
-     table_names = ast.literal_eval(llm.invoke(prompt).content.strip("```python\n").strip())
      return table_names

  def replace_coordonates(coords, query, coords_tables):
      n = query.count(str(coords[0]))

      for i in range(n):
-         query = query.replace(str(coords[0]), str(coords_tables[i][0]),1)
-         query = query.replace(str(coords[1]), str(coords_tables[i][1]),1)
-     return query

  import re
+ from typing import Annotated, TypedDict
+ import duckdb
  from geopy.geocoders import Nominatim
  import ast
  from climateqa.engine.llm import get_llm
+ from climateqa.engine.talk_to_data.config import DRIAS_TABLES
+ from climateqa.engine.talk_to_data.plot import PLOTS, Plot
+ from langchain_core.prompts import ChatPromptTemplate

+
+ async def detect_location_with_openai(sentence):
      """
      Detects locations in a sentence using OpenAI's API via LangChain.
      """

      Sentence: "{sentence}"
      """

+     response = await llm.ainvoke(prompt)
      location_list = ast.literal_eval(response.content.strip("```python\n").strip())
      if location_list:
          return location_list[0]
      else:
          return ""

+ class ArrayOutput(TypedDict):
+     """Represents the output of a function that returns an array.
+
+     This class is used to type-hint functions that return arrays,
+     ensuring consistent return types across the codebase.
+
+     Attributes:
+         array (str): A syntactically valid Python array string
+     """
+     array: Annotated[str, "Syntactically valid python array."]
+
+ async def detect_year_with_openai(sentence: str) -> str:
+     """
+     Detects years in a sentence using OpenAI's API via LangChain.
+     """
+     llm = get_llm()
+
+     prompt = """
+     Extract all years mentioned in the following sentence.
+     Return the result as a Python list. If no years are mentioned, return an empty list.
+
+     Sentence: "{sentence}"
+     """
+
+     prompt = ChatPromptTemplate.from_template(prompt)
+     structured_llm = llm.with_structured_output(ArrayOutput)
+     chain = prompt | structured_llm
+     response: ArrayOutput = await chain.ainvoke({"sentence": sentence})
+     # literal_eval is safer than eval for parsing the model's list output
+     years_list = ast.literal_eval(response['array'])
+     if len(years_list) > 0:
+         return years_list[0]
+     else:
+         return ""
+
+
+ def detectTable(sql_query: str) -> list[str]:
+     """Extracts table names from a SQL query.
+
+     This function uses regular expressions to find all table names
+     referenced in a SQL query's FROM clause.
+
+     Args:
+         sql_query (str): The SQL query to analyze
+
+     Returns:
+         list[str]: A list of table names found in the query
+
+     Example:
+         >>> detectTable("SELECT * FROM temperature_data WHERE year > 2000")
+         ['temperature_data']
+     """
      pattern = r'(?i)\bFROM\s+((?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)(?:\.(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+))*)'
      matches = re.findall(pattern, sql_query)
      return matches


+ def loc2coords(location: str) -> tuple[float, float]:
+     """Converts a location name to geographic coordinates.
+
+     This function uses the Nominatim geocoding service to convert
+     a location name (e.g., city name) to its latitude and longitude.
+
+     Args:
+         location (str): The name of the location to geocode
+
+     Returns:
+         tuple[float, float]: A tuple containing (latitude, longitude)
+
+     Raises:
+         AttributeError: If the location cannot be found
+     """
      geolocator = Nominatim(user_agent="city_to_latlong")
+     coords = geolocator.geocode(location)
+     return (coords.latitude, coords.longitude)


+ def coords2loc(coords: tuple[float, float]) -> str:
+     """Converts geographic coordinates to a location name.
+
+     This function uses the Nominatim reverse geocoding service to convert
+     latitude and longitude coordinates to a human-readable location name.
+
+     Args:
+         coords (tuple[float, float]): A tuple containing (latitude, longitude)
+
+     Returns:
+         str: The address of the location, or "Unknown Location" if not found
+
+     Example:
+         >>> coords2loc((48.8566, 2.3522))
+         'Paris, France'
+     """
      geolocator = Nominatim(user_agent="coords_to_city")
      try:
          location = geolocator.reverse(coords)
          return location.address
      except Exception as e:
          print(f"Error: {e}")
+         return "Unknown Location"


+ def nearestNeighbourSQL(location: tuple, table: str) -> tuple[str, str]:
+     """Finds the closest DRIAS grid point to the given (latitude, longitude)."""
      long = round(location[1], 3)
      lat = round(location[0], 3)
+
+     table = f"'hf://datasets/timeki/drias_db/{table.lower()}.parquet'"
+
+     results = duckdb.sql(
+         f"SELECT latitude, longitude FROM {table} WHERE latitude BETWEEN {lat - 0.3} AND {lat + 0.3} AND longitude BETWEEN {long - 0.3} AND {long + 0.3}"
+     ).fetchdf()
+
+     if len(results) == 0:
+         return "", ""
+     return results['latitude'].iloc[0], results['longitude'].iloc[0]
+
+
+ async def detect_relevant_tables(user_question: str, plot: Plot, llm) -> list[str]:
+     """Identifies relevant tables for a plot based on user input.
+
+     This function uses an LLM to analyze the user's question and the plot
+     description to determine which tables in the DRIAS database would be
+     most relevant for generating the requested visualization.
+
+     Args:
+         user_question (str): The user's question about climate data
+         plot (Plot): The plot configuration object
+         llm: The language model instance to use for analysis
+
+     Returns:
+         list[str]: A list of table names that are relevant for the plot
+
+     Example:
+         >>> await detect_relevant_tables(
+         ...     "What will the temperature be like in Paris?",
+         ...     indicator_evolution_at_location,
+         ...     llm
+         ... )
+         ['mean_annual_temperature', 'mean_summer_temperature']
+     """
+     # Get all table names
+     table_names_list = DRIAS_TABLES
+
      prompt = (
+         f"You are helping to build a plot following this description: {plot['description']}. "
+         f"You are given a list of tables and a user question. "
+         f"Based on the description of the plot, decide which tables are appropriate for that kind of plot. "
+         f"Write the 3 most relevant tables to use. Answer only a python list of table names. "
+         f"### List of tables: {table_names_list} "
+         f"### User question: {user_question} "
+         f"### List of table names: "
+     )
+
+     table_names = ast.literal_eval(
+         (await llm.ainvoke(prompt)).content.strip("```python\n").strip()
      )
      return table_names

+
  def replace_coordonates(coords, query, coords_tables):
      n = query.count(str(coords[0]))

      for i in range(n):
+         query = query.replace(str(coords[0]), str(coords_tables[i][0]), 1)
+         query = query.replace(str(coords[1]), str(coords_tables[i][1]), 1)
+     return query
+
+
+ async def detect_relevant_plots(user_question: str, llm):
+     plots_description = ""
+     for plot in PLOTS:
+         plots_description += "Name: " + plot["name"]
+         plots_description += " - Description: " + plot["description"] + "\n"
+
+     prompt = (
+         f"You are helping to answer a question with insightful visualizations. "
+         f"You are given a user question and a list of plots with their name and description. "
+         f"Based on the descriptions of the plots, decide which plots are appropriate to answer this question. "
+         f"Write the most relevant plots to use. Answer only a python list of plot names. "
+         f"### Descriptions of the plots: {plots_description} "
+         f"### User question: {user_question} "
+         f"### Names of the plots: "
+     )
+     # prompt = (
+     #     f"You are helping to answer a question with insightful visualizations. "
+     #     f"Given a list of plots with their name and description: "
+     #     f"{plots_description} "
+     #     f"The user question is: {user_question}. "
+     #     f"Choose the most relevant plots to answer the question. "
+     #     f"The answer must be a Python list with the names of the relevant plots, and nothing else. "
+     #     f"Ensure the response is in the exact format: ['PlotName1', 'PlotName2']."
+     # )
+
+     plot_names = ast.literal_eval(
+         (await llm.ainvoke(prompt)).content.strip("```python\n").strip()
+     )
+     return plot_names
+
+
+ # Next Version
+ # class QueryOutput(TypedDict):
+ #     """Generated SQL query."""
+
+ #     query: Annotated[str, ..., "Syntactically valid SQL query."]
+
+
+ # class PlotlyCodeOutput(TypedDict):
+ #     """Generated Plotly code"""
+
+ #     code: Annotated[str, ..., "Syntactically valid Plotly python code."]
+ # def write_sql_query(user_input: str, db: SQLDatabase, relevant_tables: list[str], llm):
+ #     """Generate SQL query to fetch information."""
+ #     prompt_params = {
+ #         "dialect": db.dialect,
+ #         "table_info": db.get_table_info(),
+ #         "input": user_input,
+ #         "relevant_tables": relevant_tables,
+ #         "model": "ALADIN63_CNRM-CM5",
+ #     }
+
+ #     prompt = ChatPromptTemplate.from_template(query_prompt_template)
+ #     structured_llm = llm.with_structured_output(QueryOutput)
+ #     chain = prompt | structured_llm
+ #     result = chain.invoke(prompt_params)
+
+ #     return result["query"]
+
+
+ # def fetch_data_from_sql_query(db: str, sql_query: str):
+ #     conn = sqlite3.connect(db)
+ #     cursor = conn.cursor()
+ #     cursor.execute(sql_query)
+ #     column_names = [desc[0] for desc in cursor.description]
+ #     values = cursor.fetchall()
+ #     return {"column_names": column_names, "data": values}
+
+
+ # def generate_chart_code(user_input: str, sql_query: list[str], llm):
+ #     """Generate plotly python code for the chart based on the sql query and the user question"""
+
+ #     class PlotlyCodeOutput(TypedDict):
+ #         """Generated Plotly code"""
+
+ #         code: Annotated[str, ..., "Syntactically valid Plotly python code."]
+
+ #     prompt = ChatPromptTemplate.from_template(plot_prompt_template)
+ #     structured_llm = llm.with_structured_output(PlotlyCodeOutput)
+ #     chain = prompt | structured_llm
+ #     result = chain.invoke({"input": user_input, "sql_query": sql_query})
+ #     return result["code"]
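Putting the geocoding helpers together — resolve a place name to coordinates, then snap to the nearest DRIAS grid point. A sketch under the signatures above (Nominatim requires network access, and the returned grid coordinates depend on the dataset):

```python
from climateqa.engine.talk_to_data.utils import loc2coords, nearestNeighbourSQL

coords = loc2coords("Lyon")  # e.g. (45.75, 4.85)
lat, lon = nearestNeighbourSQL(coords, "mean_annual_temperature")
print(f"Nearest grid point: {lat}, {lon}")
```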
climateqa/engine/talk_to_data/workflow.py ADDED
@@ -0,0 +1,287 @@
+ import os
+
+ from typing import Any, Callable, NotRequired, TypedDict
+ import pandas as pd
+
+ from plotly.graph_objects import Figure
+ from climateqa.engine.llm import get_llm
+ from climateqa.engine.talk_to_data.config import INDICATOR_COLUMNS_PER_TABLE
+ from climateqa.engine.talk_to_data.plot import PLOTS, Plot
+ from climateqa.engine.talk_to_data.sql_query import execute_sql_query
+ from climateqa.engine.talk_to_data.utils import (
+     detect_relevant_plots,
+     detect_year_with_openai,
+     loc2coords,
+     detect_location_with_openai,
+     nearestNeighbourSQL,
+     detect_relevant_tables,
+ )
+
+ ROOT_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
+
+ class TableState(TypedDict):
+     """Represents the state of a table in the DRIAS workflow.
+
+     This class defines the structure for tracking the state of a table during the
+     data processing workflow, including its name, parameters, SQL query, and results.
+
+     Attributes:
+         table_name (str): The name of the table in the database
+         params (dict[str, Any]): Parameters used for querying the table
+         sql_query (str, optional): The SQL query used to fetch data
+         dataframe (pd.DataFrame | None, optional): The resulting data
+         figure (Callable[..., Figure], optional): Function to generate visualization
+         status (str): The current status of the table processing ('OK' or 'ERROR')
+     """
+     table_name: str
+     params: dict[str, Any]
+     sql_query: NotRequired[str]
+     dataframe: NotRequired[pd.DataFrame | None]
+     figure: NotRequired[Callable[..., Figure]]
+     status: str
+
+ class PlotState(TypedDict):
+     """Represents the state of a plot in the DRIAS workflow.
+
+     This class defines the structure for tracking the state of a plot during the
+     data processing workflow, including its name and associated tables.
+
+     Attributes:
+         plot_name (str): The name of the plot
+         tables (list[str]): List of tables used in the plot
+         table_states (dict[str, TableState]): States of the tables used in the plot
+     """
+     plot_name: str
+     tables: list[str]
+     table_states: dict[str, TableState]
+
+ class State(TypedDict):
+     user_input: str
+     plots: list[str]
+     plot_states: dict[str, PlotState]
+     error: NotRequired[str]
+
+ async def drias_workflow(user_input: str) -> State:
+     """Performs the complete Talk to Drias workflow: from the user input to the
+     generated SQL queries, dataframes and figures.
+
+     Args:
+         user_input (str): initial user input
+
+     Returns:
+         State: Final state with all the results
+     """
+     state: State = {
+         'user_input': user_input,
+         'plots': [],
+         'plot_states': {}
+     }
+
+     llm = get_llm(provider="openai")
+
+     plots = await find_relevant_plots(state, llm)
+     state['plots'] = plots
+
+     if not state['plots']:
+         state['error'] = 'There is no plot to answer the question'
+         return state
+
+     have_relevant_table = False
+     have_sql_query = False
+     have_dataframe = False
+     for plot_name in state['plots']:
+
+         plot = next((p for p in PLOTS if p['name'] == plot_name), None)  # Find the associated plot object
+         if plot is None:
+             continue
+
+         plot_state: PlotState = {
+             'plot_name': plot_name,
+             'tables': [],
+             'table_states': {}
+         }
+
+         relevant_tables = await find_relevant_tables_per_plot(state, plot, llm)
+         if len(relevant_tables) > 0:
+             have_relevant_table = True
+
+         plot_state['tables'] = relevant_tables
+         if not relevant_tables:
+             # No relevant table for this plot; skip parameter extraction
+             state['plot_states'][plot_name] = plot_state
+             continue
+
+         params = {}
+         for param_name in plot['params']:
+             param = await find_param(state, param_name, relevant_tables[0])
+             if param:
+                 params.update(param)
+
+         for n, table in enumerate(plot_state['tables']):
+             if n > 2:
+                 break
+
+             table_state: TableState = {
+                 'table_name': table,
+                 # Copy so each table keeps its own params (indicator_column differs per table)
+                 'params': params.copy(),
+                 'status': 'OK'
+             }
+
+             table_state["params"]['indicator_column'] = find_indicator_column(table)
+
+             sql_query = plot['sql_query'](table, table_state['params'])
+
+             if sql_query == "":
+                 table_state['status'] = 'ERROR'
+                 continue
+             else:
+                 have_sql_query = True
+
+             table_state['sql_query'] = sql_query
+             df = await execute_sql_query(sql_query)
+
+             if len(df) > 0:
+                 have_dataframe = True
+
+             figure = plot['plot_function'](table_state['params'])
+             table_state['dataframe'] = df
+             table_state['figure'] = figure
+             plot_state['table_states'][table] = table_state
+
+         state['plot_states'][plot_name] = plot_state
+
+     if not have_relevant_table:
+         state['error'] = "There is no relevant table in our database to answer your question"
+     elif not have_sql_query:
+         state['error'] = "There is no relevant SQL query on our database that can help to answer your question"
+     elif not have_dataframe:
+         state['error'] = "There is no data in our tables that can answer your question"
+
+     return state
+
+ async def find_relevant_plots(state: State, llm) -> list[str]:
+     print("---- Find relevant plots ----")
+     relevant_plots = await detect_relevant_plots(state['user_input'], llm)
+     return relevant_plots
+
+ async def find_relevant_tables_per_plot(state: State, plot: Plot, llm) -> list[str]:
+     print(f"---- Find relevant tables for {plot['name']} ----")
+     relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm)
+     return relevant_tables
+
+ async def find_param(state: State, param_name: str, table: str) -> dict[str, Any] | None:
+     """Dispatches to the appropriate method to retrieve the desired parameter.
+
+     Args:
+         state (State): state of the workflow
+         param_name (str): name of the desired parameter
+         table (str): name of the table
+
+     Returns:
+         dict[str, Any] | None: the parameter as a dict, or None if the parameter is not handled
+     """
+     if param_name == 'location':
+         location = await find_location(state['user_input'], table)
+         return location
+     if param_name == 'year':
+         year = await find_year(state['user_input'])
+         return {'year': year}
+     return None
+
+ class Location(TypedDict):
+     location: str
+     latitude: NotRequired[str]
+     longitude: NotRequired[str]
+
+ async def find_location(user_input: str, table: str) -> Location:
+     print(f"---- Find location in table {table} ----")
+     location = await detect_location_with_openai(user_input)
+     output: Location = {'location': location}
+     if location:
+         coords = loc2coords(location)
+         neighbour = nearestNeighbourSQL(coords, table)
+         output.update({
+             "latitude": neighbour[0],
+             "longitude": neighbour[1],
+         })
+     return output
+
+ async def find_year(user_input: str) -> str:
+     """Extracts year information from user input using LLM.
+
+     This function uses an LLM to identify and extract year information from the
+     user's query, which is used to filter data in subsequent queries.
+
+     Args:
+         user_input (str): The user's query text
+
+     Returns:
+         str: The extracted year, or empty string if no year found
+     """
+     print("---- Find year ----")
+     year = await detect_year_with_openai(user_input)
+     return year
+
+ def find_indicator_column(table: str) -> str:
+     """Retrieves the name of the indicator column within a table.
+
+     This function maps table names to their corresponding indicator columns
+     using the predefined mapping in INDICATOR_COLUMNS_PER_TABLE.
+
+     Args:
+         table (str): Name of the table in the database
+
+     Returns:
+         str: Name of the indicator column for the specified table
+
+     Raises:
+         KeyError: If the table name is not found in the mapping
+     """
+     print(f"---- Find indicator column in table {table} ----")
+     return INDICATOR_COLUMNS_PER_TABLE[table]
+
+
+ # def make_write_query_node():
+
+ #     def write_query(state):
+ #         print("---- Write query ----")
+ #         for table in state["tables"]:
+ #             sql_query = QUERIES[state[table]['query_type']](
+ #                 table=table,
+ #                 indicator_column=state[table]["columns"],
+ #                 longitude=state[table]["longitude"],
+ #                 latitude=state[table]["latitude"],
+ #             )
+ #             state[table].update({"sql_query": sql_query})
+
+ #         return state
+
+ #     return write_query
+
+ # def make_fetch_data_node(db_path):
+
+ #     def fetch_data(state):
+ #         print("---- Fetch data ----")
+ #         for table in state["tables"]:
+ #             results = execute_sql_query(db_path, state[table]['sql_query'])
+ #             state[table].update(results)
+
+ #         return state
+
+ #     return fetch_data
+
+
+
+ ## V2
+
+
+ # def make_fetch_data_node(db_path: str, llm):
+ #     def fetch_data(state):
+ #         print("---- Fetch data ----")
+ #         db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
+ #         output = {}
+ #         sql_query = write_sql_query(state["query"], db, state["tables"], llm)
+ #         # TO DO : Add query checker
+ #         print(f"SQL query : {sql_query}")
+ #         output["sql_query"] = sql_query
+ #         output.update(fetch_data_from_sql_query(db_path, sql_query))
+ #         return output
+
+ #     return fetch_data
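To see what `drias_workflow` produces, the final `State` can be walked directly — this condensed sketch mirrors the traversal that `ask_drias` performs in main.py:

```python
import asyncio

from climateqa.engine.talk_to_data.workflow import drias_workflow

state = asyncio.run(drias_workflow("What will the temperature be like in Paris?"))
if state.get("error"):
    print(state["error"])
else:
    for plot_name, plot_state in state["plot_states"].items():
        for table, table_state in plot_state["table_states"].items():
            if table_state["status"] == "OK":
                print(plot_name, "->", table)
                print(table_state.get("sql_query", ""))
```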
front/tabs/__init__.py CHANGED
@@ -3,4 +3,7 @@ from .tab_examples import create_examples_tab
  from .tab_papers import create_papers_tab
  from .tab_figures import create_figures_tab
  from .chat_interface import create_chat_interface
- from .tab_about import create_about_tab

  from .tab_papers import create_papers_tab
  from .tab_figures import create_figures_tab
  from .chat_interface import create_chat_interface
+ from .tab_about import create_about_tab
+ from .main_tab import MainTabPanel
+ from .tab_config import ConfigPanel
+ from .main_tab import cqa_tab
front/tabs/chat_interface.py CHANGED
@@ -21,21 +21,21 @@ What do you want to learn ?
  """
 
  init_prompt_poc = """
- Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports, PCAET of Paris, the Plan Biodiversité 2018-2024, and Acclimaterra reports from la Région Nouvelle-Aquitaine **.
+ Bonjour, je suis ClimateQ&A, un assistant conversationnel conçu pour vous aider à comprendre le changement climatique et la perte de biodiversité. Je réponds à vos questions en **parcourant les rapports scientifiques du GIEC et de l'IPBES, le PCAET de Paris, le Plan Biodiversité 2018-2024, et les rapports Acclimaterra de la Région Nouvelle-Aquitaine**.
 
- How to use
- - **Language**: You can ask me your questions in any language.
- - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
- - **Sources**: You can choose to search in the IPCC or IPBES reports, and POC sources for local documents (PCAET, Plan Biodiversité, Acclimaterra).
- - **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
+ Mode d'emploi
+ - **Language** : Vous pouvez me poser vos questions dans n'importe quelle langue.
+ - **Audience** : Vous pouvez préciser votre public (enfants, grand public, experts) pour obtenir une réponse plus adaptée.
+ - **Sources** : Vous pouvez choisir de chercher dans les rapports du GIEC ou de l'IPBES, et dans les sources POC pour les documents locaux (PCAET, Plan Biodiversité, Acclimaterra).
+ - **Relevant content sources** : Vous pouvez choisir de rechercher des images, des papiers scientifiques ou des graphiques qui peuvent être pertinents pour votre question.
 
  ⚠️ Limitations
- *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
+ *Veuillez noter que l'IA n'est pas parfaite et peut parfois donner des réponses non pertinentes. Si vous n'êtes pas satisfait de la réponse, veuillez poser une question plus précise ou nous faire part de vos commentaires pour nous aider à améliorer le système.*
 
- 🛈 Information
- Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
+ 🛈 Informations
+ Veuillez noter que nous enregistrons vos questions à des fins de méta-analyse, évitez donc de partager toute information sensible ou personnelle.
 
- What do you want to learn ?
+ Que voulez-vous apprendre ?
  """
 
 
@@ -54,7 +54,10 @@ def create_chat_interface(tab):
  max_height="80vh",
  height="100vh"
  )
-
+ with gr.Accordion("Click here for follow-up question examples", elem_id="follow-up-examples", open=False):
+ follow_up_examples_hidden = gr.Textbox(visible=False, elem_id="follow-up-hidden")
+ follow_up_examples = gr.Examples(examples=["What evidence do we have of climate change ?"], label="", inputs=[follow_up_examples_hidden], elem_id="follow-up-button", run_on_click=False)
+
  with gr.Row(elem_id="input-message"):
 
  textbox = gr.Textbox(
@@ -68,7 +71,7 @@
 
  config_button = gr.Button("", elem_id="config-button")
 
- return chatbot, textbox, config_button
+ return chatbot, textbox, config_button, follow_up_examples, follow_up_examples_hidden
 
 
 
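The hidden textbox returned above is the hook for follow-up examples: clicking an example writes its text into the hidden component, and the app can route the resulting .change event into the chat pipeline (tab_drias.py below uses the same trick). A generic, self-contained sketch of the pattern, with illustrative names rather than the app's real wiring:

import gradio as gr

with gr.Blocks() as demo:
    hidden = gr.Textbox(visible=False)
    gr.Examples(examples=["What evidence do we have of climate change ?"], inputs=[hidden])
    chat = gr.Chatbot()

    def push_example(text, history):
        # Append the clicked example as a user turn (tuple-style history for brevity).
        return (history or []) + [(text, None)]

    # Example clicks populate `hidden`, which fires .change and feeds the chat.
    hidden.change(push_example, inputs=[hidden, chat], outputs=[chat])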
front/tabs/main_tab.py CHANGED
@@ -1,8 +1,37 @@
  import gradio as gr
+ from gradio.helpers import Examples
+ from typing import TypedDict
  from .chat_interface import create_chat_interface
  from .tab_examples import create_examples_tab
  from .tab_papers import create_papers_tab
  from .tab_figures import create_figures_tab
+ from dataclasses import dataclass
+
+ @dataclass
+ class MainTabPanel:
+ chatbot: gr.Chatbot
+ textbox: gr.Textbox
+ tabs: gr.Tabs
+ sources_raw: gr.State
+ new_figures: gr.State
+ current_graphs: gr.State
+ examples_hidden: gr.State
+ sources_textbox: gr.HTML
+ figures_cards: gr.HTML
+ gallery_component: gr.Gallery
+ config_button: gr.Button
+ papers_direct_search: gr.TextArea
+ papers_html: gr.HTML
+ citations_network: gr.Plot
+ papers_summary: gr.Textbox
+ tab_recommended_content: gr.Tab
+ tab_sources: gr.Tab
+ tab_figures: gr.Tab
+ tab_graphs: gr.Tab
+ tab_papers: gr.Tab
+ graph_container: gr.HTML
+ follow_up_examples: Examples
+ follow_up_examples_hidden: gr.Textbox
 
  def cqa_tab(tab_name):
  # State variables
@@ -11,14 +40,14 @@ def cqa_tab(tab_name):
  with gr.Row(elem_id="chatbot-row"):
  # Left column - Chat interface
  with gr.Column(scale=2):
- chatbot, textbox, config_button = create_chat_interface(tab_name)
+ chatbot, textbox, config_button, follow_up_examples, follow_up_examples_hidden = create_chat_interface(tab_name)
 
  # Right column - Content panels
  with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
  with gr.Tabs(elem_id="right_panel_tab") as tabs:
  # Examples tab
  with gr.TabItem("Examples", elem_id="tab-examples", id=0):
- examples_hidden, dropdown_samples, samples = create_examples_tab()
+ examples_hidden = create_examples_tab(tab_name)
 
  # Sources tab
  with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
@@ -34,7 +63,7 @@
 
  # Papers subtab
  with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
- papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
+ papers_direct_search, papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
 
  # Graphs subtab
  with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
@@ -42,27 +71,30 @@
  "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
  elem_id="graphs-container"
  )
- return {
- "chatbot": chatbot,
- "textbox": textbox,
- "tabs": tabs,
- "sources_raw": sources_raw,
- "new_figures": new_figures,
- "current_graphs": current_graphs,
- "examples_hidden": examples_hidden,
- "dropdown_samples": dropdown_samples,
- "samples": samples,
- "sources_textbox": sources_textbox,
- "figures_cards": figures_cards,
- "gallery_component": gallery_component,
- "config_button": config_button,
- "papers_html": papers_html,
- "citations_network": citations_network,
- "papers_summary": papers_summary,
- "tab_recommended_content": tab_recommended_content,
- "tab_sources": tab_sources,
- "tab_figures": tab_figures,
- "tab_graphs": tab_graphs,
- "tab_papers": tab_papers,
- "graph_container": graphs_container
- }
+
+
+ return MainTabPanel(
+ chatbot=chatbot,
+ textbox=textbox,
+ tabs=tabs,
+ sources_raw=sources_raw,
+ new_figures=new_figures,
+ current_graphs=current_graphs,
+ examples_hidden=examples_hidden,
+ sources_textbox=sources_textbox,
+ figures_cards=figures_cards,
+ gallery_component=gallery_component,
+ config_button=config_button,
+ papers_direct_search=papers_direct_search,
+ papers_html=papers_html,
+ citations_network=citations_network,
+ papers_summary=papers_summary,
+ tab_recommended_content=tab_recommended_content,
+ tab_sources=tab_sources,
+ tab_figures=tab_figures,
+ tab_graphs=tab_graphs,
+ tab_papers=tab_papers,
+ graph_container=graphs_container,
+ follow_up_examples=follow_up_examples,
+ follow_up_examples_hidden=follow_up_examples_hidden
+ )
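Returning a MainTabPanel dataclass instead of the previous dict gives callers attribute access that IDEs and type checkers can verify, and construction fails immediately if a field is missing. A toy, runnable stand-in illustrating the difference (names here are illustrative):

from dataclasses import dataclass

@dataclass
class Panel:
    chatbot: str
    textbox: str

panel = Panel(chatbot="chatbot", textbox="textbox")
print(panel.chatbot)  # attribute access, validated when Panel is constructed
# panel.chatbots would raise AttributeError at once; with a dict, the typo
# "chatbots" would only surface as a KeyError wherever it is first used.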
front/tabs/tab_config.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
  from gradio_modal import Modal
  from climateqa.constants import POSSIBLE_REPORTS
  from typing import TypedDict
+ from dataclasses import dataclass
 
- class ConfigPanel(TypedDict):
+ @dataclass
+ class ConfigPanel:
  config_open: gr.State
  config_modal: Modal
  dropdown_sources: gr.CheckboxGroup
@@ -14,6 +16,7 @@ class ConfigPanel(TypedDict):
  after: gr.Slider
  output_query: gr.Textbox
  output_language: gr.Textbox
+ close_config_modal_button: gr.Button
 
 
  def create_config_modal():
@@ -37,9 +40,9 @@
  )
 
  dropdown_external_sources = gr.CheckboxGroup(
- choices=["Figures (IPCC/IPBES)", "Papers (OpenAlex)", "Graphs (OurWorldInData)","POC region"],
+ choices=["Figures (IPCC/IPBES)", "Papers (OpenAlex)", "Graphs (OurWorldInData)"],
  label="Select database to search for relevant content",
- value=["Figures (IPCC/IPBES)","POC region"],
+ value=["Figures (IPCC/IPBES)"],
  interactive=True
  )
 
@@ -95,29 +98,16 @@
 
  close_config_modal_button = gr.Button("Validate and Close", elem_id="close-config-modal")
 
-
- # return ConfigPanel(
- # config_open=config_open,
- # config_modal=config_modal,
- # dropdown_sources=dropdown_sources,
- # dropdown_reports=dropdown_reports,
- # dropdown_external_sources=dropdown_external_sources,
- # search_only=search_only,
- # dropdown_audience=dropdown_audience,
- # after=after,
- # output_query=output_query,
- # output_language=output_language
- # )
- return {
- "config_open" : config_open,
- "config_modal": config_modal,
- "dropdown_sources": dropdown_sources,
- "dropdown_reports": dropdown_reports,
- "dropdown_external_sources": dropdown_external_sources,
- "search_only": search_only,
- "dropdown_audience": dropdown_audience,
- "after": after,
- "output_query": output_query,
- "output_language": output_language,
- "close_config_modal_button": close_config_modal_button
- }
+ return ConfigPanel(
+ config_open=config_open,
+ config_modal=config_modal,
+ dropdown_sources=dropdown_sources,
+ dropdown_reports=dropdown_reports,
+ dropdown_external_sources=dropdown_external_sources,
+ search_only=search_only,
+ dropdown_audience=dropdown_audience,
+ after=after,
+ output_query=output_query,
+ output_language=output_language,
+ close_config_modal_button=close_config_modal_button
+ )
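The ConfigPanel dataclass now also exposes close_config_modal_button, so a caller can wire the modal's open/close behavior against named fields. A minimal sketch of that pattern with gradio_modal, assuming illustrative element names rather than the app's real wiring:

import gradio as gr
from gradio_modal import Modal

with gr.Blocks() as demo:
    open_button = gr.Button("Open config")
    with Modal(visible=False) as config_modal:
        close_button = gr.Button("Validate and Close")
    # Toggling visibility by returning a Modal update is the documented
    # gradio_modal pattern; the app would use config_panel.config_modal and
    # config_panel.close_config_modal_button instead of these local names.
    open_button.click(lambda: Modal(visible=True), None, config_modal)
    close_button.click(lambda: Modal(visible=False), None, config_modal)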
front/tabs/tab_drias.py ADDED
@@ -0,0 +1,362 @@
1
+ import gradio as gr
2
+ from typing import TypedDict
3
+ import os
4
+ import pandas as pd
5
+
6
+ from climateqa.engine.talk_to_data.main import ask_drias
7
+ from climateqa.engine.talk_to_data.config import DRIAS_MODELS, DRIAS_UI_TEXT
8
+ from climateqa.chat import log_drias_interaction_to_azure
9
+
10
+
11
+ class DriasUIElements(TypedDict):
12
+ tab: gr.Tab
13
+ details_accordion: gr.Accordion
14
+ examples_hidden: gr.Textbox
15
+ examples: gr.Examples
16
+ drias_direct_question: gr.Textbox
17
+ result_text: gr.Textbox
18
+ table_names_display: gr.DataFrame
19
+ query_accordion: gr.Accordion
20
+ drias_sql_query: gr.Textbox
21
+ chart_accordion: gr.Accordion
22
+ model_selection: gr.Dropdown
23
+ drias_display: gr.Plot
24
+ table_accordion: gr.Accordion
25
+ drias_table: gr.DataFrame
26
+ pagination_display: gr.Markdown
27
+ prev_button: gr.Button
28
+ next_button: gr.Button
29
+
30
+
31
+ async def ask_drias_query(query: str, index_state: int):
32
+ result = await ask_drias(query, index_state)
33
+ return result
34
+
35
+
36
+ def show_results(sql_queries_state, dataframes_state, plots_state):
37
+ if not sql_queries_state or not dataframes_state or not plots_state:
38
+ # If all results are empty, show "No result"
39
+ return (
40
+ gr.update(visible=True),
41
+ gr.update(visible=False),
42
+ gr.update(visible=False),
43
+ gr.update(visible=False),
44
+ gr.update(visible=False),
45
+ gr.update(visible=False),
46
+ gr.update(visible=False),
47
+ gr.update(visible=False),
48
+ )
49
+ else:
50
+ # Show the appropriate components with their data
51
+ return (
52
+ gr.update(visible=False),
53
+ gr.update(visible=True),
54
+ gr.update(visible=True),
55
+ gr.update(visible=True),
56
+ gr.update(visible=True),
57
+ gr.update(visible=True),
58
+ gr.update(visible=True),
59
+ gr.update(visible=True),
60
+ )
61
+
62
+
63
+ def filter_by_model(dataframes, figures, index_state, model_selection):
64
+ df = dataframes[index_state]
65
+ if df.empty:
66
+ return df, None
67
+ if "model" not in df.columns:
68
+ return df, figures[index_state](df)
69
+ if model_selection != "ALL":
70
+ df = df[df["model"] == model_selection]
71
+ if df.empty:
72
+ return df, None
73
+ figure = figures[index_state](df)
74
+ return df, figure
75
+
76
+
77
+ def update_pagination(index, sql_queries):
78
+ pagination = f"{index + 1}/{len(sql_queries)}" if sql_queries else ""
79
+ return pagination
80
+
81
+
82
+ def show_previous(index, sql_queries, dataframes, plots):
83
+ if index > 0:
84
+ index -= 1
85
+ return (
86
+ sql_queries[index],
87
+ dataframes[index],
88
+ plots[index](dataframes[index]),
89
+ index,
90
+ )
91
+
92
+
93
+ def show_next(index, sql_queries, dataframes, plots):
94
+ if index < len(sql_queries) - 1:
95
+ index += 1
96
+ return (
97
+ sql_queries[index],
98
+ dataframes[index],
99
+ plots[index](dataframes[index]),
100
+ index,
101
+ )
102
+
103
+
104
+ def display_table_names(table_names):
105
+ return [table_names]
106
+
107
+
108
+ def on_table_click(evt: gr.SelectData, table_names, sql_queries, dataframes, plots):
109
+ index = evt.index[1]
110
+ figure = plots[index](dataframes[index])
111
+ return (
112
+ sql_queries[index],
113
+ dataframes[index],
114
+ figure,
115
+ index,
116
+ )
117
+
118
+
119
+ def create_drias_ui() -> DriasUIElements:
120
+ """Create and return all UI elements for the DRIAS tab."""
121
+ with gr.Tab("Beta - Talk to DRIAS", elem_id="tab-vanna", id=6) as tab:
122
+ with gr.Accordion(label="Details") as details_accordion:
123
+ gr.Markdown(DRIAS_UI_TEXT)
124
+
125
+ # Add examples for common questions
126
+ examples_hidden = gr.Textbox(visible=False, elem_id="drias-examples-hidden")
127
+ examples = gr.Examples(
128
+ examples=[
129
+ ["What will the temperature be like in Paris?"],
130
+ ["What will be the total rainfall in France in 2030?"],
131
+ ["How frequent will extreme events be in Lyon?"],
132
+ ["Comment va évoluer la température en France entre 2030 et 2050 ?"]
133
+ ],
134
+ label="Example Questions",
135
+ inputs=[examples_hidden],
136
+ outputs=[examples_hidden],
137
+ )
138
+
139
+ with gr.Row():
140
+ drias_direct_question = gr.Textbox(
141
+ label="Direct Question",
142
+ placeholder="You can write direct question here",
143
+ elem_id="direct-question",
144
+ interactive=True,
145
+ )
146
+
147
+ result_text = gr.Textbox(
148
+ label="", elem_id="no-result-label", interactive=False, visible=True
149
+ )
150
+
151
+ table_names_display = gr.DataFrame(
152
+ [], label="List of relevant indicators", headers=None, interactive=False, elem_id="table-names", visible=False
153
+ )
154
+
155
+ with gr.Accordion(label="SQL Query Used", visible=False) as query_accordion:
156
+ drias_sql_query = gr.Textbox(
157
+ label="", elem_id="sql-query", interactive=False
158
+ )
159
+
160
+ with gr.Accordion(label="Chart", visible=False) as chart_accordion:
161
+ model_selection = gr.Dropdown(
162
+ label="Model", choices=DRIAS_MODELS, value="ALL", interactive=True
163
+ )
164
+ drias_display = gr.Plot(elem_id="vanna-plot")
165
+
166
+ with gr.Accordion(
167
+ label="Data used", open=False, visible=False
168
+ ) as table_accordion:
169
+ drias_table = gr.DataFrame([], elem_id="vanna-table")
170
+
171
+ pagination_display = gr.Markdown(
172
+ value="", visible=False, elem_id="pagination-display"
173
+ )
174
+
175
+ with gr.Row():
176
+ prev_button = gr.Button("Previous", visible=False)
177
+ next_button = gr.Button("Next", visible=False)
178
+
179
+ return DriasUIElements(
180
+ tab=tab,
181
+ details_accordion=details_accordion,
182
+ examples_hidden=examples_hidden,
183
+ examples=examples,
184
+ drias_direct_question=drias_direct_question,
185
+ result_text=result_text,
186
+ table_names_display=table_names_display,
187
+ query_accordion=query_accordion,
188
+ drias_sql_query=drias_sql_query,
189
+ chart_accordion=chart_accordion,
190
+ model_selection=model_selection,
191
+ drias_display=drias_display,
192
+ table_accordion=table_accordion,
193
+ drias_table=drias_table,
194
+ pagination_display=pagination_display,
195
+ prev_button=prev_button,
196
+ next_button=next_button
197
+ )
198
+
199
+ def log_drias_to_azure(query: str, sql_query: str, data, share_client, user_id):
200
+ """Log Drias interaction to Azure storage."""
201
+ print("log_drias_to_azure")
202
+ if share_client is not None and user_id is not None:
203
+ log_drias_interaction_to_azure(
204
+ query=query,
205
+ sql_query=sql_query,
206
+ data=data,
207
+ share_client=share_client,
208
+ user_id=user_id
209
+ )
210
+ else:
211
+ print("share_client or user_id is None")
212
+
213
+ def setup_drias_events(ui_elements: DriasUIElements, share_client=None, user_id=None) -> None:
214
+ """Set up all event handlers for the DRIAS tab."""
215
+ # Create state variables
216
+ sql_queries_state = gr.State([])
217
+ dataframes_state = gr.State([])
218
+ plots_state = gr.State([])
219
+ index_state = gr.State(0)
220
+ table_names_list = gr.State([])
221
+
222
+ def log_drias_interaction(query: str, sql_query: str, data: pd.DataFrame):
223
+ log_drias_to_azure(query, sql_query, data, share_client, user_id)
224
+
225
+
226
+ # Handle example selection
227
+ ui_elements["examples_hidden"].change(
228
+ lambda x: (gr.Accordion(open=False), gr.Textbox(value=x)),
229
+ inputs=[ui_elements["examples_hidden"]],
230
+ outputs=[ui_elements["details_accordion"], ui_elements["drias_direct_question"]]
231
+ ).then(
232
+ ask_drias_query,
233
+ inputs=[ui_elements["examples_hidden"], index_state],
234
+ outputs=[
235
+ ui_elements["drias_sql_query"],
236
+ ui_elements["drias_table"],
237
+ ui_elements["drias_display"],
238
+ sql_queries_state,
239
+ dataframes_state,
240
+ plots_state,
241
+ index_state,
242
+ table_names_list,
243
+ ui_elements["result_text"],
244
+ ],
245
+ ).then(
246
+ log_drias_interaction,
247
+ inputs=[ui_elements["examples_hidden"], ui_elements["drias_sql_query"], ui_elements["drias_table"]],
248
+ outputs=[],
249
+ ).then(
250
+ show_results,
251
+ inputs=[sql_queries_state, dataframes_state, plots_state],
252
+ outputs=[
253
+ ui_elements["result_text"],
254
+ ui_elements["query_accordion"],
255
+ ui_elements["table_accordion"],
256
+ ui_elements["chart_accordion"],
257
+ ui_elements["prev_button"],
258
+ ui_elements["next_button"],
259
+ ui_elements["pagination_display"],
260
+ ui_elements["table_names_display"],
261
+ ],
262
+ ).then(
263
+ update_pagination,
264
+ inputs=[index_state, sql_queries_state],
265
+ outputs=[ui_elements["pagination_display"]],
266
+ ).then(
267
+ display_table_names,
268
+ inputs=[table_names_list],
269
+ outputs=[ui_elements["table_names_display"]],
270
+ )
271
+
272
+ # Handle direct question submission
273
+ ui_elements["drias_direct_question"].submit(
274
+ lambda: gr.Accordion(open=False),
275
+ inputs=None,
276
+ outputs=[ui_elements["details_accordion"]]
277
+ ).then(
278
+ ask_drias_query,
279
+ inputs=[ui_elements["drias_direct_question"], index_state],
280
+ outputs=[
281
+ ui_elements["drias_sql_query"],
282
+ ui_elements["drias_table"],
283
+ ui_elements["drias_display"],
284
+ sql_queries_state,
285
+ dataframes_state,
286
+ plots_state,
287
+ index_state,
288
+ table_names_list,
289
+ ui_elements["result_text"],
290
+ ],
291
+ ).then(
292
+ log_drias_interaction,
293
+ inputs=[ui_elements["drias_direct_question"], ui_elements["drias_sql_query"], ui_elements["drias_table"]],
294
+ outputs=[],
295
+ ).then(
296
+ show_results,
297
+ inputs=[sql_queries_state, dataframes_state, plots_state],
298
+ outputs=[
299
+ ui_elements["result_text"],
300
+ ui_elements["query_accordion"],
301
+ ui_elements["table_accordion"],
302
+ ui_elements["chart_accordion"],
303
+ ui_elements["prev_button"],
304
+ ui_elements["next_button"],
305
+ ui_elements["pagination_display"],
306
+ ui_elements["table_names_display"],
307
+ ],
308
+ ).then(
309
+ update_pagination,
310
+ inputs=[index_state, sql_queries_state],
311
+ outputs=[ui_elements["pagination_display"]],
312
+ ).then(
313
+ display_table_names,
314
+ inputs=[table_names_list],
315
+ outputs=[ui_elements["table_names_display"]],
316
+ )
317
+
318
+ # Handle model selection change
319
+ ui_elements["model_selection"].change(
320
+ filter_by_model,
321
+ inputs=[dataframes_state, plots_state, index_state, ui_elements["model_selection"]],
322
+ outputs=[ui_elements["drias_table"], ui_elements["drias_display"]],
323
+ )
324
+
325
+ # Handle pagination buttons
326
+ ui_elements["prev_button"].click(
327
+ show_previous,
328
+ inputs=[index_state, sql_queries_state, dataframes_state, plots_state],
329
+ outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], index_state],
330
+ ).then(
331
+ update_pagination,
332
+ inputs=[index_state, sql_queries_state],
333
+ outputs=[ui_elements["pagination_display"]],
334
+ )
335
+
336
+ ui_elements["next_button"].click(
337
+ show_next,
338
+ inputs=[index_state, sql_queries_state, dataframes_state, plots_state],
339
+ outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], index_state],
340
+ ).then(
341
+ update_pagination,
342
+ inputs=[index_state, sql_queries_state],
343
+ outputs=[ui_elements["pagination_display"]],
344
+ )
345
+
346
+ # Handle table selection
347
+ ui_elements["table_names_display"].select(
348
+ fn=on_table_click,
349
+ inputs=[table_names_list, sql_queries_state, dataframes_state, plots_state],
350
+ outputs=[ui_elements["drias_sql_query"], ui_elements["drias_table"], ui_elements["drias_display"], index_state],
351
+ ).then(
352
+ update_pagination,
353
+ inputs=[index_state, sql_queries_state],
354
+ outputs=[ui_elements["pagination_display"]],
355
+ )
356
+
357
+ def create_drias_tab(share_client=None, user_id=None):
358
+ """Create the DRIAS tab with all its components and event handlers."""
359
+ ui_elements = create_drias_ui()
360
+ setup_drias_events(ui_elements, share_client=share_client, user_id=user_id)
361
+
362
+
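create_drias_ui builds the layout and setup_drias_events does the wiring: each user action runs ask_drias_query, then logging, then visibility updates, then pagination, as successive .then() steps. A generic, runnable sketch of that chaining style (the components and handlers here are illustrative, not the DRIAS ones):

import gradio as gr

with gr.Blocks() as demo:
    question = gr.Textbox(label="Direct Question")
    answer = gr.Textbox(label="Result")
    status = gr.Markdown()

    # Each .then() step runs after the previous one completes, which is how
    # setup_drias_events sequences query -> logging -> visibility -> pagination.
    question.submit(
        lambda q: f"echo: {q}", inputs=[question], outputs=[answer]
    ).then(
        lambda: "done", inputs=None, outputs=[status]
    )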
style.css CHANGED
@@ -29,8 +29,6 @@ main.flex.flex-1.flex-col {
  }
 
 
- }
-
  .tab-nav {
  border: none !important;
  }
@@ -111,10 +109,18 @@
  border: none;
  }
 
- #input-textbox > label > textarea {
+ #input-textbox > label > div > textarea {
  border-radius: 40px;
  padding-left: 30px;
  resize: none;
+ background-color: #d7e2ed; /* Light blue background */
+ border: 2px solid #4b8ec3; /* Blue border */
+ font-size: 16px; /* Increase font size */
+ color: #333; /* Text color */
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); /* Add shadow */
+ }
+ #input-textbox > label > div > textarea::placeholder {
+ color: #4b4747; /* Darker placeholder color */
  }
 
  #input-message > div {
@@ -474,6 +480,33 @@
  text-decoration: none !important;
  }
 
+ /* Follow-up Examples Styles */
+ #follow-up-examples {
+ max-height: 20vh;
+ overflow-y: auto;
+ gap: 8px;
+ display: flex;
+ flex-direction: column;
+ overflow-y: hidden;
+ background: rgb(229, 235, 237);
+ }
+
+ #follow-up-button {
+ overflow-y: visible;
+ display: block;
+ padding: 8px 12px;
+ margin: 4px 0;
+ border-radius: 8px;
+ background-color: #f0f8ff;
+ transition: background-color 0.2s;
+ background: rgb(240, 240, 236);
+
+ }
+
+ #follow-up-button:hover {
+ background-color: #e0f0ff;
+ }
+
  /* Media Queries */
  /* Desktop Media Query */
  @media screen and (min-width: 1024px) {
@@ -487,7 +520,6 @@
  height: calc(100vh - 190px) !important;
  overflow-y: scroll !important;
  }
- div#tab-vanna,
  div#sources-figures,
  div#graphs-container,
  div#tab-citations {
@@ -496,6 +528,15 @@
  overflow-y: scroll !important;
  }
 
+ div#chatbot-row {
+ max-height: calc(100vh - 200px) !important;
+ }
+
+ div#chatbot {
+ height: 70vh !important;
+ max-height: 70vh !important;
+ }
+
  div#chatbot-row {
  max-height: calc(100vh - 90px) !important;
  }
@@ -514,7 +555,11 @@
  /* Mobile Media Query */
  @media screen and (max-width: 767px) {
  div#chatbot {
- height: 500px !important;
+ height: 400px !important; /* Reduced from 500px */
+ }
+
+ #follow-up-examples {
+ max-height: 150px;
  }
 
  #submit-button {
@@ -607,14 +652,61 @@
  }
 
  #vanna-display {
- max-height: 300px;
+ max-height: 200px;
  /* overflow-y: scroll; */
  }
  #sql-query{
- max-height: 100px;
+ max-height: 300px;
  overflow-y:scroll;
  }
- #vanna-details{
- max-height: 500px;
- overflow-y:scroll;
+
+ #sql-query textarea{
+ min-height: 100px !important;
+ }
+
+ #sql-query span{
+ display: none;
+ }
+ div#tab-vanna{
+ max-height: 100vh;
+ overflow-y: hidden;
+ }
+ #vanna-plot{
+ max-height: 500px;
+ }
+
+ #pagination-display{
+ text-align: center;
+ font-weight: bold;
+ font-size: 16px;
+ }
+
+ #table-names table{
+ overflow: hidden;
+ }
+ #table-names thead{
+ display: none;
+ }
+
+ /* DRIAS Data Table Styles */
+ #vanna-table {
+ height: 400px !important;
+ overflow-y: auto !important;
+ }
+
+ #vanna-table > div[class*="table"] {
+ height: 400px !important;
+ overflow-y: visible !important;
+ }
+
+ #vanna-table .table-wrap {
+ height: 400px !important;
+ overflow-y: visible !important;
+ }
+
+ #vanna-table thead {
+ position: sticky;
+ top: 0;
+ background: white;
+ z-index: 1;
  }