# Page banner captured together with this file (Spaces status): Runtime error
# %% | |
import os
from time import sleep

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import launch_es

# Start Elasticsearch through Haystack's helper, then pause for a fixed
# 30 seconds before anything below tries to use it
# (presumably to give the service time to come up -- TODO confirm).
launch_es()
sleep(30)
# %% | |
# Set Haystack's telemetry flag to "False" for this process.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"

# Document store pointing at Elasticsearch on localhost, with empty
# credentials, using the "document" index.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="document",
)
# %% | |
import pandas as pd

# Read the article table from the CSV shipped under data/.
df_document = pd.read_csv("data/articles.csv")

# Notebook-style preview of the first rows; the value is discarded when this
# file runs as a plain script.
df_document.head()
# %% | |
# Turn each CSV row into a dict with an "id" (the row index), the article text
# under "content", and the descriptive columns grouped under "meta".
articles = [
    {
        "id": idx,
        "content": row["article"],
        "meta": {
            "chapter_name": row["chapter_name"],
            "article_page": row["article_page"],
            "article_number": row["article_number"],
            "article_name": row["article_name"],
        },
    }
    for idx, row in df_document.iterrows()
]

# Write all articles to the "document" index in one call, then report how many
# documents the store now holds.
document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %% | |
from haystack.nodes import BM25Retriever

# BM25 retriever bound to the document store created above.
retriever = BM25Retriever(
    document_store=document_store,
)
# %% | |
from haystack.nodes import FARMReader

# Model identifier handed to the reader as `model_name_or_path`.
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Reader options gathered in one mapping and expanded into the constructor.
reader_settings = {
    "model_name_or_path": model_ckpt,
    "progress_bar": False,
    "max_seq_len": 384,
    "doc_stride": 128,
    "return_no_answer": False,
    "use_gpu": False,
}
reader = FARMReader(**reader_settings)
# %% | |
from haystack.pipelines import ExtractiveQAPipeline

# Combine the reader and the retriever into one extractive QA pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
# %% | |
from textwrap import fill


def run_qa_pipeline(question, *, retriever_top_k=10, reader_top_k=5):
    """Run the module-level QA pipeline ``pipe`` for one question.

    Args:
        question: Query text forwarded to ``pipe.run`` as ``query``.
        retriever_top_k: ``top_k`` sent to the "Retriever" node. Defaults to
            10, the value that used to be hard-coded here.
        reader_top_k: ``top_k`` sent to the "Reader" node. Defaults to 5, the
            value that used to be hard-coded here.

    Returns:
        The object returned by ``pipe.run``; other code in this file reads its
        ``"answers"`` entry.
    """
    return pipe.run(
        query=question,
        params={
            "Retriever": {"top_k": retriever_top_k},
            "Reader": {"top_k": reader_top_k},
        },
    )
def results_as_markdown(results):
    """Render the pipeline's answers as one Markdown string.

    For every entry in ``results["answers"]`` the source article is looked up
    in ``document_store`` by ``document_id``. Each section is a bold header
    line built from the answer's metadata, followed by the article text
    wrapped at 80 columns.

    Args:
        results: Mapping with an ``"answers"`` entry; each answer must expose
            ``document_id`` and ``meta`` (with the keys ``chapter_name``,
            ``article_number``, ``article_name`` and ``article_page``).

    Returns:
        The sections joined by blank lines, or an empty string when there are
        no answers to show.
    """
    # Written with explicit "\n" so the output does not depend on how this
    # source file happens to be indented.
    section_template = (
        "**Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.**\n"
        "{}\n"
    )
    top_answers = []
    for result in results["answers"]:
        article = document_store.get_document_by_id(result.document_id)
        if article is None:
            # NOTE(review): the lookup is expected to return None for an
            # unknown id -- confirm against the installed Haystack version.
            # Skip the answer instead of failing on `article.content`.
            continue
        meta = result.meta
        top_answers.append(
            section_template.format(
                meta["chapter_name"],
                meta["article_number"],
                meta["article_name"],
                meta["article_page"],
                fill(article.content, 80),
            )
        )
    return "\n\n".join(top_answers)
def query_qa_pipeline(question):
    """Answer a question end to end.

    Passes ``question`` to ``run_qa_pipeline`` and returns the result
    formatted by ``results_as_markdown``.
    """
    return results_as_markdown(run_qa_pipeline(question))
# %% | |
import gradio as gr

title = "**CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN**"
default_question = "educación gratuita"

# One-column page: heading, question box, search button, answer area.
with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Column():
        with gr.Row():
            question = gr.Textbox(
                lines=2,
                max_lines=3,
                label="Pregunta:",
                placeholder=default_question,
            )
        with gr.Row():
            btn = gr.Button("Buscar")
        with gr.Row():
            answers = gr.Markdown()
    # A click sends the textbox content to query_qa_pipeline and shows the
    # Markdown it returns in the answer area.
    btn.click(fn=query_qa_pipeline, inputs=question, outputs=answers)

demo.launch(share=True)
# %% | |