# Hugging Face Space file view -- Space status: Runtime error.
# File size: 3,017 bytes; revision b19c8bc.
# %%
import os

# Opt out of telemetry before haystack is imported, so the setting is already
# in the environment whenever the package first consults it.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"

import json
from http.client import HTTPException
from time import monotonic, sleep
from urllib.request import urlopen

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import launch_es


def _wait_for_elasticsearch(
    health_url="http://localhost:9200/_cluster/health",
    timeout=120.0,
    poll_interval=2.0,
):
    """Poll Elasticsearch until it reports a usable cluster status.

    Replaces a fixed 30-second sleep: returns as soon as the node is ready
    and keeps waiting (up to ``timeout`` seconds) on slow machines.

    Args:
        health_url: Cluster-health endpoint to poll. Assumes the node started
            by ``launch_es`` listens on localhost:9200 -- TODO confirm.
        timeout: Maximum total number of seconds to wait.
        poll_interval: Seconds between attempts; also the per-request timeout.

    Returns:
        True once the reported status is "yellow" or "green", False if the
        deadline passes first.
    """
    deadline = monotonic() + timeout
    while monotonic() < deadline:
        try:
            with urlopen(health_url, timeout=poll_interval) as response:
                status = json.load(response).get("status")
        except (OSError, HTTPException, ValueError):
            # Refused/reset connections and partial replies are expected
            # while the node is still booting; just try again.
            status = None
        if status in ("yellow", "green"):
            return True
        sleep(poll_interval)
    return False


launch_es()
if not _wait_for_elasticsearch():
    # Carry on regardless: the document store below raises its own
    # connection error if Elasticsearch really is unreachable.
    print("Elasticsearch did not report ready in time; continuing anyway")
# %%
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
# %%
import pandas as pd

df_document = pd.read_csv("data/articles.csv")
df_document.head()
# %%
# CSV columns copied as-is into each document's ``meta`` mapping.
_META_COLUMNS = ("chapter_name", "article_page", "article_number", "article_name")

# One document dict per CSV row: the row index is the id and the "article"
# column is the searchable content.
articles = [
    {
        "id": row_index,
        "content": row["article"],
        "meta": {column: row[column] for column in _META_COLUMNS},
    }
    for row_index, row in df_document.iterrows()
]
document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %%
from haystack.nodes import BM25Retriever

# BM25 retriever that reads candidates from the document store.
retriever = BM25Retriever(document_store=document_store)
# %%
from haystack.nodes import FARMReader

model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Reader configuration kept in one mapping so the settings are easy to scan.
_reader_settings = {
    "model_name_or_path": model_ckpt,
    "progress_bar": False,
    "max_seq_len": 384,
    "doc_stride": 128,
    "return_no_answer": False,
    "use_gpu": False,
}
reader = FARMReader(**_reader_settings)
# %%
from haystack.pipelines import ExtractiveQAPipeline

# Retriever and reader combined into a single extractive QA pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
# %%
from textwrap import fill
def run_qa_pipeline(question, retriever_top_k=10, reader_top_k=5):
    """Run the extractive QA pipeline for a question.

    Args:
        question: Query string passed to ``pipe.run`` as ``query``.
        retriever_top_k: ``top_k`` sent to the "Retriever" node. Defaults to
            10, the value that used to be hard-coded.
        reader_top_k: ``top_k`` sent to the "Reader" node. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        The result of ``pipe.run``, unchanged.
    """
    return pipe.run(
        query=question,
        params={
            "Retriever": {"top_k": retriever_top_k},
            "Reader": {"top_k": reader_top_k},
        },
    )
def results_as_markdown(results):
    """Render the articles behind the pipeline's answers as Markdown.

    Each answer is resolved to its source article through ``document_store``.
    The article is shown once, under a bold header built from the answer's
    metadata, with its text wrapped at 80 columns.

    Args:
        results: Mapping with an "answers" entry; each answer must expose
            ``document_id`` and ``meta`` (with the keys "chapter_name",
            "article_number", "article_name" and "article_page").

    Returns:
        The rendered sections joined by blank lines, or an empty string when
        there are no answers.
    """
    # NOTE(review): some Haystack releases expose ``document_ids`` (a list)
    # on answers instead of ``document_id`` -- confirm against the pinned
    # version.
    sections = []
    seen_document_ids = set()
    for answer in results["answers"]:
        document_id = answer.document_id
        # Several answers can come from the same article; since the whole
        # article is displayed, repeating it adds nothing.
        if document_id in seen_document_ids:
            continue
        seen_document_ids.add(document_id)

        article = document_store.get_document_by_id(document_id)
        if article is None:
            # The store no longer holds this document; skip it rather than
            # fail on ``article.content``.
            continue

        meta = answer.meta
        header = "**Capítulo: {}.\t número: {}.\t nombre: {}.\t página: {}.**".format(
            meta["chapter_name"],
            meta["article_number"],
            meta["article_name"],
            meta["article_page"],
        )
        sections.append("{}\n{}\n".format(header, fill(article.content, 80)))
    return "\n\n".join(sections)
def query_qa_pipeline(question):
    """Answer a question and return the matching articles as Markdown.

    Args:
        question: Free-text query typed by the user.

    Returns:
        Markdown produced by ``results_as_markdown``, or an empty string when
        the question is missing or only whitespace.
    """
    # The UI textbox has only a placeholder, so submitting without typing
    # sends an empty string; skip the pipeline in that case.
    if not question or not question.strip():
        return ""
    return results_as_markdown(run_qa_pipeline(question))
# %%
import gradio as gr

title = "**CONSOLIDADO NORMAS APROBADAS PARA LA PROPUESTA CONSTITUCIONAL POR EL PLENO DE LA CONVENCIÓN**"
default_question = "educación gratuita"

with gr.Blocks() as demo:
    gr.Markdown(title)
    # Single column with one row per widget: question box, search button,
    # rendered results.
    with gr.Column():
        with gr.Row():
            question = gr.Textbox(
                label="Pregunta:",
                placeholder=default_question,
                lines=2,
                max_lines=3,
            )
        with gr.Row():
            btn = gr.Button("Buscar")
        with gr.Row():
            answers = gr.Markdown()
    # Clicking the button runs the QA pipeline and shows its Markdown output.
    btn.click(fn=query_qa_pipeline, inputs=question, outputs=answers)

demo.launch(share=True)
# %%