import re
import time
from queue import Empty, SimpleQueue
from threading import Thread

from dotenv import load_dotenv
from langchain.callbacks.base import BaseCallbackHandler
job_done = object()  # sentinel put on the queue to signal that generation is finished


class StreamingGradioCallbackHandler(BaseCallbackHandler):
    """Callback handler for streaming. Only works with LLMs that support streaming."""

    def __init__(self, q):
        self.q = q

    def on_llm_start(self, serialized, prompts, **kwargs) -> None:
        """Run when LLM starts running: drain any tokens left over from a previous run."""
        while not self.q.empty():
            try:
                self.q.get(block=False)
            except Empty:
                continue

    def on_llm_new_token(self, token, **kwargs) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""
        self.q.put(token)

    def on_llm_end(self, response, **kwargs) -> None:
        """Run when LLM ends running."""
        self.q.put(job_done)

    def on_llm_error(self, error, **kwargs) -> None:
        """Run when LLM errors."""
        self.q.put(job_done)


def add_gradio_streaming(llm):
    """Attach a streaming callback to the LLM and return it along with the token queue."""
    q = SimpleQueue()
    llm.callbacks = [StreamingGradioCallbackHandler(q)]
    return llm, q


def gradio_stream(llm, q, prompt):
    """Run the LLM in a background thread and yield the answer as tokens arrive on the queue."""
    thread = Thread(target=llm.predict, kwargs={"text": prompt})
    thread.start()

    text = ""
    while True:
        next_token = q.get(block=True)  # blocks until a token is available
        if next_token is job_done:
            break
        text += next_token
        time.sleep(0.03)
        yield text
    thread.join()
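

# Illustrative usage sketch: how the streaming helpers above could be combined, assuming
# `llm` is a LangChain LLM or chat model configured with streaming enabled; the prompt
# below is only a placeholder.
#
#   llm, q = add_gradio_streaming(llm)
#   for partial_answer in gradio_stream(llm, q, "Quel est le sujet du document ?"):
#       ...  # yield partial_answer from a Gradio generator callback to update the UI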


def get_source_link(metadata):
    return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"


def make_html_source(source, i, score, config):
    """Render a retrieved chunk as an HTML source card (layout depends on the source type)."""
    meta = source.metadata

    if meta["file_source_type"] == "AFP":
        return f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['file_title']} - {meta['file_type']} AFP</h2>
            <p>{source.page_content}</p>
        </div>
        <div class="card-footer">
            <span>{meta['file_source_type']}</span>
            <span>Relevance Score : {round(100*score,1)}%</span>
        </div>
    </div>
    """

    if meta["file_source_type"] == "Presse":
        if meta["file_url"] != "none":
            return f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
            <p>{source.page_content}</p>
        </div>
        <div class="card-footer">
            <span>{meta['file_source_type']}</span>
            <span>Relevance Score : {round(100*score,1)}%</span>
            <a href="{meta['file_url']}" target="_blank">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """
        else:
            return f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['file_title']} - {meta['file_publisher']}</h2>
            <p>{source.page_content}</p>
        </div>
        <div class="card-footer">
            <span>{meta['file_source_type']}</span>
            <span>Relevance Score : {round(100*score,1)}%</span>
        </div>
    </div>
    """

    if meta["file_url"]:
        return f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
        </div>
        <div class="card-footer">
            <span>{meta['file_source_type']}</span>
            <span>Relevance Score : {round(100*score,1)}%</span>
            <a href="{get_source_link(meta)}" target="_blank">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """
    else:
        return f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
            <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
        </div>
        <div class="card-footer">
            <span>{meta['file_source_type']}</span>
            <span>Relevance Score : {round(100*score,1)}%</span>
        </div>
    </div>
    """


def parse_output_llm_with_sources(output):
    """Replace inline citations such as "Doc 2" or "[Doc 1, Doc 3]" with HTML anchors
    pointing to the corresponding source cards."""
    content_parts = re.split(
        r"[\[(]?(Doc\s?\d+(?:,\s?Doc\s?\d+)*|doc\s?\d+(?:,\s?doc\s?\d+)*|Doc\s\d+)[\])?]",
        output,
    )
    parts = []
    for part in content_parts:
        if part.lower().startswith("doc"):
            # "Doc 1, Doc 3" -> ["1", "3"] -> one anchor per cited document number
            subparts = part.split(",")
            subparts = [
                subpart.lower().replace("doc", "").strip() for subpart in subparts
            ]
            subparts = [
                f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>"""
                for subpart in subparts
            ]
            parts.append("".join(subparts))
        else:
            parts.append(part)
    return "".join(parts)
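

# Example of the rewriting performed above (hypothetical answer text): a citation such as
# "[Doc 2]" becomes an in-page anchor pointing at the matching source card, e.g.
#   parse_output_llm_with_sources("Les émissions ont baissé [Doc 2].")
#   # -> 'Les émissions ont baissé <a href="#doc2" class="a-doc-ref" ...><sup>2</sup>...</a>.'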


def clear_text_box(textbox):
    return ""


def add_text(chatbot, text):
    chatbot = chatbot + [(text, None)]
    return chatbot, text


def init_env():
    """Load environment variables from a .env file if one is present."""
    try:
        load_dotenv()
    except Exception:
        pass