Spaces:
Sleeping
Sleeping
update major frontend and backend features
Browse files- README.md +1 -1
- app.py +254 -350
- assets/logos/apig.png +0 -0
- assets/logos/help.png +0 -0
- assets/logos/question.png +0 -0
- assets/logos/rsf.png +0 -0
- assets/logos/spinoza.png +0 -0
- assets/source_information.md +14 -0
- assets/style.css +313 -48
- assets/utils_javascript.py +108 -0
- poetry.lock +0 -0
- pyproject.toml +1 -1
- requirements.txt +0 -0
- spinoza_project/source/frontend/utils.py +40 -24
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🐨
|
|
4 |
colorFrom: green
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
hf_oauth: true
|
|
|
4 |
colorFrom: green
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.37.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
hf_oauth: true
|
app.py
CHANGED
@@ -1,15 +1,15 @@
|
|
|
|
1 |
import time
|
2 |
import yaml
|
3 |
-
import logging
|
4 |
-
import gradio as gr
|
5 |
from langchain.prompts.chat import ChatPromptTemplate
|
6 |
-
from huggingface_hub import hf_hub_download
|
7 |
from spinoza_project.source.backend.llm_utils import get_llm, get_vectorstore
|
8 |
from spinoza_project.source.backend.document_store import pickle_to_document_store
|
9 |
from spinoza_project.source.backend.get_prompts import get_qa_prompts
|
10 |
from spinoza_project.source.frontend.utils import (
|
11 |
make_html_source,
|
12 |
make_html_presse_source,
|
|
|
13 |
init_env,
|
14 |
)
|
15 |
from spinoza_project.source.backend.prompt_utils import (
|
@@ -17,6 +17,13 @@ from spinoza_project.source.backend.prompt_utils import (
|
|
17 |
SpecialTokens,
|
18 |
)
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
init_env()
|
21 |
|
22 |
with open("./spinoza_project/config.yaml") as f:
|
@@ -55,18 +62,12 @@ for source, prompt in prompts.items():
|
|
55 |
chat_qa_prompt, chat_reformulation_prompt = get_qa_prompts(config, prompt)
|
56 |
chat_qa_prompts[source] = chat_qa_prompt
|
57 |
chat_reformulation_prompts[source] = chat_reformulation_prompt
|
58 |
-
# chat_summarize_memory_prompts[source] = chat_summarize_memory_prompt
|
59 |
|
60 |
|
61 |
with open("./assets/style.css", "r") as f:
|
62 |
css = f.read()
|
63 |
|
64 |
|
65 |
-
def update_tabs(outil, visible_tabs):
|
66 |
-
visible_tabs = outil
|
67 |
-
return visible_tabs
|
68 |
-
|
69 |
-
|
70 |
special_tokens = SpecialTokens(config)
|
71 |
|
72 |
synthesis_template = """You are a factual journalist that summarize the secialized awnsers from thechnical sources.
|
@@ -117,80 +118,6 @@ def zip_longest_fill(*args, fillvalue=None):
|
|
117 |
yield tuple(values)
|
118 |
|
119 |
|
120 |
-
def build_data_dict(config):
|
121 |
-
data_dict = {}
|
122 |
-
for tab in config["tabs"]:
|
123 |
-
data_dict[tab] = {
|
124 |
-
"tab": {
|
125 |
-
"init_value": tab,
|
126 |
-
"component": None,
|
127 |
-
"elem_id": "tab",
|
128 |
-
},
|
129 |
-
"description": {
|
130 |
-
"init_value": config["tabs"][tab],
|
131 |
-
"component": None,
|
132 |
-
"elem_id": "desc",
|
133 |
-
},
|
134 |
-
"question": {
|
135 |
-
"init_value": None,
|
136 |
-
"component": None,
|
137 |
-
"elem_id": "question",
|
138 |
-
},
|
139 |
-
"answer": {
|
140 |
-
"init_value": None,
|
141 |
-
"component": None,
|
142 |
-
"elem_id": "answer",
|
143 |
-
},
|
144 |
-
"sources": {
|
145 |
-
"init_value": None,
|
146 |
-
"component": None,
|
147 |
-
"elem_id": "src",
|
148 |
-
},
|
149 |
-
}
|
150 |
-
return data_dict
|
151 |
-
|
152 |
-
|
153 |
-
def init_gradio(data, config=config):
|
154 |
-
for t in data:
|
155 |
-
data[t]["tab"]["component"] = gr.Tab(
|
156 |
-
data[t]["tab"]["init_value"], elem_id="tab"
|
157 |
-
)
|
158 |
-
with data[t]["tab"]["component"]:
|
159 |
-
for fields in data[t]:
|
160 |
-
if fields == "question":
|
161 |
-
data[t][fields]["component"] = gr.Textbox(
|
162 |
-
elem_id=data[t][fields]["elem_id"],
|
163 |
-
show_label=False,
|
164 |
-
interactive=True,
|
165 |
-
placeholder="",
|
166 |
-
)
|
167 |
-
# elif fields == "answer":
|
168 |
-
# data[t][fields]["component"] = gr.Textbox(
|
169 |
-
# elem_id=data[t][fields]["elem_id"],
|
170 |
-
# show_label=True,
|
171 |
-
# interactive=True,
|
172 |
-
# placeholder="",
|
173 |
-
# show_copy_button=True
|
174 |
-
# )
|
175 |
-
elif fields != "tab":
|
176 |
-
data[t][fields]["component"] = gr.Markdown(
|
177 |
-
data[t][fields]["init_value"],
|
178 |
-
elem_id=data[t][fields]["elem_id"],
|
179 |
-
)
|
180 |
-
# data[t][fields]["component"] = gr.Textbox(
|
181 |
-
# value=data[t][fields]["init_value"],
|
182 |
-
# elem_id=data[t][fields]["elem_id"],
|
183 |
-
# show_label=True,
|
184 |
-
# interactive=False,
|
185 |
-
# show_copy_button=True,
|
186 |
-
# )
|
187 |
-
return data
|
188 |
-
|
189 |
-
|
190 |
-
def add_warning():
|
191 |
-
return "*Les éléments cochés ont commencé à être généré dans les onglets spécifiques, la synthèse ne sera disponible qu'après la mise à disposition de ces derniers.*"
|
192 |
-
|
193 |
-
|
194 |
def format_question(question):
|
195 |
return f"{question}" # ###
|
196 |
|
@@ -202,8 +129,8 @@ def parse_question(question):
|
|
202 |
return x
|
203 |
|
204 |
|
205 |
-
def reformulate(
|
206 |
-
if tab in
|
207 |
return llm.stream(
|
208 |
chat_reformulation_prompts[config["source_mapping"][tab]],
|
209 |
{"question": parse_question(question)},
|
@@ -212,15 +139,15 @@ def reformulate(outils, question, tab, config=config):
|
|
212 |
return iter([None] * 5)
|
213 |
|
214 |
|
215 |
-
def reformulate_single_question(
|
216 |
-
for elt in reformulate(
|
217 |
time.sleep(0.02)
|
218 |
yield elt
|
219 |
|
220 |
|
221 |
-
def reformulate_questions(
|
222 |
for elt in zip_longest_fill(
|
223 |
-
*[reformulate(
|
224 |
):
|
225 |
time.sleep(0.02)
|
226 |
yield elt
|
@@ -230,8 +157,8 @@ def add_question(question):
|
|
230 |
return question
|
231 |
|
232 |
|
233 |
-
def answer(question, source,
|
234 |
-
if tab in
|
235 |
if len(source) < 10:
|
236 |
return iter(["Aucune source trouvée, veuillez reformuler votre question"])
|
237 |
else:
|
@@ -247,49 +174,35 @@ def answer(question, source, outils, tab, config=config):
|
|
247 |
return iter([None] * 5)
|
248 |
|
249 |
|
250 |
-
def answer_single_question(
|
251 |
-
for elt in answer(question, source,
|
252 |
time.sleep(0.02)
|
253 |
yield elt
|
254 |
|
255 |
|
256 |
-
def answer_questions(
|
257 |
-
|
258 |
questions = [elt for elt in questions_sources[: len(questions_sources) // 2]]
|
259 |
sources = [elt for elt in questions_sources[len(questions_sources) // 2 :]]
|
260 |
|
261 |
for elt in zip_longest_fill(
|
262 |
*[
|
263 |
-
answer(question, source,
|
264 |
for question, source, tab in zip(questions, sources, config["tabs"])
|
265 |
]
|
266 |
):
|
267 |
time.sleep(0.02)
|
268 |
-
yield
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
|
273 |
-
|
274 |
-
|
275 |
-
def get_button(i, tag):
|
276 |
-
return f"""<button id="btn_{tag}_{i}" type="button" style="margin: 0; display: inline; align="right">[{i}]</button>"""
|
277 |
-
|
278 |
-
|
279 |
-
def get_html_sources(buttons, cards):
|
280 |
-
return f"""
|
281 |
-
<p style="margin: 0; display: inline;"><strong><br>Sources utilisées : </strong></p>
|
282 |
-
{buttons}
|
283 |
-
{cards}
|
284 |
-
"""
|
285 |
|
286 |
|
287 |
-
def get_sources(
|
288 |
-
outils, question, tab, qdrants=qdrants, bdd_presse=bdd_presse, config=config
|
289 |
-
):
|
290 |
k = config["num_document_retrieved"]
|
291 |
min_similarity = config["min_similarity"]
|
292 |
-
|
|
|
|
|
293 |
sources = (
|
294 |
(
|
295 |
bdd_presse.similarity_search_with_relevance_scores(
|
@@ -307,66 +220,49 @@ def get_sources(
|
|
307 |
)
|
308 |
)
|
309 |
|
310 |
-
sources = [
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
[
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
[
|
319 |
-
make_html_presse_source(source[0], i, tab, source[1], config)
|
320 |
-
for i, source in zip(buttons_ids, sources)
|
321 |
-
]
|
322 |
-
)
|
323 |
if tab == "Presse"
|
324 |
-
else
|
325 |
-
[
|
326 |
-
|
327 |
-
|
328 |
-
]
|
329 |
-
)
|
330 |
)
|
331 |
-
|
332 |
-
text = "\n\n".join(
|
333 |
[
|
334 |
-
|
335 |
-
|
336 |
-
|
|
|
|
|
|
|
|
|
337 |
]
|
338 |
)
|
339 |
-
|
340 |
-
|
341 |
-
|
|
|
342 |
|
343 |
|
344 |
def retrieve_sources(
|
345 |
-
|
346 |
):
|
347 |
-
|
348 |
-
get_sources(outils, question, tab, qdrants, bdd_presse, config)
|
349 |
-
for question, tab in zip(questions, config["tabs"])
|
350 |
-
]
|
351 |
-
formated_sources = [source[0] for source in results]
|
352 |
-
text_sources = [source[1] for source in results]
|
353 |
-
return tuple(formated_sources + text_sources)
|
354 |
-
|
355 |
|
356 |
-
|
357 |
-
return "\n\n".join(
|
358 |
-
[
|
359 |
-
f"{tab}\n{answers[i]}"
|
360 |
-
for i, tab in enumerate(config["tabs"])
|
361 |
-
if (tab in outils)
|
362 |
-
]
|
363 |
-
)
|
364 |
|
365 |
|
366 |
-
def get_synthesis(
|
367 |
answer = []
|
368 |
for i, tab in enumerate(config["tabs"]):
|
369 |
-
if (
|
370 |
answer.append(
|
371 |
f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
|
372 |
)
|
@@ -382,67 +278,7 @@ def get_synthesis(outils, question, *answers, config=config):
|
|
382 |
},
|
383 |
):
|
384 |
time.sleep(0.01)
|
385 |
-
yield elt
|
386 |
-
|
387 |
-
|
388 |
-
def get_listener():
|
389 |
-
return """
|
390 |
-
function my_func_body() {
|
391 |
-
const body = document.querySelector("body");
|
392 |
-
body.addEventListener("click", e => {
|
393 |
-
console.log(e)
|
394 |
-
const sourceId = "btn_" + e.target.id.split("_")[1] + "_" + e.target.id.split("_")[2] + "_source"
|
395 |
-
console.log(sourceId)
|
396 |
-
if (document.getElementById(sourceId).style.display === "none") {
|
397 |
-
document.getElementById(sourceId).style.display = "";
|
398 |
-
} else {
|
399 |
-
document.getElementById(sourceId).style.display = "none";
|
400 |
-
}
|
401 |
-
}
|
402 |
-
)}
|
403 |
-
"""
|
404 |
-
|
405 |
-
|
406 |
-
def get_source_template(buttons, divs_source):
|
407 |
-
return """
|
408 |
-
<div class="source">
|
409 |
-
<p style="margin: 0; display: inline;"><strong><br>Sources utilisées :</strong></p>
|
410 |
-
{buttons}
|
411 |
-
{divs_source}
|
412 |
-
</div>
|
413 |
-
</div>
|
414 |
-
"""
|
415 |
-
|
416 |
-
|
417 |
-
def activate_questions(outils, *textboxes, config=config):
|
418 |
-
activated_textboxes = []
|
419 |
-
for i, tab in enumerate(config["tabs"]):
|
420 |
-
if tab in outils:
|
421 |
-
activated_textboxes.append(
|
422 |
-
gr.Textbox(
|
423 |
-
show_label=False,
|
424 |
-
interactive=True,
|
425 |
-
placeholder="Sélectionnez cet outil et posez une question sur l'onglet de synthèse",
|
426 |
-
)
|
427 |
-
)
|
428 |
-
|
429 |
-
else:
|
430 |
-
activated_textboxes.append(
|
431 |
-
gr.Textbox(
|
432 |
-
show_label=False,
|
433 |
-
interactive=False,
|
434 |
-
placeholder="Sélectionnez cet outil et posez une question sur l'onglet de synthèse",
|
435 |
-
)
|
436 |
-
)
|
437 |
-
return activated_textboxes
|
438 |
-
|
439 |
-
|
440 |
-
def empty():
|
441 |
-
return ""
|
442 |
-
|
443 |
-
|
444 |
-
def empty_none():
|
445 |
-
return None
|
446 |
|
447 |
|
448 |
theme = gr.themes.Base(
|
@@ -451,151 +287,219 @@ theme = gr.themes.Base(
|
|
451 |
font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
|
452 |
)
|
453 |
|
|
|
|
|
454 |
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
⚠️ Limitations
|
459 |
-
*Please note that this chatbot is in an early stage phase, it is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
|
460 |
-
|
461 |
-
What do you want to learn ?
|
462 |
-
"""
|
463 |
|
464 |
-
|
465 |
-
|
466 |
|
467 |
-
data = build_data_dict(config)
|
468 |
|
|
|
|
|
|
|
|
|
|
|
469 |
|
470 |
-
def update_visible(oauth_token: gr.OAuthToken | None):
|
471 |
-
if oauth_token is None:
|
472 |
-
return {
|
473 |
-
bloc_1: gr.update(visible=True),
|
474 |
-
bloc_2: gr.update(visible=False),
|
475 |
-
bloc_3: gr.update(visible=False),
|
476 |
-
}
|
477 |
|
478 |
-
|
|
|
479 |
|
480 |
-
|
481 |
-
|
482 |
-
bloc_1: gr.update(visible=False),
|
483 |
-
bloc_2: gr.update(visible=True),
|
484 |
-
bloc_3: gr.update(visible=False),
|
485 |
-
}
|
486 |
|
487 |
-
|
488 |
-
|
489 |
-
bloc_1: gr.update(visible=False),
|
490 |
-
bloc_2: gr.update(visible=False),
|
491 |
-
bloc_3: gr.update(visible=True),
|
492 |
-
}
|
493 |
|
|
|
|
|
494 |
|
495 |
with gr.Blocks(
|
496 |
-
title=f"🔍
|
497 |
css=css,
|
498 |
-
js=get_listener(),
|
499 |
theme=theme,
|
500 |
) as demo:
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
|
|
|
|
|
|
|
|
508 |
|
509 |
-
|
510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
with gr.Row():
|
512 |
-
with gr.Column(scale=3):
|
513 |
-
outils = gr.CheckboxGroup(
|
514 |
-
choices=list(config["tabs"].keys()),
|
515 |
-
value=list(config["tabs"].keys()),
|
516 |
-
type="value",
|
517 |
-
label="Choisir les bases de données à interroger",
|
518 |
-
)
|
519 |
with gr.Column(scale=1):
|
520 |
-
|
521 |
-
"Relancer la Synthèse", variant="primary", elem_id="synthese_btn"
|
522 |
-
)
|
523 |
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
+ [data[tab]["question"]["component"] for tab in config["tabs"]]
|
557 |
-
+ [text_sources[tab] for tab in config["tabs"]],
|
558 |
-
[data[tab]["answer"]["component"] for tab in config["tabs"]],
|
559 |
-
)
|
560 |
-
.then(
|
561 |
-
get_synthesis,
|
562 |
-
[outils, md_question]
|
563 |
-
+ [data[tab]["answer"]["component"] for tab in config["tabs"]],
|
564 |
-
[synthesis],
|
565 |
-
)
|
566 |
-
)
|
567 |
-
|
568 |
-
for tab in config["tabs"]:
|
569 |
-
(
|
570 |
-
data[tab]["question"]["component"]
|
571 |
-
.submit(empty, [], [data[tab]["sources"]["component"]])
|
572 |
-
.then(empty, [], [text_sources[tab]])
|
573 |
-
.then(empty, [], [data[tab]["answer"]["component"]])
|
574 |
-
.then(
|
575 |
-
get_sources,
|
576 |
-
[outils, data[tab]["question"]["component"], tab_states[tab]],
|
577 |
-
[data[tab]["sources"]["component"], text_sources[tab]],
|
578 |
-
)
|
579 |
-
.then(
|
580 |
-
answer_single_question,
|
581 |
-
[
|
582 |
-
outils,
|
583 |
-
text_sources[tab],
|
584 |
-
data[tab]["question"]["component"],
|
585 |
-
tab_states[tab],
|
586 |
-
],
|
587 |
-
[data[tab]["answer"]["component"]],
|
588 |
-
)
|
589 |
-
)
|
590 |
|
591 |
-
(
|
592 |
-
submit_btn.click(empty, [], [synthesis]).then(
|
593 |
-
get_synthesis,
|
594 |
-
[outils, md_question]
|
595 |
-
+ [data[tab]["answer"]["component"] for tab in config["tabs"]],
|
596 |
-
[synthesis],
|
597 |
-
)
|
598 |
-
)
|
599 |
|
600 |
if __name__ == "__main__":
|
601 |
demo.queue().launch(share=True, debug=True)
|
|
|
1 |
+
import gradio as gr
|
2 |
import time
|
3 |
import yaml
|
|
|
|
|
4 |
from langchain.prompts.chat import ChatPromptTemplate
|
5 |
+
from huggingface_hub import hf_hub_download
|
6 |
from spinoza_project.source.backend.llm_utils import get_llm, get_vectorstore
|
7 |
from spinoza_project.source.backend.document_store import pickle_to_document_store
|
8 |
from spinoza_project.source.backend.get_prompts import get_qa_prompts
|
9 |
from spinoza_project.source.frontend.utils import (
|
10 |
make_html_source,
|
11 |
make_html_presse_source,
|
12 |
+
parse_output_llm_with_sources,
|
13 |
init_env,
|
14 |
)
|
15 |
from spinoza_project.source.backend.prompt_utils import (
|
|
|
17 |
SpecialTokens,
|
18 |
)
|
19 |
|
20 |
+
from assets.utils_javascript import (
|
21 |
+
accordion_trigger,
|
22 |
+
accordion_trigger_end,
|
23 |
+
accordion_trigger_spinoza,
|
24 |
+
accordion_trigger_spinoza_end,
|
25 |
+
)
|
26 |
+
|
27 |
init_env()
|
28 |
|
29 |
with open("./spinoza_project/config.yaml") as f:
|
|
|
62 |
chat_qa_prompt, chat_reformulation_prompt = get_qa_prompts(config, prompt)
|
63 |
chat_qa_prompts[source] = chat_qa_prompt
|
64 |
chat_reformulation_prompts[source] = chat_reformulation_prompt
|
|
|
65 |
|
66 |
|
67 |
with open("./assets/style.css", "r") as f:
|
68 |
css = f.read()
|
69 |
|
70 |
|
|
|
|
|
|
|
|
|
|
|
71 |
special_tokens = SpecialTokens(config)
|
72 |
|
73 |
synthesis_template = """You are a factual journalist that summarize the secialized awnsers from thechnical sources.
|
|
|
118 |
yield tuple(values)
|
119 |
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
def format_question(question):
|
122 |
return f"{question}" # ###
|
123 |
|
|
|
129 |
return x
|
130 |
|
131 |
|
132 |
+
def reformulate(question, tab, config=config):
|
133 |
+
if tab in list(config["tabs"].keys()):
|
134 |
return llm.stream(
|
135 |
chat_reformulation_prompts[config["source_mapping"][tab]],
|
136 |
{"question": parse_question(question)},
|
|
|
139 |
return iter([None] * 5)
|
140 |
|
141 |
|
142 |
+
def reformulate_single_question(question, tab, config=config):
|
143 |
+
for elt in reformulate(question, tab, config=config):
|
144 |
time.sleep(0.02)
|
145 |
yield elt
|
146 |
|
147 |
|
148 |
+
def reformulate_questions(question, config=config):
|
149 |
for elt in zip_longest_fill(
|
150 |
+
*[reformulate(question, tab, config=config) for tab in config["tabs"]]
|
151 |
):
|
152 |
time.sleep(0.02)
|
153 |
yield elt
|
|
|
157 |
return question
|
158 |
|
159 |
|
160 |
+
def answer(question, source, tab, config=config):
|
161 |
+
if tab in list(config["tabs"].keys()):
|
162 |
if len(source) < 10:
|
163 |
return iter(["Aucune source trouvée, veuillez reformuler votre question"])
|
164 |
else:
|
|
|
174 |
return iter([None] * 5)
|
175 |
|
176 |
|
177 |
+
def answer_single_question(source, question, tab, config=config):
|
178 |
+
for elt in answer(question, source, tab, config=config):
|
179 |
time.sleep(0.02)
|
180 |
yield elt
|
181 |
|
182 |
|
183 |
+
def answer_questions(*questions_sources, config=config):
|
|
|
184 |
questions = [elt for elt in questions_sources[: len(questions_sources) // 2]]
|
185 |
sources = [elt for elt in questions_sources[len(questions_sources) // 2 :]]
|
186 |
|
187 |
for elt in zip_longest_fill(
|
188 |
*[
|
189 |
+
answer(question, source, tab, config=config)
|
190 |
for question, source, tab in zip(questions, sources, config["tabs"])
|
191 |
]
|
192 |
):
|
193 |
time.sleep(0.02)
|
194 |
+
yield [
|
195 |
+
[(question, parse_output_llm_with_sources(ans))]
|
196 |
+
for question, ans in zip(questions, elt)
|
197 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
|
199 |
|
200 |
+
def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
|
|
|
|
|
201 |
k = config["num_document_retrieved"]
|
202 |
min_similarity = config["min_similarity"]
|
203 |
+
formated = []
|
204 |
+
text = []
|
205 |
+
for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
|
206 |
sources = (
|
207 |
(
|
208 |
bdd_presse.similarity_search_with_relevance_scores(
|
|
|
220 |
)
|
221 |
)
|
222 |
|
223 |
+
sources = [
|
224 |
+
(doc, score) for doc, score in sources
|
225 |
+
] # if score >= min_similarity]
|
226 |
+
formated.extend(
|
227 |
+
[
|
228 |
+
make_html_presse_source(source[0], j, source[1], config)
|
229 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
230 |
+
]
|
|
|
|
|
|
|
|
|
|
|
231 |
if tab == "Presse"
|
232 |
+
else [
|
233 |
+
make_html_source(source[0], j, source[1], config)
|
234 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
235 |
+
]
|
|
|
|
|
236 |
)
|
237 |
+
text.extend(
|
|
|
238 |
[
|
239 |
+
"\n\n".join(
|
240 |
+
[
|
241 |
+
f"Doc {str(j)} with source type {source[0].metadata.get('file_source_type')}:\n"
|
242 |
+
+ source[0].page_content
|
243 |
+
for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
|
244 |
+
]
|
245 |
+
)
|
246 |
]
|
247 |
)
|
248 |
+
|
249 |
+
formated = "".join(formated)
|
250 |
+
|
251 |
+
return formated, text
|
252 |
|
253 |
|
254 |
def retrieve_sources(
|
255 |
+
*questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
|
256 |
):
|
257 |
+
formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
+
return (formated_sources, *text_sources)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
|
262 |
+
def get_synthesis(question, *answers, config=config):
|
263 |
answer = []
|
264 |
for i, tab in enumerate(config["tabs"]):
|
265 |
+
if (len(str(answers[i])) >= 100):
|
266 |
answer.append(
|
267 |
f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
|
268 |
)
|
|
|
278 |
},
|
279 |
):
|
280 |
time.sleep(0.01)
|
281 |
+
yield [(question, elt)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
|
283 |
|
284 |
theme = gr.themes.Base(
|
|
|
287 |
font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
|
288 |
)
|
289 |
|
290 |
+
with open("./assets/style.css", "r") as f:
|
291 |
+
css = f.read()
|
292 |
|
293 |
+
with open("./assets/source_information.md", "r") as f:
|
294 |
+
source_information = f.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
+
def start_agents():
|
297 |
+
gr.Info(message="The agents and Spinoza are loading...", duration=3)
|
298 |
|
|
|
299 |
|
300 |
+
def end_agents():
|
301 |
+
gr.Info(
|
302 |
+
message="The agents and Spinoza have finished answering your question",
|
303 |
+
duration=3,
|
304 |
+
)
|
305 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
+
def next_call():
|
308 |
+
print("Next call")
|
309 |
|
310 |
+
init_prompt = """
|
311 |
+
Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
|
|
|
|
|
|
|
|
|
312 |
|
313 |
+
⚠️ Limitations
|
314 |
+
*Please note that this chatbot is in an early stage phase, it is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
|
|
|
|
|
|
|
|
|
315 |
|
316 |
+
What do you want to learn ?
|
317 |
+
"""
|
318 |
|
319 |
with gr.Blocks(
|
320 |
+
title=f"🔍 Spinoza",
|
321 |
css=css,
|
|
|
322 |
theme=theme,
|
323 |
) as demo:
|
324 |
+
chatbots = {}
|
325 |
+
question = gr.State("")
|
326 |
+
docs_textbox = gr.State([""])
|
327 |
+
agent_questions = {elt: gr.State("") for elt in config["tabs"]}
|
328 |
+
component_sources = {elt: gr.State("") for elt in config["tabs"]}
|
329 |
+
text_sources = {elt: gr.State("") for elt in config["tabs"]}
|
330 |
+
tab_states = {elt: gr.State(elt) for elt in config["tabs"]}
|
331 |
+
chatbot_states = [
|
332 |
+
gr.State(name)
|
333 |
+
for name in ["science", "presse", "politique", "legal", "spinoza"]
|
334 |
+
]
|
335 |
|
336 |
+
with gr.Tab("Q&A", elem_id="main-component"):
|
337 |
+
with gr.Row(elem_id="chatbot-row"):
|
338 |
+
with gr.Column(scale=2, elem_id="center-panel"):
|
339 |
+
with gr.Group(elem_id="chatbot-group"):
|
340 |
+
with gr.Accordion(
|
341 |
+
"Science agent",
|
342 |
+
open=False,
|
343 |
+
elem_id="accordion-science",
|
344 |
+
elem_classes="accordion",
|
345 |
+
):
|
346 |
+
chatbots[list(config["tabs"].keys())[0]] = gr.Chatbot(
|
347 |
+
show_copy_button=True,
|
348 |
+
show_share_button=False,
|
349 |
+
show_label=False,
|
350 |
+
elem_id="chatbot-science",
|
351 |
+
layout="panel",
|
352 |
+
avatar_images=(
|
353 |
+
"./assets/logos/help.png",
|
354 |
+
None,
|
355 |
+
),
|
356 |
+
)
|
357 |
+
|
358 |
+
with gr.Accordion(
|
359 |
+
"Law agent",
|
360 |
+
open=False,
|
361 |
+
elem_id="accordion-legal",
|
362 |
+
elem_classes="accordion",
|
363 |
+
):
|
364 |
+
chatbots[list(config["tabs"].keys())[1]] = gr.Chatbot(
|
365 |
+
show_copy_button=True,
|
366 |
+
show_share_button=False,
|
367 |
+
show_label=False,
|
368 |
+
elem_id="chatbot-legal",
|
369 |
+
layout="panel",
|
370 |
+
avatar_images=(
|
371 |
+
"./assets/logos/help.png",
|
372 |
+
None,
|
373 |
+
),
|
374 |
+
)
|
375 |
+
|
376 |
+
with gr.Accordion(
|
377 |
+
"Politics agent",
|
378 |
+
open=False,
|
379 |
+
elem_id="accordion-politique",
|
380 |
+
elem_classes="accordion",
|
381 |
+
):
|
382 |
+
chatbots[list(config["tabs"].keys())[2]] = gr.Chatbot(
|
383 |
+
show_copy_button=True,
|
384 |
+
show_share_button=False,
|
385 |
+
show_label=False,
|
386 |
+
elem_id="chatbot-politique",
|
387 |
+
layout="panel",
|
388 |
+
avatar_images=(
|
389 |
+
"./assets/logos/help.png",
|
390 |
+
None, # "https://i.ibb.co/cN0czLp/celsius-logo.png",
|
391 |
+
),
|
392 |
+
)
|
393 |
+
|
394 |
+
with gr.Accordion(
|
395 |
+
"ADEME agent",
|
396 |
+
open=False,
|
397 |
+
elem_id="accordion-ademe",
|
398 |
+
elem_classes="accordion",
|
399 |
+
):
|
400 |
+
chatbots[list(config["tabs"].keys())[3]] = gr.Chatbot(
|
401 |
+
show_copy_button=True,
|
402 |
+
show_share_button=False,
|
403 |
+
show_label=False,
|
404 |
+
elem_id="chatbot-ademe",
|
405 |
+
layout="panel",
|
406 |
+
avatar_images=(
|
407 |
+
"./assets/logos/help.png",
|
408 |
+
None, # "https://i.ibb.co/cN0czLp/celsius-logo.png",
|
409 |
+
),
|
410 |
+
)
|
411 |
+
|
412 |
+
with gr.Accordion(
|
413 |
+
"Press agent",
|
414 |
+
open=False,
|
415 |
+
elem_id="accordion-presse",
|
416 |
+
elem_classes="accordion",
|
417 |
+
):
|
418 |
+
chatbots[list(config["tabs"].keys())[4]] = gr.Chatbot(
|
419 |
+
show_copy_button=True,
|
420 |
+
show_share_button=False,
|
421 |
+
show_label=False,
|
422 |
+
elem_id="chatbot-presse",
|
423 |
+
layout="panel",
|
424 |
+
avatar_images=(
|
425 |
+
"./assets/logos/help.png",
|
426 |
+
None, # "https://i.ibb.co/cN0czLp/celsius-logo.png",
|
427 |
+
),
|
428 |
+
)
|
429 |
+
|
430 |
+
with gr.Accordion(
|
431 |
+
"Spinoza",
|
432 |
+
open=True,
|
433 |
+
elem_id="accordion-spinoza",
|
434 |
+
elem_classes="accordion",
|
435 |
+
):
|
436 |
+
chatbots["spinoza"] = gr.Chatbot(
|
437 |
+
value=[(None, init_prompt)],
|
438 |
+
show_copy_button=True,
|
439 |
+
show_share_button=False,
|
440 |
+
show_label=False,
|
441 |
+
elem_id="chatbot-spinoza",
|
442 |
+
layout="panel",
|
443 |
+
avatar_images=(
|
444 |
+
"./assets/logos/help.png",
|
445 |
+
"./assets/logos/spinoza.png",
|
446 |
+
),
|
447 |
+
)
|
448 |
+
|
449 |
+
with gr.Row(elem_id="input-message"):
|
450 |
+
ask = gr.Textbox(
|
451 |
+
placeholder="Ask me anything here!",
|
452 |
+
show_label=False,
|
453 |
+
scale=7,
|
454 |
+
lines=1,
|
455 |
+
interactive=True,
|
456 |
+
elem_id="input-textbox",
|
457 |
+
)
|
458 |
+
|
459 |
+
with gr.Column(scale=1, variant="panel", elem_id="right-panel"):
|
460 |
+
with gr.TabItem("Sources", elem_id="tab-sources", id=0):
|
461 |
+
sources_textbox = gr.HTML(
|
462 |
+
show_label=False, elem_id="sources-textbox"
|
463 |
+
)
|
464 |
+
|
465 |
+
with gr.Tab("Source information", elem_id="source-component"):
|
466 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
with gr.Column(scale=1):
|
468 |
+
gr.Markdown(source_information)
|
|
|
|
|
469 |
|
470 |
+
with gr.Tab("Contact", elem_id="contact-component"):
|
471 |
+
with gr.Row():
|
472 |
+
with gr.Column(scale=1):
|
473 |
+
gr.Markdown("For any issue contact **spinoza.[email protected]**.")
|
474 |
+
|
475 |
+
ask.submit(start_agents, inputs=[], outputs=[], js=accordion_trigger()).then(
|
476 |
+
fn=reformulate_questions,
|
477 |
+
inputs=[ask],
|
478 |
+
outputs=[agent_questions[tab] for tab in config["tabs"]],
|
479 |
+
).then(
|
480 |
+
fn=retrieve_sources,
|
481 |
+
inputs=[agent_questions[tab] for tab in config["tabs"]],
|
482 |
+
outputs=[sources_textbox] + [text_sources[tab] for tab in config["tabs"]],
|
483 |
+
).then(
|
484 |
+
fn=answer_questions,
|
485 |
+
inputs=[agent_questions[tab] for tab in config["tabs"]]
|
486 |
+
+ [text_sources[tab] for tab in config["tabs"]],
|
487 |
+
outputs=[chatbots[tab] for tab in config["tabs"]],
|
488 |
+
).then(
|
489 |
+
fn=next_call, inputs=[], outputs=[], js=accordion_trigger_end()
|
490 |
+
).then(
|
491 |
+
fn=next_call, inputs=[], outputs=[], js=accordion_trigger_spinoza()
|
492 |
+
).then(
|
493 |
+
fn=get_synthesis,
|
494 |
+
inputs=[agent_questions[list(config["tabs"].keys())[1]]]
|
495 |
+
+ [chatbots[tab] for tab in config["tabs"]],
|
496 |
+
outputs=[chatbots["spinoza"]],
|
497 |
+
).then(
|
498 |
+
fn=next_call, inputs=[], outputs=[], js=accordion_trigger_spinoza_end()
|
499 |
+
).then(
|
500 |
+
fn=end_agents, inputs=[], outputs=[]
|
501 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
|
504 |
if __name__ == "__main__":
|
505 |
demo.queue().launch(share=True, debug=True)
|
assets/logos/apig.png
ADDED
![]() |
assets/logos/help.png
ADDED
![]() |
assets/logos/question.png
ADDED
![]() |
assets/logos/rsf.png
ADDED
![]() |
assets/logos/spinoza.png
ADDED
![]() |
assets/source_information.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Here is a quick introduction to the sources of data accessed by the different agents.
|
2 |
+
|
3 |
+
1. **Science:** this tool is composed of IPCC and IPBES reports.
|
4 |
+
|
5 |
+
2. **Legal:** this tool is based on French law; it gathers 21 of the "codes" that were modified by "la loi climat" of 2021.
|
6 |
+
|
7 |
+
3. **Politics:** this tool queries the French national low-carbon policy (SNBC).
|
8 |
+
|
9 |
+
4. **ADEME:** this tool is dedicated to ADEME data and we have selected various categories of reports :
|
10 |
+
|
11 |
+
- Guides made available to the general public
|
12 |
+
- Reports on experiences with new technologies
|
13 |
+
- Studies and research on local impacts, and institutional documents (analyses requested by France & activity reports)
|
14 |
+
- Sectoral transition plans for the industrial sectors with the highest emissions (glass, paper, cement, steel, aluminium, chemicals, sugar)
|
assets/style.css
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
:root {
|
2 |
--user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
|
3 |
-
|
4 |
|
5 |
.warning-box {
|
6 |
background-color: #fff3cd;
|
@@ -11,21 +11,32 @@
|
|
11 |
color: #856404;
|
12 |
display: inline-block;
|
13 |
margin-bottom: 15px;
|
14 |
-
|
|
|
15 |
|
16 |
.tip-box {
|
17 |
background-color: #f0f9ff;
|
18 |
border: 1px solid #80d4fa;
|
19 |
border-radius: 4px;
|
20 |
-
margin-top:20px;
|
21 |
padding: 15px 20px;
|
22 |
font-size: 14px;
|
23 |
-
color: #006064;
|
24 |
display: inline-block;
|
25 |
margin-bottom: 15px;
|
26 |
width: auto;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
}
|
28 |
|
|
|
29 |
.tip-box-title {
|
30 |
font-weight: bold;
|
31 |
font-size: 14px;
|
@@ -37,14 +48,16 @@
|
|
37 |
margin-right: 5px;
|
38 |
}
|
39 |
|
40 |
-
.gr-box {
|
|
|
|
|
41 |
|
42 |
-
#hidden-message{
|
43 |
-
display:none;
|
44 |
}
|
45 |
|
46 |
-
.message{
|
47 |
-
font-size:14px !important;
|
48 |
}
|
49 |
|
50 |
|
@@ -60,7 +73,7 @@ a {
|
|
60 |
overflow: hidden;
|
61 |
display: flex;
|
62 |
flex-direction: column;
|
63 |
-
margin:20px;
|
64 |
}
|
65 |
|
66 |
.card-content {
|
@@ -71,8 +84,9 @@ a {
|
|
71 |
font-size: 14px !important;
|
72 |
font-weight: bold;
|
73 |
margin-bottom: 10px;
|
74 |
-
margin-top:0px !important;
|
75 |
-
color
|
|
|
76 |
}
|
77 |
|
78 |
.card-content p {
|
@@ -80,16 +94,6 @@ a {
|
|
80 |
margin-bottom: 0;
|
81 |
}
|
82 |
|
83 |
-
.card-content-column-1 {
|
84 |
-
float: left;
|
85 |
-
width: 20%;
|
86 |
-
}
|
87 |
-
|
88 |
-
.card-content-column-2 {
|
89 |
-
float: left;
|
90 |
-
width: 80%;
|
91 |
-
}
|
92 |
-
|
93 |
.card-footer {
|
94 |
background-color: #f4f4f4;
|
95 |
font-size: 10px;
|
@@ -109,55 +113,51 @@ a {
|
|
109 |
display: inline-flex;
|
110 |
align-items: center;
|
111 |
margin-left: auto;
|
112 |
-
text-decoration: none!important;
|
113 |
font-size: 14px;
|
114 |
}
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
color
|
|
|
|
|
120 |
}
|
121 |
|
122 |
-
.message.bot{
|
123 |
-
background-color:#f2f2f7 !important;
|
124 |
-
border:none;
|
125 |
}
|
126 |
|
127 |
-
.gallery-item > div:hover{
|
128 |
background-color:#7494b0 !important;
|
129 |
color:white!important;
|
130 |
}
|
131 |
-
|
132 |
.gallery-item:hover{
|
133 |
border:#7494b0 !important;
|
134 |
}
|
135 |
-
|
136 |
.gallery-item > div{
|
137 |
background-color:white !important;
|
138 |
color:#577b9b!important;
|
139 |
}
|
140 |
-
|
141 |
.label{
|
142 |
color:#577b9b!important;
|
143 |
-
}
|
144 |
|
145 |
-
|
146 |
-
background:none !important;
|
147 |
-
}
|
148 |
-
|
149 |
-
.paginate{
|
150 |
color:#577b9b!important;
|
151 |
-
}
|
152 |
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
|
|
158 |
|
159 |
/* Pseudo-element for the circularly cropped picture */
|
160 |
-
.message.bot::before {
|
161 |
content: '';
|
162 |
position: absolute;
|
163 |
top: -10px;
|
@@ -169,4 +169,269 @@ label > span{
|
|
169 |
background-position: center;
|
170 |
border-radius: 50%;
|
171 |
z-index: 10;
|
172 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* :root {
|
2 |
--user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
|
3 |
+
} */
|
4 |
|
5 |
.warning-box {
|
6 |
background-color: #fff3cd;
|
|
|
11 |
color: #856404;
|
12 |
display: inline-block;
|
13 |
margin-bottom: 15px;
|
14 |
+
}
|
15 |
+
|
16 |
|
17 |
.tip-box {
|
18 |
background-color: #f0f9ff;
|
19 |
border: 1px solid #80d4fa;
|
20 |
border-radius: 4px;
|
21 |
+
margin-top: 20px;
|
22 |
padding: 15px 20px;
|
23 |
font-size: 14px;
|
|
|
24 |
display: inline-block;
|
25 |
margin-bottom: 15px;
|
26 |
width: auto;
|
27 |
+
color: black !important;
|
28 |
+
}
|
29 |
+
|
30 |
+
body.dark .warning-box * {
|
31 |
+
color: black !important;
|
32 |
+
}
|
33 |
+
|
34 |
+
|
35 |
+
body.dark .tip-box * {
|
36 |
+
color: black !important;
|
37 |
}
|
38 |
|
39 |
+
|
40 |
.tip-box-title {
|
41 |
font-weight: bold;
|
42 |
font-size: 14px;
|
|
|
48 |
margin-right: 5px;
|
49 |
}
|
50 |
|
51 |
+
.gr-box {
|
52 |
+
border-color: #d6c37c
|
53 |
+
}
|
54 |
|
55 |
+
#hidden-message {
|
56 |
+
display: none;
|
57 |
}
|
58 |
|
59 |
+
.message {
|
60 |
+
font-size: 14px !important;
|
61 |
}
|
62 |
|
63 |
|
|
|
73 |
overflow: hidden;
|
74 |
display: flex;
|
75 |
flex-direction: column;
|
76 |
+
margin: 20px;
|
77 |
}
|
78 |
|
79 |
.card-content {
|
|
|
84 |
font-size: 14px !important;
|
85 |
font-weight: bold;
|
86 |
margin-bottom: 10px;
|
87 |
+
margin-top: 0px !important;
|
88 |
+
color: #dc2626 !important;
|
89 |
+
;
|
90 |
}
|
91 |
|
92 |
.card-content p {
|
|
|
94 |
margin-bottom: 0;
|
95 |
}
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
.card-footer {
|
98 |
background-color: #f4f4f4;
|
99 |
font-size: 10px;
|
|
|
113 |
display: inline-flex;
|
114 |
align-items: center;
|
115 |
margin-left: auto;
|
116 |
+
text-decoration: none !important;
|
117 |
font-size: 14px;
|
118 |
}
|
119 |
|
120 |
+
|
121 |
+
|
122 |
+
.message.user {
|
123 |
+
/* background-color:#7494b0 !important; */
|
124 |
+
border: none;
|
125 |
+
/* color:white!important; */
|
126 |
}
|
127 |
|
128 |
+
.message.bot {
|
129 |
+
/* background-color:#f2f2f7 !important; */
|
130 |
+
border: none;
|
131 |
}
|
132 |
|
133 |
+
/* .gallery-item > div:hover{
|
134 |
background-color:#7494b0 !important;
|
135 |
color:white!important;
|
136 |
}
|
|
|
137 |
.gallery-item:hover{
|
138 |
border:#7494b0 !important;
|
139 |
}
|
|
|
140 |
.gallery-item > div{
|
141 |
background-color:white !important;
|
142 |
color:#577b9b!important;
|
143 |
}
|
|
|
144 |
.label{
|
145 |
color:#577b9b!important;
|
146 |
+
} */
|
147 |
|
148 |
+
/* .paginate{
|
|
|
|
|
|
|
|
|
149 |
color:#577b9b!important;
|
150 |
+
} */
|
151 |
|
152 |
|
153 |
+
|
154 |
+
/* span[data-testid="block-info"]{
|
155 |
+
background:none !important;
|
156 |
+
color:#577b9b;
|
157 |
+
} */
|
158 |
|
159 |
/* Pseudo-element for the circularly cropped picture */
|
160 |
+
/* .message.bot::before {
|
161 |
content: '';
|
162 |
position: absolute;
|
163 |
top: -10px;
|
|
|
169 |
background-position: center;
|
170 |
border-radius: 50%;
|
171 |
z-index: 10;
|
172 |
+
}
|
173 |
+
*/
|
174 |
+
|
175 |
+
label.selected {
|
176 |
+
background: none !important;
|
177 |
+
}
|
178 |
+
|
179 |
+
#submit-button {
|
180 |
+
padding: 0px !important;
|
181 |
+
}
|
182 |
+
|
183 |
+
|
184 |
+
@media screen and (min-width: 1024px) {
|
185 |
+
div#sources-textbox {
|
186 |
+
height: calc(100vh - 190px) !important;
|
187 |
+
overflow-y: auto !important;
|
188 |
+
scrollbar-width: none;
|
189 |
+
-ms-overflow-style: none;
|
190 |
+
}
|
191 |
+
|
192 |
+
div#sources-textbox::-webkit-scrollbar {
|
193 |
+
width: 0;
|
194 |
+
height: 0;
|
195 |
+
}
|
196 |
+
|
197 |
+
div.svelte-iyf88w {
|
198 |
+
scrollbar-width: none;
|
199 |
+
}
|
200 |
+
|
201 |
+
div.svelte-iyf88w::-webkit-scrollbar {
|
202 |
+
width: 0;
|
203 |
+
height: 0;
|
204 |
+
}
|
205 |
+
|
206 |
+
div#chatbot-row {
|
207 |
+
height: calc(100vh - 90px) !important;
|
208 |
+
}
|
209 |
+
|
210 |
+
.max-height {
|
211 |
+
height: calc(100vh - 90px) !important;
|
212 |
+
overflow-y: auto;
|
213 |
+
}
|
214 |
+
|
215 |
+
div.svelte-iyf88w {
|
216 |
+
height: calc(100vh - 160px) !important;
|
217 |
+
overflow-y: auto;
|
218 |
+
}
|
219 |
+
|
220 |
+
#accordion-spinoza {
|
221 |
+
height: calc(100vh - 160px) !important;
|
222 |
+
}
|
223 |
+
|
224 |
+
.form {
|
225 |
+
position: relative;
|
226 |
+
top: 10px;
|
227 |
+
}
|
228 |
+
|
229 |
+
#accordion-spinoza>open>span:nth-child(1) {
|
230 |
+
color: #000000;
|
231 |
+
font-size: large;
|
232 |
+
font-weight: bold;
|
233 |
+
}
|
234 |
+
|
235 |
+
#accordion-spinoza>button:nth-child(2)>span:nth-child(1) {
|
236 |
+
color: #000000;
|
237 |
+
font-size: large;
|
238 |
+
font-weight: bold;
|
239 |
+
}
|
240 |
+
|
241 |
+
#accordion-science>button:nth-child(2)>span:nth-child(1) {
|
242 |
+
color: #9ca1a5e7;
|
243 |
+
font-weight: bold;
|
244 |
+
}
|
245 |
+
|
246 |
+
#accordion-presse>button:nth-child(2)>span:nth-child(1) {
|
247 |
+
color: #9ca1a5e7;
|
248 |
+
font-weight: bold;
|
249 |
+
}
|
250 |
+
|
251 |
+
#accordion-legal>button:nth-child(2)>span:nth-child(1) {
|
252 |
+
color: #9ca1a5e7;
|
253 |
+
font-weight: bold;
|
254 |
+
}
|
255 |
+
|
256 |
+
#accordion-politique>button:nth-child(2)>span:nth-child(1) {
|
257 |
+
color: #9ca1a5e7;
|
258 |
+
font-weight: bold;
|
259 |
+
}
|
260 |
+
|
261 |
+
#accordion-ademe>button:nth-child(2)>span:nth-child(1) {
|
262 |
+
color: #9ca1a5e7;
|
263 |
+
font-weight: bold;
|
264 |
+
}
|
265 |
+
|
266 |
+
}
|
267 |
+
|
268 |
+
footer {
|
269 |
+
visibility: hidden;
|
270 |
+
display: none !important;
|
271 |
+
}
|
272 |
+
|
273 |
+
/* @media screen and (max-width: 767px) {
|
274 |
+
/* Your mobile-specific styles go here */
|
275 |
+
|
276 |
+
div#chatbot {
|
277 |
+
height: 500px !important;
|
278 |
+
}
|
279 |
+
|
280 |
+
#submit-button {
|
281 |
+
padding: 0px !important;
|
282 |
+
min-width: 80px;
|
283 |
+
}
|
284 |
+
|
285 |
+
/* This will hide all list items */
|
286 |
+
div.tab-nav button {
|
287 |
+
display: none !important;
|
288 |
+
}
|
289 |
+
|
290 |
+
/* This will show only the first list item */
|
291 |
+
div.tab-nav button:first-child {
|
292 |
+
display: block !important;
|
293 |
+
}
|
294 |
+
|
295 |
+
/* This will also show the second list item */
|
296 |
+
div.tab-nav button:nth-child(2) {
|
297 |
+
display: block !important;
|
298 |
+
}
|
299 |
+
|
300 |
+
/* This will also show the third list item */
|
301 |
+
div.tab-nav button:nth-child(3) {
|
302 |
+
display: block !important;
|
303 |
+
}
|
304 |
+
|
305 |
+
#right-panel button {
|
306 |
+
display: block !important;
|
307 |
+
}
|
308 |
+
|
309 |
+
/* ... add other mobile-specific styles ... */
|
310 |
+
|
311 |
+
*/ body.dark .card {
|
312 |
+
background-color: #374151;
|
313 |
+
}
|
314 |
+
|
315 |
+
body.dark .card-content h2 {
|
316 |
+
color: #f4dbd3 !important;
|
317 |
+
}
|
318 |
+
|
319 |
+
body.dark .card-footer {
|
320 |
+
background-color: #404652;
|
321 |
+
}
|
322 |
+
|
323 |
+
body.dark .card-footer span {
|
324 |
+
color: white !important;
|
325 |
+
}
|
326 |
+
|
327 |
+
|
328 |
+
.doc-ref {
|
329 |
+
color: #dc2626 !important;
|
330 |
+
margin-right: 1px;
|
331 |
+
}
|
332 |
+
|
333 |
+
.tabitem {
|
334 |
+
border: none !important;
|
335 |
+
}
|
336 |
+
|
337 |
+
.other-tabs>div {
|
338 |
+
padding-left: 40px;
|
339 |
+
padding-right: 40px;
|
340 |
+
padding-top: 10px;
|
341 |
+
}
|
342 |
+
|
343 |
+
.gallery-item>div {
|
344 |
+
white-space: normal !important;
|
345 |
+
/* Allow the text to wrap */
|
346 |
+
word-break: break-word !important;
|
347 |
+
/* Break words to prevent overflow */
|
348 |
+
overflow-wrap: break-word !important;
|
349 |
+
/* Break long words if necessary */
|
350 |
+
}
|
351 |
+
|
352 |
+
span.chatbot>p>img {
|
353 |
+
margin-top: 40px !important;
|
354 |
+
max-height: none !important;
|
355 |
+
max-width: 80% !important;
|
356 |
+
border-radius: 0px !important;
|
357 |
+
}
|
358 |
+
|
359 |
+
|
360 |
+
.chatbot-caption {
|
361 |
+
font-size: 11px;
|
362 |
+
font-style: italic;
|
363 |
+
color: #508094;
|
364 |
+
}
|
365 |
+
|
366 |
+
.ai-generated {
|
367 |
+
font-size: 11px !important;
|
368 |
+
font-style: italic;
|
369 |
+
color: #73b8d4 !important;
|
370 |
+
}
|
371 |
+
|
372 |
+
.card-image>.card-content {
|
373 |
+
background-color: #f1f7fa !important;
|
374 |
+
}
|
375 |
+
|
376 |
+
|
377 |
+
|
378 |
+
.tab-nav>button.selected {
|
379 |
+
color: #4b8ec3;
|
380 |
+
font-weight: bold;
|
381 |
+
border: none;
|
382 |
+
}
|
383 |
+
|
384 |
+
.tab-nav {
|
385 |
+
border: none !important;
|
386 |
+
}
|
387 |
+
|
388 |
+
#input-textbox>label>textarea {
|
389 |
+
border-radius: 40px;
|
390 |
+
padding-left: 30px;
|
391 |
+
resize: none;
|
392 |
+
}
|
393 |
+
|
394 |
+
#input-message>div {
|
395 |
+
border: none;
|
396 |
+
}
|
397 |
+
|
398 |
+
#dropdown-samples {
|
399 |
+
/*! border:none !important; */
|
400 |
+
/*! border-width:0px !important; */
|
401 |
+
background: none !important;
|
402 |
+
|
403 |
+
}
|
404 |
+
|
405 |
+
#dropdown-samples>.container>.wrap {
|
406 |
+
background-color: white;
|
407 |
+
}
|
408 |
+
|
409 |
+
/* HTML: <div class="loader"></div> */
|
410 |
+
/* .loader {
|
411 |
+
display: inline-flex;
|
412 |
+
gap: 5px;
|
413 |
+
animation: l2-0 1s infinite;
|
414 |
+
}
|
415 |
+
.loader:before,
|
416 |
+
.loader:after {
|
417 |
+
content: "";
|
418 |
+
width: 25px;
|
419 |
+
aspect-ratio: 1;
|
420 |
+
box-shadow: 0 0 0 3px inset #fff;
|
421 |
+
animation: l2-1 1s infinite;
|
422 |
+
}
|
423 |
+
.loader:after {
|
424 |
+
--s: -1;
|
425 |
+
}
|
426 |
+
@keyframes l2-0 {
|
427 |
+
0%,
|
428 |
+
50% {transform:rotate(0deg)}
|
429 |
+
80%,
|
430 |
+
100% {transform:rotate(180deg)}
|
431 |
+
}
|
432 |
+
@keyframes l2-1 {
|
433 |
+
0% {transform:translate(0)}
|
434 |
+
50%,
|
435 |
+
80% {transform:translate(calc(var(--s,1)*2.5px))}
|
436 |
+
100% {transform:translate(0)}
|
437 |
+
} */
|
assets/utils_javascript.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def accordion_trigger():
|
2 |
+
return """
|
3 |
+
function accordion_trigger() {
|
4 |
+
input_textbox = document.getElementById("input-textbox")
|
5 |
+
input_textbox.addEventListener('keyup', function (e) {
|
6 |
+
if (e.key === 'Enter' || e.keyCode === 13) {
|
7 |
+
var accordion_science = document.getElementById("accordion-science")
|
8 |
+
var accordion_presse = document.getElementById("accordion-presse")
|
9 |
+
var accordion_politique = document.getElementById("accordion-politique")
|
10 |
+
var accordion_legal = document.getElementById("accordion-legal")
|
11 |
+
var accordion_ademe= document.getElementById("accordion-ademe")
|
12 |
+
accordion_science.children[1].children[0].textContent = "Science agent - loading";
|
13 |
+
accordion_science.children[1].children[1].classList.add('loader');
|
14 |
+
accordion_presse.children[1].children[0].textContent = "Press agent - loading";
|
15 |
+
accordion_presse.children[1].children[0].classList.add('loader');
|
16 |
+
accordion_politique.children[1].children[0].textContent = "Politics agent - loading";
|
17 |
+
accordion_politique.children[1].children[0].classList.add('loader');
|
18 |
+
accordion_legal.children[1].children[0].textContent = "Law agent - loading";
|
19 |
+
accordion_legal.children[1].children[0].classList.add('loader');
|
20 |
+
accordion_ademe.children[1].children[0].textContent = "ADEME agent - loading";
|
21 |
+
accordion_ademe.children[1].children[0].classList.add('loader');
|
22 |
+
}
|
23 |
+
});
|
24 |
+
}
|
25 |
+
"""
|
26 |
+
|
27 |
+
|
28 |
+
def accordion_trigger_end():
|
29 |
+
return """
|
30 |
+
function accordion_trigger_end() {
|
31 |
+
var accordion_science = document.getElementById("accordion-science")
|
32 |
+
var accordion_presse = document.getElementById("accordion-presse")
|
33 |
+
var accordion_politique = document.getElementById("accordion-politique")
|
34 |
+
var accordion_legal = document.getElementById("accordion-legal")
|
35 |
+
var accordion_ademe = document.getElementById("accordion-ademe")
|
36 |
+
accordion_science.children[1].children[0].textContent = "Science agent - ready";
|
37 |
+
accordion_science.children[1].children[1].classList.remove('loader');
|
38 |
+
accordion_presse.children[1].children[0].textContent = "Press agent - ready";
|
39 |
+
accordion_presse.children[1].children[0].classList.remove('loader');
|
40 |
+
accordion_politique.children[1].children[0].textContent = "Politics agent - ready";
|
41 |
+
accordion_politique.children[1].children[0].classList.remove('loader');
|
42 |
+
accordion_legal.children[1].children[0].textContent = "Law agent - ready";
|
43 |
+
accordion_legal.children[1].children[0].classList.remove('loader');
|
44 |
+
accordion_ademe.children[1].children[0].textContent = "ADEME agent - ready";
|
45 |
+
accordion_ademe.children[1].children[0].classList.remove('loader');
|
46 |
+
|
47 |
+
}
|
48 |
+
"""
|
49 |
+
|
50 |
+
|
51 |
+
def accordion_trigger_spinoza():
|
52 |
+
return """
|
53 |
+
function accordion_trigger_spinoza() {
|
54 |
+
var accordion_spinoza = document.getElementById("accordion-spinoza")
|
55 |
+
accordion_spinoza.children[1].children[0].textContent = "Spinoza - generating";
|
56 |
+
accordion_spinoza.children[1].children[0].classList.add('loader');
|
57 |
+
}
|
58 |
+
"""
|
59 |
+
|
60 |
+
|
61 |
+
def accordion_trigger_spinoza_end():
|
62 |
+
return """
|
63 |
+
function accordion_trigger_spinoza_end() {
|
64 |
+
var accordion_spinoza = document.getElementById("accordion-spinoza")
|
65 |
+
accordion_spinoza.children[1].children[0].textContent = "Spinoza - ready";
|
66 |
+
accordion_spinoza.children[1].children[0].classList.remove('loader');
|
67 |
+
}
|
68 |
+
"""
|
69 |
+
|
70 |
+
|
71 |
+
def accordion_trigger_end_science():
|
72 |
+
return """
|
73 |
+
function accordion_trigger_end() {
|
74 |
+
var accordion_science = document.getElementById("accordion-science")
|
75 |
+
accordion_science.children[1].children[0].textContent = "Science agent - ready";
|
76 |
+
console.log("DONE - end science")
|
77 |
+
}
|
78 |
+
"""
|
79 |
+
|
80 |
+
|
81 |
+
def accordion_trigger_end_presse():
|
82 |
+
return """
|
83 |
+
function accordion_trigger_end() {
|
84 |
+
var accordion_presse = document.getElementById("accordion-presse")
|
85 |
+
accordion_presse.children[1].children[0].textContent = "Presse agent - ready";
|
86 |
+
console.log("DONE - end presse")
|
87 |
+
}
|
88 |
+
"""
|
89 |
+
|
90 |
+
|
91 |
+
def accordion_trigger_end_politique():
|
92 |
+
return """
|
93 |
+
function accordion_trigger_end() {
|
94 |
+
var accordion_politique = document.getElementById("accordion-politique")
|
95 |
+
accordion_politique.children[1].children[0].textContent = "Politique agent - ready";
|
96 |
+
console.log("DONE - end politique")
|
97 |
+
}
|
98 |
+
"""
|
99 |
+
|
100 |
+
|
101 |
+
def accordion_trigger_end_legal():
|
102 |
+
return """
|
103 |
+
function accordion_trigger_end() {
|
104 |
+
var accordion_legal = document.getElementById("accordion-legal")
|
105 |
+
accordion_legal.children[1].children[0].textContent = "Legal agent - ready";
|
106 |
+
console.log("DONE - end legal")
|
107 |
+
}
|
108 |
+
"""
|
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -9,7 +9,7 @@ package-mode = true
|
|
9 |
[tool.poetry.dependencies]
|
10 |
python = "^3.10"
|
11 |
langchain = "^0.2.5"
|
12 |
-
gradio =
|
13 |
sentence-transformers = "2.2.2"
|
14 |
msal = "^1.28.1"
|
15 |
langchain-openai = "^0.1.8"
|
|
|
9 |
[tool.poetry.dependencies]
|
10 |
python = "^3.10"
|
11 |
langchain = "^0.2.5"
|
12 |
+
gradio = "4.37.2"
|
13 |
sentence-transformers = "2.2.2"
|
14 |
msal = "^1.28.1"
|
15 |
langchain-openai = "^0.1.8"
|
requirements.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
spinoza_project/source/frontend/utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from queue import SimpleQueue
|
2 |
from dotenv import load_dotenv
|
|
|
3 |
from langchain.callbacks.base import BaseCallbackHandler
|
4 |
|
5 |
job_done = object() # signals the processing is done
|
@@ -57,51 +58,66 @@ def get_source_link(metadata):
|
|
57 |
return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
|
58 |
|
59 |
|
60 |
-
def make_html_presse_source(source, i,
|
61 |
meta = source.metadata
|
62 |
return f"""
|
63 |
-
<div class="card" id="
|
64 |
<div class="card-content">
|
65 |
-
<
|
66 |
-
|
67 |
-
<p><strong>Auteur/s :</strong> {meta['file_source_type']}</p>
|
68 |
-
<p><strong>Date :</strong> {meta['file_date_publishing']}</p>
|
69 |
-
</div>
|
70 |
-
<div class="card-content-column-2">
|
71 |
-
<p><strong>Paragraphe id :</strong> {source.page_content}</p>
|
72 |
-
</div>
|
73 |
</div>
|
74 |
<div class="card-footer">
|
75 |
-
<span>[
|
76 |
<span>Relevance Score : {round(100*score,1)}%</span>
|
|
|
|
|
|
|
77 |
</div>
|
78 |
</div>
|
79 |
"""
|
80 |
|
81 |
|
82 |
-
def make_html_source(source, i,
|
83 |
meta = source.metadata
|
84 |
return f"""
|
85 |
-
<div class="card" id="
|
86 |
<div class="card-content">
|
87 |
-
<
|
88 |
-
|
89 |
-
<p><strong>Auteur/s :</strong> {meta['file_source_type']}</p>
|
90 |
-
<p><strong>Date :</strong> {meta['file_date_publishing']}</p>
|
91 |
-
</div>
|
92 |
-
<div class="card-content-column-2">
|
93 |
-
<p><strong>Paragraphe id :</strong> {source.page_content.replace(config["passage_preprompt"], "")}</p>
|
94 |
-
</div>
|
95 |
</div>
|
96 |
<div class="card-footer">
|
97 |
-
<span>[
|
98 |
-
<span><a href="{get_source_link(meta)}" target="_blank">Lien source</a></span>
|
99 |
-
<span>Page {meta['content_page_number'] + 1}</span>
|
100 |
<span>Relevance Score : {round(100*score,1)}%</span>
|
|
|
|
|
|
|
101 |
</div>
|
102 |
</div>
|
103 |
"""
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
def clear_text_box(textbox):
|
107 |
return ""
|
|
|
1 |
from queue import SimpleQueue
|
2 |
from dotenv import load_dotenv
|
3 |
+
import re
|
4 |
from langchain.callbacks.base import BaseCallbackHandler
|
5 |
|
6 |
job_done = object() # signals the processing is done
|
|
|
58 |
return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
|
59 |
|
60 |
|
61 |
+
def make_html_presse_source(source, i, score, config):
|
62 |
meta = source.metadata
|
63 |
return f"""
|
64 |
+
<div class="card" id="doc{i}">
|
65 |
<div class="card-content">
|
66 |
+
<h2>Doc {i} - {meta['file_title']}</h2>
|
67 |
+
<p>{source.page_content}</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
</div>
|
69 |
<div class="card-footer">
|
70 |
+
<span>{meta['file_source_type']}</span>
|
71 |
<span>Relevance Score : {round(100*score,1)}%</span>
|
72 |
+
<a href="https://fr.wikipedia.org/wiki/Baruch_Spinoza" target="_blank">
|
73 |
+
<span role="img" aria-label="Open PDF">🔗</span>
|
74 |
+
</a>
|
75 |
</div>
|
76 |
</div>
|
77 |
"""
|
78 |
|
79 |
|
80 |
+
def make_html_source(source, i, score, config):
|
81 |
meta = source.metadata
|
82 |
return f"""
|
83 |
+
<div class="card" id="doc{i}">
|
84 |
<div class="card-content">
|
85 |
+
<h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
|
86 |
+
<p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
</div>
|
88 |
<div class="card-footer">
|
89 |
+
<span>{meta['file_source_type']}</span>
|
|
|
|
|
90 |
<span>Relevance Score : {round(100*score,1)}%</span>
|
91 |
+
<a href="{get_source_link(meta)}" target="_blank">
|
92 |
+
<span role="img" aria-label="Open PDF">🔗</span>
|
93 |
+
</a>
|
94 |
</div>
|
95 |
</div>
|
96 |
"""
|
97 |
|
98 |
+
def parse_output_llm_with_sources(output):
|
99 |
+
content_parts = re.split(
|
100 |
+
r"[\[(]?(Doc\s?\d+(?:,\s?Doc\s?\d+)*|doc\s?\d+(?:,\s?doc\s?\d+)*|Doc\s\d+)[\])?]",
|
101 |
+
output,
|
102 |
+
)
|
103 |
+
parts = []
|
104 |
+
for part in content_parts:
|
105 |
+
if part.lower().startswith("doc"):
|
106 |
+
subparts = part.split(",")
|
107 |
+
subparts = [
|
108 |
+
subpart.lower().replace("doc", "").strip() for subpart in subparts
|
109 |
+
]
|
110 |
+
subparts = [
|
111 |
+
f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>"""
|
112 |
+
for subpart in subparts
|
113 |
+
]
|
114 |
+
parts.append("".join(subparts))
|
115 |
+
else:
|
116 |
+
parts.append(part)
|
117 |
+
content_parts = "".join(parts)
|
118 |
+
|
119 |
+
return content_parts
|
120 |
+
|
121 |
|
122 |
def clear_text_box(textbox):
|
123 |
return ""
|