momenaca committed on
Commit
1ac0e91
·
1 Parent(s): 4a2a831

update major frontend and backend features

Browse files
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🐨
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  hf_oauth: true
 
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.37.2
8
  app_file: app.py
9
  pinned: false
10
  hf_oauth: true
app.py CHANGED
@@ -1,15 +1,15 @@
 
1
  import time
2
  import yaml
3
- import logging
4
- import gradio as gr
5
  from langchain.prompts.chat import ChatPromptTemplate
6
- from huggingface_hub import hf_hub_download, whoami
7
  from spinoza_project.source.backend.llm_utils import get_llm, get_vectorstore
8
  from spinoza_project.source.backend.document_store import pickle_to_document_store
9
  from spinoza_project.source.backend.get_prompts import get_qa_prompts
10
  from spinoza_project.source.frontend.utils import (
11
  make_html_source,
12
  make_html_presse_source,
 
13
  init_env,
14
  )
15
  from spinoza_project.source.backend.prompt_utils import (
@@ -17,6 +17,13 @@ from spinoza_project.source.backend.prompt_utils import (
17
  SpecialTokens,
18
  )
19
 
 
 
 
 
 
 
 
20
  init_env()
21
 
22
  with open("./spinoza_project/config.yaml") as f:
@@ -55,18 +62,12 @@ for source, prompt in prompts.items():
55
  chat_qa_prompt, chat_reformulation_prompt = get_qa_prompts(config, prompt)
56
  chat_qa_prompts[source] = chat_qa_prompt
57
  chat_reformulation_prompts[source] = chat_reformulation_prompt
58
- # chat_summarize_memory_prompts[source] = chat_summarize_memory_prompt
59
 
60
 
61
  with open("./assets/style.css", "r") as f:
62
  css = f.read()
63
 
64
 
65
- def update_tabs(outil, visible_tabs):
66
- visible_tabs = outil
67
- return visible_tabs
68
-
69
-
70
  special_tokens = SpecialTokens(config)
71
 
72
  synthesis_template = """You are a factual journalist that summarize the secialized awnsers from thechnical sources.
@@ -117,80 +118,6 @@ def zip_longest_fill(*args, fillvalue=None):
117
  yield tuple(values)
118
 
119
 
120
- def build_data_dict(config):
121
- data_dict = {}
122
- for tab in config["tabs"]:
123
- data_dict[tab] = {
124
- "tab": {
125
- "init_value": tab,
126
- "component": None,
127
- "elem_id": "tab",
128
- },
129
- "description": {
130
- "init_value": config["tabs"][tab],
131
- "component": None,
132
- "elem_id": "desc",
133
- },
134
- "question": {
135
- "init_value": None,
136
- "component": None,
137
- "elem_id": "question",
138
- },
139
- "answer": {
140
- "init_value": None,
141
- "component": None,
142
- "elem_id": "answer",
143
- },
144
- "sources": {
145
- "init_value": None,
146
- "component": None,
147
- "elem_id": "src",
148
- },
149
- }
150
- return data_dict
151
-
152
-
153
- def init_gradio(data, config=config):
154
- for t in data:
155
- data[t]["tab"]["component"] = gr.Tab(
156
- data[t]["tab"]["init_value"], elem_id="tab"
157
- )
158
- with data[t]["tab"]["component"]:
159
- for fields in data[t]:
160
- if fields == "question":
161
- data[t][fields]["component"] = gr.Textbox(
162
- elem_id=data[t][fields]["elem_id"],
163
- show_label=False,
164
- interactive=True,
165
- placeholder="",
166
- )
167
- # elif fields == "answer":
168
- # data[t][fields]["component"] = gr.Textbox(
169
- # elem_id=data[t][fields]["elem_id"],
170
- # show_label=True,
171
- # interactive=True,
172
- # placeholder="",
173
- # show_copy_button=True
174
- # )
175
- elif fields != "tab":
176
- data[t][fields]["component"] = gr.Markdown(
177
- data[t][fields]["init_value"],
178
- elem_id=data[t][fields]["elem_id"],
179
- )
180
- # data[t][fields]["component"] = gr.Textbox(
181
- # value=data[t][fields]["init_value"],
182
- # elem_id=data[t][fields]["elem_id"],
183
- # show_label=True,
184
- # interactive=False,
185
- # show_copy_button=True,
186
- # )
187
- return data
188
-
189
-
190
- def add_warning():
191
- return "*Les éléments cochés ont commencé à être généré dans les onglets spécifiques, la synthèse ne sera disponible qu'après la mise à disposition de ces derniers.*"
192
-
193
-
194
  def format_question(question):
195
  return f"{question}" # ###
196
 
@@ -202,8 +129,8 @@ def parse_question(question):
202
  return x
203
 
204
 
205
- def reformulate(outils, question, tab, config=config):
206
- if tab in outils:
207
  return llm.stream(
208
  chat_reformulation_prompts[config["source_mapping"][tab]],
209
  {"question": parse_question(question)},
@@ -212,15 +139,15 @@ def reformulate(outils, question, tab, config=config):
212
  return iter([None] * 5)
213
 
214
 
215
- def reformulate_single_question(outils, question, tab, config=config):
216
- for elt in reformulate(outils, question, tab, config=config):
217
  time.sleep(0.02)
218
  yield elt
219
 
220
 
221
- def reformulate_questions(outils, question, config=config):
222
  for elt in zip_longest_fill(
223
- *[reformulate(outils, question, tab, config=config) for tab in config["tabs"]]
224
  ):
225
  time.sleep(0.02)
226
  yield elt
@@ -230,8 +157,8 @@ def add_question(question):
230
  return question
231
 
232
 
233
- def answer(question, source, outils, tab, config=config):
234
- if tab in outils:
235
  if len(source) < 10:
236
  return iter(["Aucune source trouvée, veuillez reformuler votre question"])
237
  else:
@@ -247,49 +174,35 @@ def answer(question, source, outils, tab, config=config):
247
  return iter([None] * 5)
248
 
249
 
250
- def answer_single_question(outils, source, question, tab, config=config):
251
- for elt in answer(question, source, outils, tab, config=config):
252
  time.sleep(0.02)
253
  yield elt
254
 
255
 
256
- def answer_questions(outils, *questions_sources, config=config):
257
-
258
  questions = [elt for elt in questions_sources[: len(questions_sources) // 2]]
259
  sources = [elt for elt in questions_sources[len(questions_sources) // 2 :]]
260
 
261
  for elt in zip_longest_fill(
262
  *[
263
- answer(question, source, outils, tab, config=config)
264
  for question, source, tab in zip(questions, sources, config["tabs"])
265
  ]
266
  ):
267
  time.sleep(0.02)
268
- yield elt
269
-
270
-
271
- def get_source_link(metadata):
272
- return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
273
-
274
-
275
- def get_button(i, tag):
276
- return f"""<button id="btn_{tag}_{i}" type="button" style="margin: 0; display: inline; align="right">[{i}]</button>"""
277
-
278
-
279
- def get_html_sources(buttons, cards):
280
- return f"""
281
- <p style="margin: 0; display: inline;"><strong><br>Sources utilisées : </strong></p>
282
- {buttons}
283
- {cards}
284
- """
285
 
286
 
287
- def get_sources(
288
- outils, question, tab, qdrants=qdrants, bdd_presse=bdd_presse, config=config
289
- ):
290
  k = config["num_document_retrieved"]
291
  min_similarity = config["min_similarity"]
292
- if tab in outils:
 
 
293
  sources = (
294
  (
295
  bdd_presse.similarity_search_with_relevance_scores(
@@ -307,66 +220,49 @@ def get_sources(
307
  )
308
  )
309
 
310
- sources = [(doc, score) for doc, score in sources if score >= min_similarity]
311
-
312
- buttons_ids = list(range(len(sources)))
313
- buttons = " ".join(
314
- [get_button(i, tab) for i, source in zip(buttons_ids, sources)]
315
- )
316
- formated = (
317
- "\n\n".join(
318
- [
319
- make_html_presse_source(source[0], i, tab, source[1], config)
320
- for i, source in zip(buttons_ids, sources)
321
- ]
322
- )
323
  if tab == "Presse"
324
- else "\n\n".join(
325
- [
326
- make_html_source(source[0], i, tab, source[1], config)
327
- for i, source in zip(buttons_ids, sources)
328
- ]
329
- )
330
  )
331
- formated = get_html_sources(buttons, formated) if sources else ""
332
- text = "\n\n".join(
333
  [
334
- f"Doc {str(i)} with source type {elt[0].metadata.get('file_source_type')}:\n"
335
- + elt[0].page_content
336
- for i, elt in enumerate(sources)
 
 
 
 
337
  ]
338
  )
339
- return str(formated), str(text) # formated_sources, text_sources
340
- else:
341
- return "", ""
 
342
 
343
 
344
  def retrieve_sources(
345
- outils, *questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
346
  ):
347
- results = [
348
- get_sources(outils, question, tab, qdrants, bdd_presse, config)
349
- for question, tab in zip(questions, config["tabs"])
350
- ]
351
- formated_sources = [source[0] for source in results]
352
- text_sources = [source[1] for source in results]
353
- return tuple(formated_sources + text_sources)
354
-
355
 
356
- def get_experts(outils, *answers, config=config):
357
- return "\n\n".join(
358
- [
359
- f"{tab}\n{answers[i]}"
360
- for i, tab in enumerate(config["tabs"])
361
- if (tab in outils)
362
- ]
363
- )
364
 
365
 
366
- def get_synthesis(outils, question, *answers, config=config):
367
  answer = []
368
  for i, tab in enumerate(config["tabs"]):
369
- if (tab in outils) & (len(str(answers[i])) >= 100):
370
  answer.append(
371
  f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
372
  )
@@ -382,67 +278,7 @@ def get_synthesis(outils, question, *answers, config=config):
382
  },
383
  ):
384
  time.sleep(0.01)
385
- yield elt
386
-
387
-
388
- def get_listener():
389
- return """
390
- function my_func_body() {
391
- const body = document.querySelector("body");
392
- body.addEventListener("click", e => {
393
- console.log(e)
394
- const sourceId = "btn_" + e.target.id.split("_")[1] + "_" + e.target.id.split("_")[2] + "_source"
395
- console.log(sourceId)
396
- if (document.getElementById(sourceId).style.display === "none") {
397
- document.getElementById(sourceId).style.display = "";
398
- } else {
399
- document.getElementById(sourceId).style.display = "none";
400
- }
401
- }
402
- )}
403
- """
404
-
405
-
406
- def get_source_template(buttons, divs_source):
407
- return """
408
- <div class="source">
409
- <p style="margin: 0; display: inline;"><strong><br>Sources utilisées :</strong></p>
410
- {buttons}
411
- {divs_source}
412
- </div>
413
- </div>
414
- """
415
-
416
-
417
- def activate_questions(outils, *textboxes, config=config):
418
- activated_textboxes = []
419
- for i, tab in enumerate(config["tabs"]):
420
- if tab in outils:
421
- activated_textboxes.append(
422
- gr.Textbox(
423
- show_label=False,
424
- interactive=True,
425
- placeholder="Sélectionnez cet outil et posez une question sur l'onglet de synthèse",
426
- )
427
- )
428
-
429
- else:
430
- activated_textboxes.append(
431
- gr.Textbox(
432
- show_label=False,
433
- interactive=False,
434
- placeholder="Sélectionnez cet outil et posez une question sur l'onglet de synthèse",
435
- )
436
- )
437
- return activated_textboxes
438
-
439
-
440
- def empty():
441
- return ""
442
-
443
-
444
- def empty_none():
445
- return None
446
 
447
 
448
  theme = gr.themes.Base(
@@ -451,151 +287,219 @@ theme = gr.themes.Base(
451
  font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
452
  )
453
 
 
 
454
 
455
- init_prompt = """
456
- Hello, I am Spinoza Q&A, a conversational assistant designed to help journalists by providing secialized answers from technical sources. I will answer your questions based **on the official definition of each ESRS as well as guidelines**.
457
-
458
- ⚠️ Limitations
459
- *Please note that this chatbot is in an early stage phase, it is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
460
-
461
- What do you want to learn ?
462
- """
463
 
464
- logo_rsf = config["logo_rsf"]
465
- logo_ap = config["logo_ap"]
466
 
467
- data = build_data_dict(config)
468
 
 
 
 
 
 
469
 
470
- def update_visible(oauth_token: gr.OAuthToken | None):
471
- if oauth_token is None:
472
- return {
473
- bloc_1: gr.update(visible=True),
474
- bloc_2: gr.update(visible=False),
475
- bloc_3: gr.update(visible=False),
476
- }
477
 
478
- org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
 
479
 
480
- if "SpinozaProject" in org_names: # logged in group
481
- return {
482
- bloc_1: gr.update(visible=False),
483
- bloc_2: gr.update(visible=True),
484
- bloc_3: gr.update(visible=False),
485
- }
486
 
487
- else: # logged but not in group
488
- return {
489
- bloc_1: gr.update(visible=False),
490
- bloc_2: gr.update(visible=False),
491
- bloc_3: gr.update(visible=True),
492
- }
493
 
 
 
494
 
495
  with gr.Blocks(
496
- title=f"🔍{config['demo_name']}",
497
  css=css,
498
- js=get_listener(),
499
  theme=theme,
500
  ) as demo:
501
- with gr.Column(visible=True):
502
- gr.HTML(
503
- f"""<div class="row_logo">
504
- <img src={logo_rsf} alt="logo RSF" style="float:left; width:120px; height:70px">
505
- <img src={logo_ap} alt="logo AP" style="width:120px; height:70px">
506
- </div>"""
507
- )
 
 
 
 
508
 
509
- text_sources = {elt: gr.State("") for elt in config["tabs"]}
510
- tab_states = {elt: gr.State(elt) for elt in config["tabs"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  with gr.Row():
512
- with gr.Column(scale=3):
513
- outils = gr.CheckboxGroup(
514
- choices=list(config["tabs"].keys()),
515
- value=list(config["tabs"].keys()),
516
- type="value",
517
- label="Choisir les bases de données à interroger",
518
- )
519
  with gr.Column(scale=1):
520
- submit_btn = gr.Button(
521
- "Relancer la Synthèse", variant="primary", elem_id="synthese_btn"
522
- )
523
 
524
- # Synthesis tab
525
- synthesis_tab = gr.Tab("Synthesis", elem_id="tab")
526
- with synthesis_tab:
527
- question = gr.Textbox(
528
- show_label=True,
529
- label="Posez une question à Spinoza",
530
- placeholder="Quelle est votre question ?",
531
- )
532
- md_question = gr.Markdown(None, visible=False)
533
- warning = gr.Markdown(None, elem_id="warn")
534
- synthesis = gr.Markdown(None, elem_id="synthesis")
535
-
536
- data = init_gradio(data)
537
- (
538
- question.submit(add_question, [question], [md_question])
539
- .then(add_warning, [], [warning])
540
- .then(empty, [], [synthesis])
541
- .then(
542
- reformulate_questions,
543
- [outils, md_question],
544
- [data[tab]["question"]["component"] for tab in config["tabs"]],
545
- )
546
- .then(
547
- retrieve_sources,
548
- [outils]
549
- + [data[tab]["question"]["component"] for tab in config["tabs"]],
550
- [data[tab]["sources"]["component"] for tab in config["tabs"]]
551
- + [text_sources[tab] for tab in config["tabs"]],
552
- )
553
- .then(
554
- answer_questions,
555
- [outils]
556
- + [data[tab]["question"]["component"] for tab in config["tabs"]]
557
- + [text_sources[tab] for tab in config["tabs"]],
558
- [data[tab]["answer"]["component"] for tab in config["tabs"]],
559
- )
560
- .then(
561
- get_synthesis,
562
- [outils, md_question]
563
- + [data[tab]["answer"]["component"] for tab in config["tabs"]],
564
- [synthesis],
565
- )
566
- )
567
-
568
- for tab in config["tabs"]:
569
- (
570
- data[tab]["question"]["component"]
571
- .submit(empty, [], [data[tab]["sources"]["component"]])
572
- .then(empty, [], [text_sources[tab]])
573
- .then(empty, [], [data[tab]["answer"]["component"]])
574
- .then(
575
- get_sources,
576
- [outils, data[tab]["question"]["component"], tab_states[tab]],
577
- [data[tab]["sources"]["component"], text_sources[tab]],
578
- )
579
- .then(
580
- answer_single_question,
581
- [
582
- outils,
583
- text_sources[tab],
584
- data[tab]["question"]["component"],
585
- tab_states[tab],
586
- ],
587
- [data[tab]["answer"]["component"]],
588
- )
589
- )
590
 
591
- (
592
- submit_btn.click(empty, [], [synthesis]).then(
593
- get_synthesis,
594
- [outils, md_question]
595
- + [data[tab]["answer"]["component"] for tab in config["tabs"]],
596
- [synthesis],
597
- )
598
- )
599
 
600
  if __name__ == "__main__":
601
  demo.queue().launch(share=True, debug=True)
 
1
+ import gradio as gr
2
  import time
3
  import yaml
 
 
4
  from langchain.prompts.chat import ChatPromptTemplate
5
+ from huggingface_hub import hf_hub_download
6
  from spinoza_project.source.backend.llm_utils import get_llm, get_vectorstore
7
  from spinoza_project.source.backend.document_store import pickle_to_document_store
8
  from spinoza_project.source.backend.get_prompts import get_qa_prompts
9
  from spinoza_project.source.frontend.utils import (
10
  make_html_source,
11
  make_html_presse_source,
12
+ parse_output_llm_with_sources,
13
  init_env,
14
  )
15
  from spinoza_project.source.backend.prompt_utils import (
 
17
  SpecialTokens,
18
  )
19
 
20
+ from assets.utils_javascript import (
21
+ accordion_trigger,
22
+ accordion_trigger_end,
23
+ accordion_trigger_spinoza,
24
+ accordion_trigger_spinoza_end,
25
+ )
26
+
27
  init_env()
28
 
29
  with open("./spinoza_project/config.yaml") as f:
 
62
  chat_qa_prompt, chat_reformulation_prompt = get_qa_prompts(config, prompt)
63
  chat_qa_prompts[source] = chat_qa_prompt
64
  chat_reformulation_prompts[source] = chat_reformulation_prompt
 
65
 
66
 
67
  with open("./assets/style.css", "r") as f:
68
  css = f.read()
69
 
70
 
 
 
 
 
 
71
  special_tokens = SpecialTokens(config)
72
 
73
  synthesis_template = """You are a factual journalist that summarize the secialized awnsers from thechnical sources.
 
118
  yield tuple(values)
119
 
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  def format_question(question):
122
  return f"{question}" # ###
123
 
 
129
  return x
130
 
131
 
132
+ def reformulate(question, tab, config=config):
133
+ if tab in list(config["tabs"].keys()):
134
  return llm.stream(
135
  chat_reformulation_prompts[config["source_mapping"][tab]],
136
  {"question": parse_question(question)},
 
139
  return iter([None] * 5)
140
 
141
 
142
+ def reformulate_single_question(question, tab, config=config):
143
+ for elt in reformulate(question, tab, config=config):
144
  time.sleep(0.02)
145
  yield elt
146
 
147
 
148
+ def reformulate_questions(question, config=config):
149
  for elt in zip_longest_fill(
150
+ *[reformulate(question, tab, config=config) for tab in config["tabs"]]
151
  ):
152
  time.sleep(0.02)
153
  yield elt
 
157
  return question
158
 
159
 
160
+ def answer(question, source, tab, config=config):
161
+ if tab in list(config["tabs"].keys()):
162
  if len(source) < 10:
163
  return iter(["Aucune source trouvée, veuillez reformuler votre question"])
164
  else:
 
174
  return iter([None] * 5)
175
 
176
 
177
+ def answer_single_question(source, question, tab, config=config):
178
+ for elt in answer(question, source, tab, config=config):
179
  time.sleep(0.02)
180
  yield elt
181
 
182
 
183
+ def answer_questions(*questions_sources, config=config):
 
184
  questions = [elt for elt in questions_sources[: len(questions_sources) // 2]]
185
  sources = [elt for elt in questions_sources[len(questions_sources) // 2 :]]
186
 
187
  for elt in zip_longest_fill(
188
  *[
189
+ answer(question, source, tab, config=config)
190
  for question, source, tab in zip(questions, sources, config["tabs"])
191
  ]
192
  ):
193
  time.sleep(0.02)
194
+ yield [
195
+ [(question, parse_output_llm_with_sources(ans))]
196
+ for question, ans in zip(questions, elt)
197
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
 
200
+ def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
 
 
201
  k = config["num_document_retrieved"]
202
  min_similarity = config["min_similarity"]
203
+ formated = []
204
+ text = []
205
+ for i, (question, tab) in enumerate(zip(questions, list(config["tabs"].keys()))):
206
  sources = (
207
  (
208
  bdd_presse.similarity_search_with_relevance_scores(
 
220
  )
221
  )
222
 
223
+ sources = [
224
+ (doc, score) for doc, score in sources
225
+ ] # if score >= min_similarity]
226
+ formated.extend(
227
+ [
228
+ make_html_presse_source(source[0], j, source[1], config)
229
+ for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
230
+ ]
 
 
 
 
 
231
  if tab == "Presse"
232
+ else [
233
+ make_html_source(source[0], j, source[1], config)
234
+ for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
235
+ ]
 
 
236
  )
237
+ text.extend(
 
238
  [
239
+ "\n\n".join(
240
+ [
241
+ f"Doc {str(j)} with source type {source[0].metadata.get('file_source_type')}:\n"
242
+ + source[0].page_content
243
+ for j, source in zip(range(k * i + 1, k * (i + 1) + 1), sources)
244
+ ]
245
+ )
246
  ]
247
  )
248
+
249
+ formated = "".join(formated)
250
+
251
+ return formated, text
252
 
253
 
254
  def retrieve_sources(
255
+ *questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
256
  ):
257
+ formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
 
 
 
 
 
 
 
258
 
259
+ return (formated_sources, *text_sources)
 
 
 
 
 
 
 
260
 
261
 
262
+ def get_synthesis(question, *answers, config=config):
263
  answer = []
264
  for i, tab in enumerate(config["tabs"]):
265
+ if (len(str(answers[i])) >= 100):
266
  answer.append(
267
  f"{tab}\n{answers[i]}".replace("<p>", "").replace("</p>\n", "")
268
  )
 
278
  },
279
  ):
280
  time.sleep(0.01)
281
+ yield [(question, elt)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
 
284
  theme = gr.themes.Base(
 
287
  font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
288
  )
289
 
290
+ with open("./assets/style.css", "r") as f:
291
+ css = f.read()
292
 
293
+ with open("./assets/source_information.md", "r") as f:
294
+ source_information = f.read()
 
 
 
 
 
 
295
 
296
+ def start_agents():
297
+ gr.Info(message="The agents and Spinoza are loading...", duration=3)
298
 
 
299
 
300
+ def end_agents():
301
+ gr.Info(
302
+ message="The agents and Spinoza have finished answering your question",
303
+ duration=3,
304
+ )
305
 
 
 
 
 
 
 
 
306
 
307
+ def next_call():
308
+ print("Next call")
309
 
310
+ init_prompt = """
311
+ Hello, I am Spinoza, a conversational assistant designed to help you in your journalistic journey. I will answer your questions based **on the provided sources**.
 
 
 
 
312
 
313
+ ⚠️ Limitations
314
+ *Please note that this chatbot is in an early stage phase, it is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
 
 
 
 
315
 
316
+ What do you want to learn ?
317
+ """
318
 
319
  with gr.Blocks(
320
+ title=f"🔍 Spinoza",
321
  css=css,
 
322
  theme=theme,
323
  ) as demo:
324
+ chatbots = {}
325
+ question = gr.State("")
326
+ docs_textbox = gr.State([""])
327
+ agent_questions = {elt: gr.State("") for elt in config["tabs"]}
328
+ component_sources = {elt: gr.State("") for elt in config["tabs"]}
329
+ text_sources = {elt: gr.State("") for elt in config["tabs"]}
330
+ tab_states = {elt: gr.State(elt) for elt in config["tabs"]}
331
+ chatbot_states = [
332
+ gr.State(name)
333
+ for name in ["science", "presse", "politique", "legal", "spinoza"]
334
+ ]
335
 
336
+ with gr.Tab("Q&A", elem_id="main-component"):
337
+ with gr.Row(elem_id="chatbot-row"):
338
+ with gr.Column(scale=2, elem_id="center-panel"):
339
+ with gr.Group(elem_id="chatbot-group"):
340
+ with gr.Accordion(
341
+ "Science agent",
342
+ open=False,
343
+ elem_id="accordion-science",
344
+ elem_classes="accordion",
345
+ ):
346
+ chatbots[list(config["tabs"].keys())[0]] = gr.Chatbot(
347
+ show_copy_button=True,
348
+ show_share_button=False,
349
+ show_label=False,
350
+ elem_id="chatbot-science",
351
+ layout="panel",
352
+ avatar_images=(
353
+ "./assets/logos/help.png",
354
+ None,
355
+ ),
356
+ )
357
+
358
+ with gr.Accordion(
359
+ "Law agent",
360
+ open=False,
361
+ elem_id="accordion-legal",
362
+ elem_classes="accordion",
363
+ ):
364
+ chatbots[list(config["tabs"].keys())[1]] = gr.Chatbot(
365
+ show_copy_button=True,
366
+ show_share_button=False,
367
+ show_label=False,
368
+ elem_id="chatbot-legal",
369
+ layout="panel",
370
+ avatar_images=(
371
+ "./assets/logos/help.png",
372
+ None,
373
+ ),
374
+ )
375
+
376
+ with gr.Accordion(
377
+ "Politics agent",
378
+ open=False,
379
+ elem_id="accordion-politique",
380
+ elem_classes="accordion",
381
+ ):
382
+ chatbots[list(config["tabs"].keys())[2]] = gr.Chatbot(
383
+ show_copy_button=True,
384
+ show_share_button=False,
385
+ show_label=False,
386
+ elem_id="chatbot-politique",
387
+ layout="panel",
388
+ avatar_images=(
389
+ "./assets/logos/help.png",
390
+ None, # "https://i.ibb.co/cN0czLp/celsius-logo.png",
391
+ ),
392
+ )
393
+
394
+ with gr.Accordion(
395
+ "ADEME agent",
396
+ open=False,
397
+ elem_id="accordion-ademe",
398
+ elem_classes="accordion",
399
+ ):
400
+ chatbots[list(config["tabs"].keys())[3]] = gr.Chatbot(
401
+ show_copy_button=True,
402
+ show_share_button=False,
403
+ show_label=False,
404
+ elem_id="chatbot-ademe",
405
+ layout="panel",
406
+ avatar_images=(
407
+ "./assets/logos/help.png",
408
+ None, # "https://i.ibb.co/cN0czLp/celsius-logo.png",
409
+ ),
410
+ )
411
+
412
+ with gr.Accordion(
413
+ "Press agent",
414
+ open=False,
415
+ elem_id="accordion-presse",
416
+ elem_classes="accordion",
417
+ ):
418
+ chatbots[list(config["tabs"].keys())[4]] = gr.Chatbot(
419
+ show_copy_button=True,
420
+ show_share_button=False,
421
+ show_label=False,
422
+ elem_id="chatbot-presse",
423
+ layout="panel",
424
+ avatar_images=(
425
+ "./assets/logos/help.png",
426
+ None, # "https://i.ibb.co/cN0czLp/celsius-logo.png",
427
+ ),
428
+ )
429
+
430
+ with gr.Accordion(
431
+ "Spinoza",
432
+ open=True,
433
+ elem_id="accordion-spinoza",
434
+ elem_classes="accordion",
435
+ ):
436
+ chatbots["spinoza"] = gr.Chatbot(
437
+ value=[(None, init_prompt)],
438
+ show_copy_button=True,
439
+ show_share_button=False,
440
+ show_label=False,
441
+ elem_id="chatbot-spinoza",
442
+ layout="panel",
443
+ avatar_images=(
444
+ "./assets/logos/help.png",
445
+ "./assets/logos/spinoza.png",
446
+ ),
447
+ )
448
+
449
+ with gr.Row(elem_id="input-message"):
450
+ ask = gr.Textbox(
451
+ placeholder="Ask me anything here!",
452
+ show_label=False,
453
+ scale=7,
454
+ lines=1,
455
+ interactive=True,
456
+ elem_id="input-textbox",
457
+ )
458
+
459
+ with gr.Column(scale=1, variant="panel", elem_id="right-panel"):
460
+ with gr.TabItem("Sources", elem_id="tab-sources", id=0):
461
+ sources_textbox = gr.HTML(
462
+ show_label=False, elem_id="sources-textbox"
463
+ )
464
+
465
+ with gr.Tab("Source information", elem_id="source-component"):
466
  with gr.Row():
 
 
 
 
 
 
 
467
  with gr.Column(scale=1):
468
+ gr.Markdown(source_information)
 
 
469
 
470
+ with gr.Tab("Contact", elem_id="contact-component"):
471
+ with gr.Row():
472
+ with gr.Column(scale=1):
473
+ gr.Markdown("For any issue contact **spinoza.[email protected]**.")
474
+
475
+ ask.submit(start_agents, inputs=[], outputs=[], js=accordion_trigger()).then(
476
+ fn=reformulate_questions,
477
+ inputs=[ask],
478
+ outputs=[agent_questions[tab] for tab in config["tabs"]],
479
+ ).then(
480
+ fn=retrieve_sources,
481
+ inputs=[agent_questions[tab] for tab in config["tabs"]],
482
+ outputs=[sources_textbox] + [text_sources[tab] for tab in config["tabs"]],
483
+ ).then(
484
+ fn=answer_questions,
485
+ inputs=[agent_questions[tab] for tab in config["tabs"]]
486
+ + [text_sources[tab] for tab in config["tabs"]],
487
+ outputs=[chatbots[tab] for tab in config["tabs"]],
488
+ ).then(
489
+ fn=next_call, inputs=[], outputs=[], js=accordion_trigger_end()
490
+ ).then(
491
+ fn=next_call, inputs=[], outputs=[], js=accordion_trigger_spinoza()
492
+ ).then(
493
+ fn=get_synthesis,
494
+ inputs=[agent_questions[list(config["tabs"].keys())[1]]]
495
+ + [chatbots[tab] for tab in config["tabs"]],
496
+ outputs=[chatbots["spinoza"]],
497
+ ).then(
498
+ fn=next_call, inputs=[], outputs=[], js=accordion_trigger_spinoza_end()
499
+ ).then(
500
+ fn=end_agents, inputs=[], outputs=[]
501
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
 
 
 
 
 
 
 
 
 
503
 
504
  if __name__ == "__main__":
505
  demo.queue().launch(share=True, debug=True)
assets/logos/apig.png ADDED
assets/logos/help.png ADDED
assets/logos/question.png ADDED
assets/logos/rsf.png ADDED
assets/logos/spinoza.png ADDED
assets/source_information.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Here is a quick introduction to the sources of data accessed by the different agents.
2
+
3
+ 1. **Science:** this tool is composed of IPCC and IPBES reports.
4
+
5
+ 2. **Legal:** this tool is based on French law; it gathers 21 of the "codes" that were modified by "la loi climat" of 2021.
6
+
7
+ 3. **Politics:** this tool queries the French national low-carbon policy (SNBC).
8
+
9
+ 4. **ADEME:** this tool is dedicated to ADEME data, for which we have selected various categories of reports:
10
+
11
+ - Guides made available to the general public
12
+ - Reports on experiences with new technologies
13
+ - Studies and research on local impacts, and institutional documents (analyses requested by France & activity reports)
14
+ - Sectoral transition plans for the industrial sectors with the highest emissions (glass, paper, cement, steel, aluminium, chemicals, sugar)
assets/style.css CHANGED
@@ -1,6 +1,6 @@
1
- :root {
2
  --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
3
- }
4
 
5
  .warning-box {
6
  background-color: #fff3cd;
@@ -11,21 +11,32 @@
11
  color: #856404;
12
  display: inline-block;
13
  margin-bottom: 15px;
14
- }
 
15
 
16
  .tip-box {
17
  background-color: #f0f9ff;
18
  border: 1px solid #80d4fa;
19
  border-radius: 4px;
20
- margin-top:20px;
21
  padding: 15px 20px;
22
  font-size: 14px;
23
- color: #006064;
24
  display: inline-block;
25
  margin-bottom: 15px;
26
  width: auto;
 
 
 
 
 
 
 
 
 
 
27
  }
28
 
 
29
  .tip-box-title {
30
  font-weight: bold;
31
  font-size: 14px;
@@ -37,14 +48,16 @@
37
  margin-right: 5px;
38
  }
39
 
40
- .gr-box {border-color: #d6c37c}
 
 
41
 
42
- #hidden-message{
43
- display:none;
44
  }
45
 
46
- .message{
47
- font-size:14px !important;
48
  }
49
 
50
 
@@ -60,7 +73,7 @@ a {
60
  overflow: hidden;
61
  display: flex;
62
  flex-direction: column;
63
- margin:20px;
64
  }
65
 
66
  .card-content {
@@ -71,8 +84,9 @@ a {
71
  font-size: 14px !important;
72
  font-weight: bold;
73
  margin-bottom: 10px;
74
- margin-top:0px !important;
75
- color:#577b9b!important;;
 
76
  }
77
 
78
  .card-content p {
@@ -80,16 +94,6 @@ a {
80
  margin-bottom: 0;
81
  }
82
 
83
- .card-content-column-1 {
84
- float: left;
85
- width: 20%;
86
- }
87
-
88
- .card-content-column-2 {
89
- float: left;
90
- width: 80%;
91
- }
92
-
93
  .card-footer {
94
  background-color: #f4f4f4;
95
  font-size: 10px;
@@ -109,55 +113,51 @@ a {
109
  display: inline-flex;
110
  align-items: center;
111
  margin-left: auto;
112
- text-decoration: none!important;
113
  font-size: 14px;
114
  }
115
 
116
- .message.user{
117
- background-color:#7494b0 !important;
118
- border:none;
119
- color:white!important;
 
 
120
  }
121
 
122
- .message.bot{
123
- background-color:#f2f2f7 !important;
124
- border:none;
125
  }
126
 
127
- .gallery-item > div:hover{
128
  background-color:#7494b0 !important;
129
  color:white!important;
130
  }
131
-
132
  .gallery-item:hover{
133
  border:#7494b0 !important;
134
  }
135
-
136
  .gallery-item > div{
137
  background-color:white !important;
138
  color:#577b9b!important;
139
  }
140
-
141
  .label{
142
  color:#577b9b!important;
143
- }
144
 
145
- label.selected{
146
- background:none !important;
147
- }
148
-
149
- .paginate{
150
  color:#577b9b!important;
151
- }
152
 
153
 
154
- label > span{
155
- background-color:white !important;
156
- color:#577b9b!important;
157
- }
 
158
 
159
  /* Pseudo-element for the circularly cropped picture */
160
- .message.bot::before {
161
  content: '';
162
  position: absolute;
163
  top: -10px;
@@ -169,4 +169,269 @@ label > span{
169
  background-position: center;
170
  border-radius: 50%;
171
  z-index: 10;
172
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* :root {
2
  --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
3
+ } */
4
 
5
  .warning-box {
6
  background-color: #fff3cd;
 
11
  color: #856404;
12
  display: inline-block;
13
  margin-bottom: 15px;
14
+ }
15
+
16
 
17
  .tip-box {
18
  background-color: #f0f9ff;
19
  border: 1px solid #80d4fa;
20
  border-radius: 4px;
21
+ margin-top: 20px;
22
  padding: 15px 20px;
23
  font-size: 14px;
 
24
  display: inline-block;
25
  margin-bottom: 15px;
26
  width: auto;
27
+ color: black !important;
28
+ }
29
+
30
+ body.dark .warning-box * {
31
+ color: black !important;
32
+ }
33
+
34
+
35
+ body.dark .tip-box * {
36
+ color: black !important;
37
  }
38
 
39
+
40
  .tip-box-title {
41
  font-weight: bold;
42
  font-size: 14px;
 
48
  margin-right: 5px;
49
  }
50
 
51
+ .gr-box {
52
+ border-color: #d6c37c
53
+ }
54
 
55
+ #hidden-message {
56
+ display: none;
57
  }
58
 
59
+ .message {
60
+ font-size: 14px !important;
61
  }
62
 
63
 
 
73
  overflow: hidden;
74
  display: flex;
75
  flex-direction: column;
76
+ margin: 20px;
77
  }
78
 
79
  .card-content {
 
84
  font-size: 14px !important;
85
  font-weight: bold;
86
  margin-bottom: 10px;
87
+ margin-top: 0px !important;
88
+ color: #dc2626 !important;
89
+ ;
90
  }
91
 
92
  .card-content p {
 
94
  margin-bottom: 0;
95
  }
96
 
 
 
 
 
 
 
 
 
 
 
97
  .card-footer {
98
  background-color: #f4f4f4;
99
  font-size: 10px;
 
113
  display: inline-flex;
114
  align-items: center;
115
  margin-left: auto;
116
+ text-decoration: none !important;
117
  font-size: 14px;
118
  }
119
 
120
+
121
+
122
+ .message.user {
123
+ /* background-color:#7494b0 !important; */
124
+ border: none;
125
+ /* color:white!important; */
126
  }
127
 
128
+ .message.bot {
129
+ /* background-color:#f2f2f7 !important; */
130
+ border: none;
131
  }
132
 
133
+ /* .gallery-item > div:hover{
134
  background-color:#7494b0 !important;
135
  color:white!important;
136
  }
 
137
  .gallery-item:hover{
138
  border:#7494b0 !important;
139
  }
 
140
  .gallery-item > div{
141
  background-color:white !important;
142
  color:#577b9b!important;
143
  }
 
144
  .label{
145
  color:#577b9b!important;
146
+ } */
147
 
148
+ /* .paginate{
 
 
 
 
149
  color:#577b9b!important;
150
+ } */
151
 
152
 
153
+
154
+ /* span[data-testid="block-info"]{
155
+ background:none !important;
156
+ color:#577b9b;
157
+ } */
158
 
159
  /* Pseudo-element for the circularly cropped picture */
160
+ /* .message.bot::before {
161
  content: '';
162
  position: absolute;
163
  top: -10px;
 
169
  background-position: center;
170
  border-radius: 50%;
171
  z-index: 10;
172
+ }
173
+ */
174
+
175
+ label.selected {
176
+ background: none !important;
177
+ }
178
+
179
+ #submit-button {
180
+ padding: 0px !important;
181
+ }
182
+
183
+
184
+ @media screen and (min-width: 1024px) {
185
+ div#sources-textbox {
186
+ height: calc(100vh - 190px) !important;
187
+ overflow-y: auto !important;
188
+ scrollbar-width: none;
189
+ -ms-overflow-style: none;
190
+ }
191
+
192
+ div#sources-textbox::-webkit-scrollbar {
193
+ width: 0;
194
+ height: 0;
195
+ }
196
+
197
+ div.svelte-iyf88w {
198
+ scrollbar-width: none;
199
+ }
200
+
201
+ div.svelte-iyf88w::-webkit-scrollbar {
202
+ width: 0;
203
+ height: 0;
204
+ }
205
+
206
+ div#chatbot-row {
207
+ height: calc(100vh - 90px) !important;
208
+ }
209
+
210
+ .max-height {
211
+ height: calc(100vh - 90px) !important;
212
+ overflow-y: auto;
213
+ }
214
+
215
+ div.svelte-iyf88w {
216
+ height: calc(100vh - 160px) !important;
217
+ overflow-y: auto;
218
+ }
219
+
220
+ #accordion-spinoza {
221
+ height: calc(100vh - 160px) !important;
222
+ }
223
+
224
+ .form {
225
+ position: relative;
226
+ top: 10px;
227
+ }
228
+
229
+ #accordion-spinoza>open>span:nth-child(1) {
230
+ color: #000000;
231
+ font-size: large;
232
+ font-weight: bold;
233
+ }
234
+
235
+ #accordion-spinoza>button:nth-child(2)>span:nth-child(1) {
236
+ color: #000000;
237
+ font-size: large;
238
+ font-weight: bold;
239
+ }
240
+
241
+ #accordion-science>button:nth-child(2)>span:nth-child(1) {
242
+ color: #9ca1a5e7;
243
+ font-weight: bold;
244
+ }
245
+
246
+ #accordion-presse>button:nth-child(2)>span:nth-child(1) {
247
+ color: #9ca1a5e7;
248
+ font-weight: bold;
249
+ }
250
+
251
+ #accordion-legal>button:nth-child(2)>span:nth-child(1) {
252
+ color: #9ca1a5e7;
253
+ font-weight: bold;
254
+ }
255
+
256
+ #accordion-politique>button:nth-child(2)>span:nth-child(1) {
257
+ color: #9ca1a5e7;
258
+ font-weight: bold;
259
+ }
260
+
261
+ #accordion-ademe>button:nth-child(2)>span:nth-child(1) {
262
+ color: #9ca1a5e7;
263
+ font-weight: bold;
264
+ }
265
+
266
+ }
267
+
268
+ footer {
269
+ visibility: hidden;
270
+ display: none !important;
271
+ }
272
+
273
+ /* @media screen and (max-width: 767px) {
274
+ /* Your mobile-specific styles go here */
275
+
276
+ div#chatbot {
277
+ height: 500px !important;
278
+ }
279
+
280
+ #submit-button {
281
+ padding: 0px !important;
282
+ min-width: 80px;
283
+ }
284
+
285
+ /* This will hide all list items */
286
+ div.tab-nav button {
287
+ display: none !important;
288
+ }
289
+
290
+ /* This will show only the first list item */
291
+ div.tab-nav button:first-child {
292
+ display: block !important;
293
+ }
294
+
295
+ /* This will show only the first list item */
296
+ div.tab-nav button:nth-child(2) {
297
+ display: block !important;
298
+ }
299
+
300
+ /* This will show only the first list item */
301
+ div.tab-nav button:nth-child(3) {
302
+ display: block !important;
303
+ }
304
+
305
+ #right-panel button {
306
+ display: block !important;
307
+ }
308
+
309
+ /* ... add other mobile-specific styles ... */
310
+
311
+ */ body.dark .card {
312
+ background-color: #374151;
313
+ }
314
+
315
+ body.dark .card-content h2 {
316
+ color: #f4dbd3 !important;
317
+ }
318
+
319
+ body.dark .card-footer {
320
+ background-color: #404652;
321
+ }
322
+
323
+ body.dark .card-footer span {
324
+ color: white !important;
325
+ }
326
+
327
+
328
+ .doc-ref {
329
+ color: #dc2626 !important;
330
+ margin-right: 1px;
331
+ }
332
+
333
+ .tabitem {
334
+ border: none !important;
335
+ }
336
+
337
+ .other-tabs>div {
338
+ padding-left: 40px;
339
+ padding-right: 40px;
340
+ padding-top: 10px;
341
+ }
342
+
343
+ .gallery-item>div {
344
+ white-space: normal !important;
345
+ /* Allow the text to wrap */
346
+ word-break: break-word !important;
347
+ /* Break words to prevent overflow */
348
+ overflow-wrap: break-word !important;
349
+ /* Break long words if necessary */
350
+ }
351
+
352
+ span.chatbot>p>img {
353
+ margin-top: 40px !important;
354
+ max-height: none !important;
355
+ max-width: 80% !important;
356
+ border-radius: 0px !important;
357
+ }
358
+
359
+
360
+ .chatbot-caption {
361
+ font-size: 11px;
362
+ font-style: italic;
363
+ color: #508094;
364
+ }
365
+
366
+ .ai-generated {
367
+ font-size: 11px !important;
368
+ font-style: italic;
369
+ color: #73b8d4 !important;
370
+ }
371
+
372
+ .card-image>.card-content {
373
+ background-color: #f1f7fa !important;
374
+ }
375
+
376
+
377
+
378
+ .tab-nav>button.selected {
379
+ color: #4b8ec3;
380
+ font-weight: bold;
381
+ border: none;
382
+ }
383
+
384
+ .tab-nav {
385
+ border: none !important;
386
+ }
387
+
388
+ #input-textbox>label>textarea {
389
+ border-radius: 40px;
390
+ padding-left: 30px;
391
+ resize: none;
392
+ }
393
+
394
+ #input-message>div {
395
+ border: none;
396
+ }
397
+
398
+ #dropdown-samples {
399
+ /*! border:none !important; */
400
+ /*! border-width:0px !important; */
401
+ background: none !important;
402
+
403
+ }
404
+
405
+ #dropdown-samples>.container>.wrap {
406
+ background-color: white;
407
+ }
408
+
409
+ /* HTML: <div class="loader"></div> */
410
+ /* .loader {
411
+ display: inline-flex;
412
+ gap: 5px;
413
+ animation: l2-0 1s infinite;
414
+ }
415
+ .loader:before,
416
+ .loader:after {
417
+ content: "";
418
+ width: 25px;
419
+ aspect-ratio: 1;
420
+ box-shadow: 0 0 0 3px inset #fff;
421
+ animation: l2-1 1s infinite;
422
+ }
423
+ .loader:after {
424
+ --s: -1;
425
+ }
426
+ @keyframes l2-0 {
427
+ 0%,
428
+ 50% {transform:rotate(0deg)}
429
+ 80%,
430
+ 100% {transform:rotate(180deg)}
431
+ }
432
+ @keyframes l2-1 {
433
+ 0% {transform:translate(0)}
434
+ 50%,
435
+ 80% {transform:translate(calc(var(--s,1)*2.5px))}
436
+ 100% {transform:translate(0)}
437
+ } */
assets/utils_javascript.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def accordion_trigger():
2
+ return """
3
+ function accordion_trigger() {
4
+ input_textbox = document.getElementById("input-textbox")
5
+ input_textbox.addEventListener('keyup', function (e) {
6
+ if (e.key === 'Enter' || e.keyCode === 13) {
7
+ var accordion_science = document.getElementById("accordion-science")
8
+ var accordion_presse = document.getElementById("accordion-presse")
9
+ var accordion_politique = document.getElementById("accordion-politique")
10
+ var accordion_legal = document.getElementById("accordion-legal")
11
+ var accordion_ademe= document.getElementById("accordion-ademe")
12
+ accordion_science.children[1].children[0].textContent = "Science agent - loading";
13
+ accordion_science.children[1].children[1].classList.add('loader');
14
+ accordion_presse.children[1].children[0].textContent = "Press agent - loading";
15
+ accordion_presse.children[1].children[0].classList.add('loader');
16
+ accordion_politique.children[1].children[0].textContent = "Politics agent - loading";
17
+ accordion_politique.children[1].children[0].classList.add('loader');
18
+ accordion_legal.children[1].children[0].textContent = "Law agent - loading";
19
+ accordion_legal.children[1].children[0].classList.add('loader');
20
+ accordion_ademe.children[1].children[0].textContent = "ADEME agent - loading";
21
+ accordion_ademe.children[1].children[0].classList.add('loader');
22
+ }
23
+ });
24
+ }
25
+ """
26
+
27
+
28
+ def accordion_trigger_end():
29
+ return """
30
+ function accordion_trigger_end() {
31
+ var accordion_science = document.getElementById("accordion-science")
32
+ var accordion_presse = document.getElementById("accordion-presse")
33
+ var accordion_politique = document.getElementById("accordion-politique")
34
+ var accordion_legal = document.getElementById("accordion-legal")
35
+ var accordion_ademe = document.getElementById("accordion-ademe")
36
+ accordion_science.children[1].children[0].textContent = "Science agent - ready";
37
+ accordion_science.children[1].children[1].classList.remove('loader');
38
+ accordion_presse.children[1].children[0].textContent = "Press agent - ready";
39
+ accordion_presse.children[1].children[0].classList.remove('loader');
40
+ accordion_politique.children[1].children[0].textContent = "Politics agent - ready";
41
+ accordion_politique.children[1].children[0].classList.remove('loader');
42
+ accordion_legal.children[1].children[0].textContent = "Law agent - ready";
43
+ accordion_legal.children[1].children[0].classList.remove('loader');
44
+ accordion_ademe.children[1].children[0].textContent = "ADEME agent - ready";
45
+ accordion_ademe.children[1].children[0].classList.remove('loader');
46
+
47
+ }
48
+ """
49
+
50
+
51
+ def accordion_trigger_spinoza():
52
+ return """
53
+ function accordion_trigger_spinoza() {
54
+ var accordion_spinoza = document.getElementById("accordion-spinoza")
55
+ accordion_spinoza.children[1].children[0].textContent = "Spinoza - generating";
56
+ accordion_spinoza.children[1].children[0].classList.add('loader');
57
+ }
58
+ """
59
+
60
+
61
+ def accordion_trigger_spinoza_end():
62
+ return """
63
+ function accordion_trigger_spinoza_end() {
64
+ var accordion_spinoza = document.getElementById("accordion-spinoza")
65
+ accordion_spinoza.children[1].children[0].textContent = "Spinoza - ready";
66
+ accordion_spinoza.children[1].children[0].classList.remove('loader');
67
+ }
68
+ """
69
+
70
+
71
+ def accordion_trigger_end_science():
72
+ return """
73
+ function accordion_trigger_end() {
74
+ var accordion_science = document.getElementById("accordion-science")
75
+ accordion_science.children[1].children[0].textContent = "Science agent - ready";
76
+ console.log("DONE - end science")
77
+ }
78
+ """
79
+
80
+
81
+ def accordion_trigger_end_presse():
82
+ return """
83
+ function accordion_trigger_end() {
84
+ var accordion_presse = document.getElementById("accordion-presse")
85
+ accordion_presse.children[1].children[0].textContent = "Presse agent - ready";
86
+ console.log("DONE - end presse")
87
+ }
88
+ """
89
+
90
+
91
+ def accordion_trigger_end_politique():
92
+ return """
93
+ function accordion_trigger_end() {
94
+ var accordion_politique = document.getElementById("accordion-politique")
95
+ accordion_politique.children[1].children[0].textContent = "Politique agent - ready";
96
+ console.log("DONE - end politique")
97
+ }
98
+ """
99
+
100
+
101
+ def accordion_trigger_end_legal():
102
+ return """
103
+ function accordion_trigger_end() {
104
+ var accordion_legal = document.getElementById("accordion-legal")
105
+ accordion_legal.children[1].children[0].textContent = "Legal agent - ready";
106
+ console.log("DONE - end legal")
107
+ }
108
+ """
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -9,7 +9,7 @@ package-mode = true
9
  [tool.poetry.dependencies]
10
  python = "^3.10"
11
  langchain = "^0.2.5"
12
- gradio = {extras = ["oauth"], version = "^4.36.1"}
13
  sentence-transformers = "2.2.2"
14
  msal = "^1.28.1"
15
  langchain-openai = "^0.1.8"
 
9
  [tool.poetry.dependencies]
10
  python = "^3.10"
11
  langchain = "^0.2.5"
12
+ gradio = "4.37.2"
13
  sentence-transformers = "2.2.2"
14
  msal = "^1.28.1"
15
  langchain-openai = "^0.1.8"
requirements.txt CHANGED
The diff for this file is too large to render. See raw diff
 
spinoza_project/source/frontend/utils.py CHANGED
@@ -1,5 +1,6 @@
1
  from queue import SimpleQueue
2
  from dotenv import load_dotenv
 
3
  from langchain.callbacks.base import BaseCallbackHandler
4
 
5
  job_done = object() # signals the processing is done
@@ -57,51 +58,66 @@ def get_source_link(metadata):
57
  return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
58
 
59
 
60
- def make_html_presse_source(source, i, tag, score, config):
61
  meta = source.metadata
62
  return f"""
63
- <div class="card" id="btn_{tag}_{i}_source" style="display:none;">
64
  <div class="card-content">
65
- <div class="card-content-column-1">
66
- <p><strong>Titre :</strong> {meta['file_title']}</p>
67
- <p><strong>Auteur/s :</strong> {meta['file_source_type']}</p>
68
- <p><strong>Date :</strong> {meta['file_date_publishing']}</p>
69
- </div>
70
- <div class="card-content-column-2">
71
- <p><strong>Paragraphe id :</strong> {source.page_content}</p>
72
- </div>
73
  </div>
74
  <div class="card-footer">
75
- <span>[{i}]</span>
76
  <span>Relevance Score : {round(100*score,1)}%</span>
 
 
 
77
  </div>
78
  </div>
79
  """
80
 
81
 
82
- def make_html_source(source, i, tag, score, config):
83
  meta = source.metadata
84
  return f"""
85
- <div class="card" id="btn_{tag}_{i}_source" style="display:none;">
86
  <div class="card-content">
87
- <div class="card-content-column-1">
88
- <p><strong>Titre :</strong> {meta['file_title']}</p>
89
- <p><strong>Auteur/s :</strong> {meta['file_source_type']}</p>
90
- <p><strong>Date :</strong> {meta['file_date_publishing']}</p>
91
- </div>
92
- <div class="card-content-column-2">
93
- <p><strong>Paragraphe id :</strong> {source.page_content.replace(config["passage_preprompt"], "")}</p>
94
- </div>
95
  </div>
96
  <div class="card-footer">
97
- <span>[{i}]</span>
98
- <span><a href="{get_source_link(meta)}" target="_blank">Lien source</a></span>
99
- <span>Page {meta['content_page_number'] + 1}</span>
100
  <span>Relevance Score : {round(100*score,1)}%</span>
 
 
 
101
  </div>
102
  </div>
103
  """
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  def clear_text_box(textbox):
107
  return ""
 
1
  from queue import SimpleQueue
2
  from dotenv import load_dotenv
3
+ import re
4
  from langchain.callbacks.base import BaseCallbackHandler
5
 
6
  job_done = object() # signals the processing is done
 
58
  return metadata["file_url"] + f"#page={metadata['content_page_number'] + 1}"
59
 
60
 
61
+ def make_html_presse_source(source, i, score, config):
62
  meta = source.metadata
63
  return f"""
64
+ <div class="card" id="doc{i}">
65
  <div class="card-content">
66
+ <h2>Doc {i} - {meta['file_title']}</h2>
67
+ <p>{source.page_content}</p>
 
 
 
 
 
 
68
  </div>
69
  <div class="card-footer">
70
+ <span>{meta['file_source_type']}</span>
71
  <span>Relevance Score : {round(100*score,1)}%</span>
72
+ <a href="https://fr.wikipedia.org/wiki/Baruch_Spinoza" target="_blank">
73
+ <span role="img" aria-label="Open PDF">🔗</span>
74
+ </a>
75
  </div>
76
  </div>
77
  """
78
 
79
 
80
+ def make_html_source(source, i, score, config):
81
  meta = source.metadata
82
  return f"""
83
+ <div class="card" id="doc{i}">
84
  <div class="card-content">
85
+ <h2>Doc {i} - {meta['file_title']} - Page {meta['content_page_number'] + 1}</h2>
86
+ <p>{source.page_content.replace(config["passage_preprompt"], "")}</p>
 
 
 
 
 
 
87
  </div>
88
  <div class="card-footer">
89
+ <span>{meta['file_source_type']}</span>
 
 
90
  <span>Relevance Score : {round(100*score,1)}%</span>
91
+ <a href="{get_source_link(meta)}" target="_blank">
92
+ <span role="img" aria-label="Open PDF">🔗</span>
93
+ </a>
94
  </div>
95
  </div>
96
  """
97
 
98
+ def parse_output_llm_with_sources(output):
99
+ content_parts = re.split(
100
+ r"[\[(]?(Doc\s?\d+(?:,\s?Doc\s?\d+)*|doc\s?\d+(?:,\s?doc\s?\d+)*|Doc\s\d+)[\])?]",
101
+ output,
102
+ )
103
+ parts = []
104
+ for part in content_parts:
105
+ if part.lower().startswith("doc"):
106
+ subparts = part.split(",")
107
+ subparts = [
108
+ subpart.lower().replace("doc", "").strip() for subpart in subparts
109
+ ]
110
+ subparts = [
111
+ f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>"""
112
+ for subpart in subparts
113
+ ]
114
+ parts.append("".join(subparts))
115
+ else:
116
+ parts.append(part)
117
+ content_parts = "".join(parts)
118
+
119
+ return content_parts
120
+
121
 
122
  def clear_text_box(textbox):
123
  return ""