pierreguillou committed on
Commit
4661970
·
verified ·
1 Parent(s): 51dcb5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -171
app.py CHANGED
@@ -10,25 +10,61 @@ from langchain_core.messages import HumanMessage
10
  from langchain_core.caches import BaseCache
11
  from langchain_core.callbacks import Callbacks
12
  ChatGoogleGenerativeAI.model_rebuild()
13
- import PyPDF2
14
- import docx
15
  import pandas as pd
16
- from pptx import Presentation
17
  import io
18
  import tempfile
19
  from urllib.parse import urlparse
20
  import re
21
 
22
- # Import et rebuild de ChatGoogleGenerativeAI en différé
 
 
 
 
 
23
  try:
24
  from langchain_google_genai import ChatGoogleGenerativeAI
25
  from langchain_core.caches import BaseCache
26
  ChatGoogleGenerativeAI.model_rebuild()
27
  except Exception as e:
28
- print(f"Avertissement lors du rebuild: {e}")
29
  from langchain_google_genai import ChatGoogleGenerativeAI
30
 
31
- # Configuration des modèles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  MODELS = {
33
  "Gemini 2.5 Flash (Google AI)": {
34
  "provider": "Google AI",
@@ -56,61 +92,67 @@ MODELS = {
56
  }
57
  }
58
 
59
- # API par défaut pour Gemini 2.5 Flash via HF Spaces Secrets
60
  DEFAULT_GEMINI_API = os.getenv("FLASH_GOOGLE_API_KEY")
61
 
62
  def extract_text_from_file(file):
63
- """Extrait le texte d'un fichier uploadé"""
 
 
 
 
 
 
64
  if file is None:
65
  return ""
66
- file_extension = os.path.splitext(file.name)[1].lower()
 
 
 
 
 
 
 
67
  try:
68
- if file_extension == '.pdf':
69
- with open(file.name, 'rb') as f:
70
- reader = PyPDF2.PdfReader(f)
71
- text = ""
72
- for page in reader.pages:
73
- text += page.extract_text() + "\n"
74
- return text
75
- elif file_extension == '.docx':
76
- doc = docx.Document(file.name)
77
- text = ""
78
- for paragraph in doc.paragraphs:
79
- text += paragraph.text + "\n"
80
- return text
81
- elif file_extension == '.txt':
82
- with open(file.name, 'r', encoding='utf-8') as f:
 
 
 
83
  return f.read()
84
- elif file_extension in ['.xlsx', '.xls']:
85
- df = pd.read_excel(file.name)
86
- return df.to_string()
87
- elif file_extension == '.pptx':
88
- prs = Presentation(file.name)
89
- text = ""
90
- for slide in prs.slides:
91
- for shape in slide.shapes:
92
- if hasattr(shape, "text"):
93
- text += shape.text + "\n"
94
- return text
95
  else:
96
- return "Format de fichier non supporté"
97
  except Exception as e:
98
- return f"Erreur lors de la lecture du fichier: {str(e)}"
99
 
100
  def extract_text_from_url(url):
101
- """Extrait le texte d'une URL"""
102
  try:
103
  response = requests.get(url, timeout=10)
104
  response.raise_for_status()
105
  content = response.text
106
  content = re.sub(r'<[^>]+>', '', content)
107
  content = re.sub(r'\s+', ' ', content).strip()
108
- return content[:10000] # Limite à 10k caractères
109
  except Exception as e:
110
- return f"Erreur lors de la récupération de l'URL: {str(e)}"
111
 
112
  def get_document_content(text_input, url_input, file_input):
113
- """Récupère le contenu du document selon la source"""
114
  if text_input.strip():
115
  return text_input.strip()
116
  elif url_input.strip():
@@ -121,7 +163,7 @@ def get_document_content(text_input, url_input, file_input):
121
  return ""
122
 
123
  def create_llm_instance(model_name, api_key):
124
- """Crée une instance du modèle LLM"""
125
  model_config = MODELS[model_name]
126
  if model_config["provider"] == "OpenAI":
127
  return model_config["class"](
@@ -144,24 +186,24 @@ def create_llm_instance(model_name, api_key):
144
  )
145
 
146
  def generate_html(model_name, api_key, text_input, url_input, file_input):
147
- """Génère le fichier HTML éducatif"""
148
  start_time = time.time()
149
  if model_name != "Gemini 2.5 Flash (Google AI)" and not api_key.strip():
150
- return None, "❌ Erreur: Veuillez fournir une clé API pour ce modèle.", 0
151
 
152
  document_content = get_document_content(text_input, url_input, file_input)
153
  if not document_content:
154
- return None, "❌ Erreur: Veuillez fournir un document (texte, URL ou fichier).", 0
155
 
156
  try:
157
- # Création de l'instance LLM
158
  llm = create_llm_instance(model_name, api_key)
159
 
160
- # Lecture du prompt template
161
  with open("creation_educational_html_from_any_document_18082025.txt", "r", encoding="utf-8") as f:
162
  prompt_template = f.read()
163
 
164
- # Remplacement des variables
165
  model_config = MODELS[model_name]
166
  prompt = prompt_template.format(
167
  model_name=model_config["model_name"],
@@ -169,33 +211,33 @@ def generate_html(model_name, api_key, text_input, url_input, file_input):
169
  document=document_content
170
  )
171
 
172
- # Génération du contenu
173
  message = HumanMessage(content=prompt)
174
  response = llm.invoke([message])
175
  html_content = response.content
176
 
177
- # Nettoyage des éventuelles balises de code des modèles
178
  html_content = html_content.replace("```html", "")
179
  html_content = html_content.replace("```", "")
180
 
181
- # Calcul du temps de génération
182
  generation_time = time.time() - start_time
183
 
184
- # Sauvegarde du fichier HTML
185
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
186
- filename = f"document_educatif_{timestamp}.html"
187
  with open(filename, "w", encoding="utf-8") as f:
188
  f.write(html_content)
189
 
190
- success_message = f"✅ Fichier HTML généré avec succès en {generation_time:.2f} secondes!"
191
  return filename, success_message, generation_time
192
 
193
  except Exception as e:
194
- error_message = f"❌ Erreur lors de la génération: {str(e)}"
195
  return None, error_message, 0
196
 
197
  def reset_form():
198
- """Remet à zéro le formulaire"""
199
  return (
200
  "Gemini 2.5 Flash (Google AI)", # model_name
201
  "", # api_key
@@ -208,23 +250,23 @@ def reset_form():
208
  )
209
 
210
  def update_api_info(model_name):
211
- """Met à jour les informations sur l'API selon le modèle sélectionné"""
212
  if model_name == "Gemini 2.5 Flash (Google AI)":
213
  return gr.update(
214
- label="Clé API (optionnelle)",
215
- placeholder="API gratuite disponible jusqu'à épuisement, ou utilisez votre propre clé",
216
- info="💡 Une API gratuite est déjà configurée pour ce modèle. Vous pouvez utiliser votre propre clé si vous le souhaitez."
217
  )
218
  else:
219
  return gr.update(
220
- label="Clé API (obligatoire)",
221
- placeholder="Entrez votre clé API",
222
- info="🔑 Clé API requise pour ce modèle"
223
  )
224
 
225
- # Interface Gradio (Apple-like)
226
  with gr.Blocks(
227
- title="EduHTML Creator - Générateur de contenu éducatif HTML",
228
  theme=gr.themes.Soft(),
229
  css="""
230
  /* ==== Apple-inspired Global Reset & Typography ==== */
@@ -351,14 +393,14 @@ html, body {
351
  border-radius: var(--radius-lg);
352
  overflow: hidden;
353
  background: var(--apple-black);
354
- color: #ffff !important; /* Force texte blanc */
355
  box-shadow: var(--shadow-soft);
356
  }
357
  .header, .header * {
358
- color: #ffff !important; /* Tout le contenu en blanc */
359
  }
360
  .header a, .header a:visited, .header a:active {
361
- color: #ffff !important; /* Liens blancs */
362
  text-decoration: underline;
363
  text-underline-offset: var(--link-underline-offset);
364
  }
@@ -542,14 +584,14 @@ input:focus, textarea:focus {
542
  border-radius: var(--radius-lg);
543
  overflow: hidden;
544
  background: var(--apple-black);
545
- color: #ffff !important; /* Force texte blanc */
546
  box-shadow: var(--shadow-soft);
547
  }
548
  .footer, .footer * {
549
- color: #ffff !important; /* Tout le contenu en blanc */
550
  }
551
  .footer a, .footer a:visited, .footer a:active {
552
- color: #ffff !important; /* Liens blancs */
553
  text-decoration: underline;
554
  text-underline-offset: var(--link-underline-offset);
555
  }
@@ -585,155 +627,155 @@ a { text-decoration: underline; text-underline-offset: var(--link-underline-offs
585
  <div class="header-inner">
586
  <h1>🎓 EduHTML Creator</h1>
587
  <p>
588
- Transformez n'importe quel document en contenu éducatif HTML interactif, avec un design premium inspiré d'Apple.
589
- Fidélité au document, structuration claire, interactivité, et mise en valeur des informations clés.
590
  </p>
591
  </div>
592
  </div>
593
  """)
594
 
595
  with gr.Column(elem_classes=["main-container"]):
596
- # Section Configuration du modèle
597
  gr.HTML("<div class='section'>")
598
  model_dropdown = gr.Dropdown(
599
  choices=list(MODELS.keys()),
600
  value="Gemini 2.5 Flash (Google AI)",
601
- label="Modèle LLM",
602
- info="Sélectionnez le modèle à utiliser pour la génération"
603
  )
604
 
605
  api_input = gr.Textbox(
606
- label="Clé API (optionnelle)",
607
- placeholder="API gratuite (Gemini Flash) disponible. Vous pouvez entrer votre propre clé.",
608
- info="Pour OpenAI/Anthropic, une clé est obligatoire.",
609
  type="password"
610
  )
611
  gr.HTML("</div>")
612
 
613
- # Section Source du document avec onglets
614
  gr.HTML("<div class='section alt'>")
615
- gr.HTML("<h3>Source du document</h3>")
616
 
617
  with gr.Tabs():
618
- with gr.TabItem("📝 Texte"):
619
  text_input = gr.Textbox(
620
- label="Texte copié/collé",
621
- placeholder="Collez votre texte ici...",
622
  lines=4
623
  )
624
 
625
  with gr.TabItem("🌐 URL"):
626
  url_input = gr.Textbox(
627
- label="Lien Web",
628
- placeholder="https://exemple.com/article"
629
  )
630
 
631
- with gr.TabItem("📁 Fichier"):
632
  file_input = gr.File(
633
- label="Fichier",
634
  file_types=[".pdf", ".txt", ".docx", ".xlsx", ".xls", ".pptx"]
635
  )
636
 
637
  gr.HTML("</div>")
638
 
639
- # Boutons d'action
640
  with gr.Row():
641
- submit_btn = gr.Button("Générer le HTML", variant="primary", elem_classes=["apple-button"])
642
  reset_btn = gr.Button("Reset", elem_classes=["reset-button"])
643
 
644
- # Section Résultats
645
- status_output = gr.HTML(label="Statut")
646
  gr.HTML("<div class='section preview-card'>")
647
- gr.HTML("<div class='preview-header'><div class='preview-dot' aria-hidden='true'></div><div>Prévisualisation</div></div>")
648
- html_preview = gr.HTML(label="Prévisualisation", visible=False, elem_id="html-preview", elem_classes=["preview-body"])
649
- html_file_output = gr.File(label="Fichier HTML téléchargeable", visible=False)
650
  gr.HTML("</div>")
651
 
652
- # Footer (black)
653
- gr.HTML("""
654
- <div class="footer" role="contentinfo">
655
- <div class="footer-inner">
656
- <span>Design inspiré d'Apple Contrastes élevésInteractions fluides</span>
 
657
  </div>
658
- </div>
659
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
660
 
661
- # Light JS: smooth scroll to top, inline burger, focus handling
662
- gr.HTML("""
663
- <script>
664
- (function() {
665
- const menuBtn = document.getElementById('inlineMenuBtn');
666
- const menu = document.getElementById('inlineMenu');
667
- const topLink = document.getElementById('scrollTopLink');
668
-
669
- function closeMenu() {
670
- if (!menu) return;
671
- menu.classList.remove('open');
672
- menu.setAttribute('aria-hidden', 'true');
673
- }
674
-
675
- if (menuBtn && menu) {
676
- menuBtn.addEventListener('click', function(e) {
677
- e.preventDefault();
678
- const isOpen = menu.classList.contains('open');
679
- if (isOpen) {
 
 
 
 
 
 
680
  closeMenu();
681
- } else {
682
- menu.classList.add('open');
683
- menu.setAttribute('aria-hidden', 'false');
684
  }
685
  });
686
- }
687
 
688
- if (topLink) {
689
- topLink.addEventListener('click', function(e) {
690
- e.preventDefault();
691
- window.scrollTo({ top: 0, behavior: 'smooth' });
692
- closeMenu();
693
  });
694
- }
695
-
696
- // Close when clicking outside
697
- document.addEventListener('click', function(e) {
698
- if (!menu || !menuBtn) return;
699
- if (!menu.contains(e.target) && !menuBtn.contains(e.target)) {
700
- closeMenu();
701
- }
702
- });
703
-
704
- // Accessibility: close on Escape
705
- document.addEventListener('keydown', function(e) {
706
- if (e.key === 'Escape') closeMenu();
707
- });
708
- })();
709
- </script>
710
- """)
711
-
712
- # Événements
713
- model_dropdown.change(
714
- fn=update_api_info,
715
- inputs=[model_dropdown],
716
- outputs=[api_input]
717
- )
718
 
719
- submit_btn.click(
720
- fn=generate_html,
721
- inputs=[model_dropdown, api_input, text_input, url_input, file_input],
722
- outputs=[html_file_output, status_output, gr.State()]
723
- ).then(
724
- fn=lambda file, status, _: (
725
- gr.update(visible=file is not None),
726
- status,
727
- gr.update(visible=file is not None, value=(open(file, 'r', encoding='utf-8').read() if file else ""))
728
- ),
729
- inputs=[html_file_output, status_output, gr.State()],
730
- outputs=[html_file_output, status_output, html_preview]
731
- )
732
 
733
- reset_btn.click(
734
- fn=reset_form,
735
- outputs=[model_dropdown, api_input, text_input, url_input, file_input, status_output, html_file_output, html_preview]
736
- )
737
 
738
  if __name__ == "__main__":
739
  app.launch(
 
10
  from langchain_core.caches import BaseCache
11
  from langchain_core.callbacks import Callbacks
12
  ChatGoogleGenerativeAI.model_rebuild()
 
 
13
  import pandas as pd
 
14
  import io
15
  import tempfile
16
  from urllib.parse import urlparse
17
  import re
18
 
19
+ # Import DocLing and necessary configuration classes
20
+ from docling.document_converter import DocumentConverter, PdfFormatOption
21
+ from docling.datamodel.pipeline_options import PdfPipelineOptions
22
+ from docling.datamodel.base_models import InputFormat
23
+
24
+ # Import and rebuild ChatGoogleGenerativeAI deferred
25
  try:
26
  from langchain_google_genai import ChatGoogleGenerativeAI
27
  from langchain_core.caches import BaseCache
28
  ChatGoogleGenerativeAI.model_rebuild()
29
  except Exception as e:
30
+ print(f"Warning during rebuild: {e}")
31
  from langchain_google_genai import ChatGoogleGenerativeAI
32
 
33
# --- START OF OCR CONFIGURATION ---
# Build ONE DocumentConverter at import time and reuse it for every request;
# constructing it inside the extraction function would repeat the setup work
# on each call.
#
# Why this is written with `ocr_options=`:
#   The OCR engine and its languages are selected through an OcrOptions object
#   passed as `ocr_options`. Keyword arguments such as `ocr_model=` or
#   `ocr_languages=` are not pipeline-option fields, so passing them does not
#   select Tesseract.
#   NOTE(review): confirm the field/class names against the docling version
#   pinned for this app.
from docling.datamodel.pipeline_options import TesseractCliOcrOptions

# Tesseract language codes (tessdata file names). This is a deliberately wide
# default set; trimming it speeds OCR up.
# Changes relative to the previous list, to be verified with
# `tesseract --list-langs` on the deployment image:
#   - "alb" -> "sqi" (Albanian), "kur" -> "kmr" (Kurmanji Kurdish)
#   - dropped "aze_latn" / "srp_cyrillic" ("aze" and "srp" are already listed,
#     alongside "aze_cyrl" and "srp_latn")
#   - dropped "zul" (no matching tessdata name that I am aware of)
OCR_LANGUAGES = [
    "eng", "fra", "deu", "spa", "ita", "por", "nld", "pol", "tur", "ces", "rus", "ukr", "ell", "ron", "hun",
    "bul", "hrv", "srp", "slk", "slv", "lit", "lav", "est", "cat", "eus", "glg", "isl", "dan", "nor", "swe",
    "fin", "sqi", "mlt", "afr", "swa", "amh", "uzb", "aze", "kaz", "kir", "mon", "tgl", "ind", "msa",
    "tha", "vie", "khm", "lao", "mya", "ben", "hin", "mar", "guj", "pan", "mal", "tam", "tel", "kan", "nep",
    "sin", "urd", "fas", "pus", "kmr", "aze_cyrl", "tat", "uig", "heb", "ara", "yid", "grc", "chr", "epo",
    "hye", "kat", "kat_old", "mkd", "bel", "srp_latn",
    # CJK - these are heavier and slower; include only if needed:
    "chi_sim", "chi_tra", "jpn", "kor",
]

# 1. Pipeline options: enable OCR for PDFs and route it to the Tesseract CLI.
pdf_options = PdfPipelineOptions(
    do_ocr=True,
    ocr_options=TesseractCliOcrOptions(lang=OCR_LANGUAGES),
)

# 2. Format-specific configuration: only the PDF pipeline is customised.
format_options = {
    InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_options)
}

# 3. Shared converter instance used by the file-extraction code.
docling_converter = DocumentConverter(format_options=format_options)
# --- END OF OCR CONFIGURATION ---
66
+
67
+ # Model configuration
68
  MODELS = {
69
  "Gemini 2.5 Flash (Google AI)": {
70
  "provider": "Google AI",
 
92
  }
93
  }
94
 
95
+ # Default API for Gemini 2.5 Flash via HF Spaces Secrets
96
  DEFAULT_GEMINI_API = os.getenv("FLASH_GOOGLE_API_KEY")
97
 
98
  def extract_text_from_file(file):
99
+ """
100
+ Extract text from an uploaded file or path (str).
101
+ - Accepts an object with .name attribute (e.g. Gradio upload) OR a file path (str).
102
+ - DocLing for: .pdf (Tesseract OCR enabled if configured), .docx, .xlsx, .pptx
103
+ - Converts .csv /.xls -> temporary .xlsx then DocLing
104
+ - .txt read directly
105
+ """
106
  if file is None:
107
  return ""
108
+
109
+ # Normalize to a filesystem path string
110
+ path = file.name if hasattr(file, "name") else str(file)
111
+ ext = os.path.splitext(path)[1].lower()
112
+
113
+ docling_direct = {".pdf", ".docx", ".xlsx", ".pptx"}
114
+ to_xlsx_first = {".csv", ".xls"}
115
+
116
  try:
117
+ if ext in docling_direct:
118
+ result = docling_converter.convert(path)
119
+ return result.document.export_to_markdown()
120
+
121
+ elif ext in to_xlsx_first:
122
+ # Convert CSV/XLS -> XLSX
123
+ if ext == ".csv":
124
+ df = pd.read_csv(path)
125
+ else: # .xls
126
+ df = pd.read_excel(path)
127
+
128
+ with tempfile.NamedTemporaryFile(delete=True, suffix=".xlsx") as tmp:
129
+ df.to_excel(tmp.name, index=False)
130
+ result = docling_converter.convert(tmp.name)
131
+ return result.document.export_to_markdown()
132
+
133
+ elif ext == ".txt":
134
+ with open(path, "r", encoding="utf-8") as f:
135
  return f.read()
136
+
 
 
 
 
 
 
 
 
 
 
137
  else:
138
+ return "Unsupported file format"
139
  except Exception as e:
140
+ return f"Error reading file: {str(e)}"
141
 
142
  def extract_text_from_url(url):
143
+ """Extract text from a URL"""
144
  try:
145
  response = requests.get(url, timeout=10)
146
  response.raise_for_status()
147
  content = response.text
148
  content = re.sub(r'<[^>]+>', '', content)
149
  content = re.sub(r'\s+', ' ', content).strip()
150
+ return content[:10000] # Limit to 10k characters
151
  except Exception as e:
152
+ return f"Error retrieving URL: {str(e)}"
153
 
154
  def get_document_content(text_input, url_input, file_input):
155
+ """Retrieve document content based on source"""
156
  if text_input.strip():
157
  return text_input.strip()
158
  elif url_input.strip():
 
163
  return ""
164
 
165
  def create_llm_instance(model_name, api_key):
166
+ """Create an LLM model instance"""
167
  model_config = MODELS[model_name]
168
  if model_config["provider"] == "OpenAI":
169
  return model_config["class"](
 
186
  )
187
 
188
  def generate_html(model_name, api_key, text_input, url_input, file_input):
189
+ """Generate educational HTML file"""
190
  start_time = time.time()
191
  if model_name != "Gemini 2.5 Flash (Google AI)" and not api_key.strip():
192
+ return None, "❌ Error: Please provide an API key for this model.", 0
193
 
194
  document_content = get_document_content(text_input, url_input, file_input)
195
  if not document_content:
196
+ return None, "❌ Error: Please provide a document (text, URL or file).", 0
197
 
198
  try:
199
+ # Create LLM instance
200
  llm = create_llm_instance(model_name, api_key)
201
 
202
+ # Read prompt template
203
  with open("creation_educational_html_from_any_document_18082025.txt", "r", encoding="utf-8") as f:
204
  prompt_template = f.read()
205
 
206
+ # Replace variables
207
  model_config = MODELS[model_name]
208
  prompt = prompt_template.format(
209
  model_name=model_config["model_name"],
 
211
  document=document_content
212
  )
213
 
214
+ # Generate content
215
  message = HumanMessage(content=prompt)
216
  response = llm.invoke([message])
217
  html_content = response.content
218
 
219
+ # Clean any code tags from models
220
  html_content = html_content.replace("```html", "")
221
  html_content = html_content.replace("```", "")
222
 
223
+ # Calculate generation time
224
  generation_time = time.time() - start_time
225
 
226
+ # Save HTML file
227
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
228
+ filename = f"educational_document_{timestamp}.html"
229
  with open(filename, "w", encoding="utf-8") as f:
230
  f.write(html_content)
231
 
232
+ success_message = f"✅ HTML file generated successfully in {generation_time:.2f} seconds!"
233
  return filename, success_message, generation_time
234
 
235
  except Exception as e:
236
+ error_message = f"❌ Error during generation: {str(e)}"
237
  return None, error_message, 0
238
 
239
  def reset_form():
240
+ """Reset the form to zero"""
241
  return (
242
  "Gemini 2.5 Flash (Google AI)", # model_name
243
  "", # api_key
 
250
  )
251
 
252
  def update_api_info(model_name):
253
+ """Update API information based on selected model"""
254
  if model_name == "Gemini 2.5 Flash (Google AI)":
255
  return gr.update(
256
+ label="API Key (optional)",
257
+ placeholder="Free API available until exhausted, or use your own key",
258
+ info="💡 A free API is already configured for this model. You can use your own key if you wish."
259
  )
260
  else:
261
  return gr.update(
262
+ label="API Key (required)",
263
+ placeholder="Enter your API key",
264
+ info="🔑 API key required for this model"
265
  )
266
 
267
+ # Gradio Interface (Apple-like)
268
  with gr.Blocks(
269
+ title="EduHTML Creator - Educational HTML Content Generator",
270
  theme=gr.themes.Soft(),
271
  css="""
272
  /* ==== Apple-inspired Global Reset & Typography ==== */
 
393
  border-radius: var(--radius-lg);
394
  overflow: hidden;
395
  background: var(--apple-black);
396
+ color: #ffff !important; /* Force white text */
397
  box-shadow: var(--shadow-soft);
398
  }
399
  .header, .header * {
400
+ color: #ffff !important; /* All content in white */
401
  }
402
  .header a, .header a:visited, .header a:active {
403
+ color: #ffff !important; /* White links */
404
  text-decoration: underline;
405
  text-underline-offset: var(--link-underline-offset);
406
  }
 
584
  border-radius: var(--radius-lg);
585
  overflow: hidden;
586
  background: var(--apple-black);
587
+ color: #ffff !important; /* Force white text */
588
  box-shadow: var(--shadow-soft);
589
  }
590
  .footer, .footer * {
591
+ color: #ffff !important; /* All content in white */
592
  }
593
  .footer a, .footer a:visited, .footer a:active {
594
+ color: #ffff !important; /* White links */
595
  text-decoration: underline;
596
  text-underline-offset: var(--link-underline-offset);
597
  }
 
627
  <div class="header-inner">
628
  <h1>🎓 EduHTML Creator</h1>
629
  <p>
630
+ Transform any document into interactive educational HTML content, with a premium Apple-inspired design.
631
+ Document fidelity, clear structure, interactivity, and highlighting of key information.
632
  </p>
633
  </div>
634
  </div>
635
  """)
636
 
637
  with gr.Column(elem_classes=["main-container"]):
638
+ # Model Configuration Section
639
  gr.HTML("<div class='section'>")
640
  model_dropdown = gr.Dropdown(
641
  choices=list(MODELS.keys()),
642
  value="Gemini 2.5 Flash (Google AI)",
643
+ label="LLM Model",
644
+ info="Select the model to use for generation"
645
  )
646
 
647
  api_input = gr.Textbox(
648
+ label="API Key (optional)",
649
+ placeholder="Free API (Gemini Flash) available. You can enter your own key.",
650
+ info="For OpenAI/Anthropic, a key is required.",
651
  type="password"
652
  )
653
  gr.HTML("</div>")
654
 
655
+ # Document Source Section with tabs
656
  gr.HTML("<div class='section alt'>")
657
+ gr.HTML("<h3>Document Source</h3>")
658
 
659
  with gr.Tabs():
660
+ with gr.TabItem("📝 Text"):
661
  text_input = gr.Textbox(
662
+ label="Copied/pasted text",
663
+ placeholder="Paste your text here...",
664
  lines=4
665
  )
666
 
667
  with gr.TabItem("🌐 URL"):
668
  url_input = gr.Textbox(
669
+ label="Web Link",
670
+ placeholder="https://example.com/article"
671
  )
672
 
673
+ with gr.TabItem("📁 File"):
674
  file_input = gr.File(
675
+ label="File",
676
  file_types=[".pdf", ".txt", ".docx", ".xlsx", ".xls", ".pptx"]
677
  )
678
 
679
  gr.HTML("</div>")
680
 
681
+ # Action buttons
682
  with gr.Row():
683
+ submit_btn = gr.Button("Generate HTML", variant="primary", elem_classes=["apple-button"])
684
  reset_btn = gr.Button("Reset", elem_classes=["reset-button"])
685
 
686
+ # Results Section
687
+ status_output = gr.HTML(label="Status")
688
  gr.HTML("<div class='section preview-card'>")
689
+ gr.HTML("<div class='preview-header'><div class='preview-dot' aria-hidden='true'></div><div>Preview</div></div>")
690
+ html_preview = gr.HTML(label="Preview", visible=False, elem_id="html-preview", elem_classes=["preview-body"])
691
+ html_file_output = gr.File(label="Downloadable HTML file", visible=False)
692
  gr.HTML("</div>")
693
 
694
+ # Footer (black)
695
+ gr.HTML("""
696
+ <div class="footer" role="contentinfo">
697
+ <div class="footer-inner">
698
+ <span>Apple-inspired designHigh contrastsSmooth interactions</span>
699
+ </div>
700
  </div>
701
+ """)
702
+
703
+ # Light JS: smooth scroll to top, inline burger, focus handling
704
+ gr.HTML("""
705
+ <script>
706
+ (function() {
707
+ const menuBtn = document.getElementById('inlineMenuBtn');
708
+ const menu = document.getElementById('inlineMenu');
709
+ const topLink = document.getElementById('scrollTopLink');
710
+
711
+ function closeMenu() {
712
+ if (!menu) return;
713
+ menu.classList.remove('open');
714
+ menu.setAttribute('aria-hidden', 'true');
715
+ }
716
 
717
+ if (menuBtn && menu) {
718
+ menuBtn.addEventListener('click', function(e) {
719
+ e.preventDefault();
720
+ const isOpen = menu.classList.contains('open');
721
+ if (isOpen) {
722
+ closeMenu();
723
+ } else {
724
+ menu.classList.add('open');
725
+ menu.setAttribute('aria-hidden', 'false');
726
+ }
727
+ });
728
+ }
729
+
730
+ if (topLink) {
731
+ topLink.addEventListener('click', function(e) {
732
+ e.preventDefault();
733
+ window.scrollTo({ top: 0, behavior: 'smooth' });
734
+ closeMenu();
735
+ });
736
+ }
737
+
738
+ // Close when clicking outside
739
+ document.addEventListener('click', function(e) {
740
+ if (!menu || !menuBtn) return;
741
+ if (!menu.contains(e.target) && !menuBtn.contains(e.target)) {
742
  closeMenu();
 
 
 
743
  }
744
  });
 
745
 
746
+ // Accessibility: close on Escape
747
+ document.addEventListener('keydown', function(e) {
748
+ if (e.key === 'Escape') closeMenu();
 
 
749
  });
750
+ })();
751
+ </script>
752
+ """)
753
+
754
+ # Events
755
+ model_dropdown.change(
756
+ fn=update_api_info,
757
+ inputs=[model_dropdown],
758
+ outputs=[api_input]
759
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
 
761
+ submit_btn.click(
762
+ fn=generate_html,
763
+ inputs=[model_dropdown, api_input, text_input, url_input, file_input],
764
+ outputs=[html_file_output, status_output, gr.State()]
765
+ ).then(
766
+ fn=lambda file, status, _: (
767
+ gr.update(visible=file is not None),
768
+ status,
769
+ gr.update(visible=file is not None, value=(open(file, 'r', encoding='utf-8').read() if file else ""))
770
+ ),
771
+ inputs=[html_file_output, status_output, gr.State()],
772
+ outputs=[html_file_output, status_output, html_preview]
773
+ )
774
 
775
+ reset_btn.click(
776
+ fn=reset_form,
777
+ outputs=[model_dropdown, api_input, text_input, url_input, file_input, status_output, html_file_output, html_preview]
778
+ )
779
 
780
  if __name__ == "__main__":
781
  app.launch(