awacke1 committed on
Commit
f3c8b80
·
verified ·
1 Parent(s): 47411c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -90
app.py CHANGED
@@ -9,7 +9,7 @@ import streamlit as st
9
  import pandas as pd
10
  from PIL import Image
11
  from reportlab.pdfgen import canvas
12
- from reportlab.lib.pagesizes import letter, A4
13
  from reportlab.lib.utils import ImageReader
14
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
15
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
@@ -130,12 +130,28 @@ def markdown_to_pdf_content(markdown_text, add_space_before_numbered, headings_t
130
  total_lines = len(pdf_content)
131
  return pdf_content, total_lines
132
 
133
- def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document"):
134
  if not markdown_texts and not image_files:
135
  return None
136
  buffer = io.BytesIO()
137
- page_width = A4[0] * 2
138
- page_height = A4[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  doc = SimpleDocTemplate(
140
  buffer,
141
  pagesize=(page_width, page_height),
@@ -255,8 +271,8 @@ def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, ad
255
  try:
256
  img = Image.open(img_path)
257
  img_width, img_height = img.size
258
- page_width, page_height = A4
259
- scale = min((page_width - 40) / img_width, (page_height - 40) / img_height)
260
  new_width = img_width * scale
261
  new_height = img_height * scale
262
  story.append(ReportLabImage(img_path, width=new_width, height=new_height))
@@ -312,36 +328,94 @@ tab1, tab2 = st.tabs(["📄 PDF Composer", "🧪 Code Interpreter"])
312
 
313
  with tab1:
314
  st.header("📄 PDF Composer & Voice Generator 🚀")
315
- # Sidebar PDF text settings
316
- columns = st.sidebar.slider("Text columns", 1, 3, 2)
317
- font_family = st.sidebar.selectbox("Font", ["Helvetica", "Times-Roman", "Courier", "DejaVuSans"])
318
- font_size = st.sidebar.slider("Font size", 6, 24, 14)
319
- # Markdown input
320
- md_file = st.file_uploader("Upload Markdown (.md)", type=["md"])
321
- if md_file:
322
- md_text = md_file.getvalue().decode("utf-8")
323
- stem = Path(md_file.name).stem
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  else:
325
- md_text = st.text_area("Or enter markdown text directly", height=200)
 
 
 
 
326
  stem = datetime.now().strftime('%Y%m%d_%H%M%S')
327
- # Convert Markdown to plain text
328
- renderer = mistune.HTMLRenderer()
329
- markdown = mistune.create_markdown(renderer=renderer)
330
- html = markdown(md_text or "")
331
- plain_text = re.sub(r'<[^>]+>', '', html)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  # Voice settings
333
- languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
334
- voice_choice = st.selectbox("Voice Language", list(languages.keys()))
335
- voice_lang = languages[voice_choice]
336
- slow = st.checkbox("Slow Speech")
337
- VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
338
- selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
 
 
 
 
 
 
339
  if st.button("🔊 Generate & Download Voice MP3 from Text"):
340
  if plain_text.strip():
341
  voice_file = f"{stem}_{selected_voice}.mp3"
342
  try:
343
- cleaned_text = clean_for_speech(plain_text)
344
- audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, voice_file))
 
 
345
  st.audio(audio_file)
346
  with open(audio_file, 'rb') as mp3:
347
  st.download_button("📥 Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
@@ -349,10 +423,13 @@ with tab1:
349
  st.error(f"Error generating voice: {e}")
350
  else:
351
  st.warning("No text to generate voice from.")
 
352
  # Image uploads and ordering
 
353
  imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
354
  ordered_images = []
355
  if imgs:
 
356
  df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
357
  edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
358
  for _, row in edited.sort_values("order").iterrows():
@@ -360,62 +437,111 @@ with tab1:
360
  if f.name == row['name']:
361
  ordered_images.append(f)
362
  break
363
- if st.button("🖋️ Generate PDF with Markdown & Images"):
364
- if not plain_text.strip() and not ordered_images:
365
- st.warning("Please provide some text or upload images to generate a PDF.")
 
 
 
 
366
  else:
367
- buf = io.BytesIO()
368
- c = canvas.Canvas(buf)
369
- if plain_text.strip():
370
- page_w, page_h = letter
371
- margin = 40
372
- gutter = 20
373
- col_w = (page_w - 2*margin - (columns-1)*gutter) / columns
374
- c.setFont(font_family, font_size)
375
- line_height = font_size * 1.2
376
- col = 0
377
- x = margin
378
- y = page_h - margin
379
- avg_char_width = font_size * 0.6
380
- wrap_width = int(col_w / avg_char_width) if avg_char_width > 0 else 100
381
- for paragraph in plain_text.split("\n"):
382
- if not paragraph.strip():
383
- y -= line_height
384
- if y < margin:
385
- col += 1
386
- if col >= columns:
387
- c.showPage()
388
- c.setFont(font_family, font_size)
389
- col = 0
390
- x = margin + col*(col_w+gutter)
391
- y = page_h - margin
392
- continue
393
- for line in textwrap.wrap(paragraph, wrap_width):
394
- if y < margin:
395
- col += 1
396
- if col >= columns:
397
- c.showPage()
398
- c.setFont(font_family, font_size)
399
- col = 0
400
- x = margin + col*(col_w+gutter)
401
- y = page_h - margin
402
- c.drawString(x, y, line)
403
- y -= line_height
404
- y -= line_height
405
- for img_f in ordered_images:
406
- try:
407
- img = Image.open(img_f)
408
- w, h = img.size
409
- c.showPage()
410
- c.setPageSize((w, h))
411
- c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=False)
412
- except Exception as e:
413
- st.warning(f"Could not process image {img_f.name}: {e}")
414
- continue
415
- c.save()
416
- buf.seek(0)
417
- pdf_name = f"{stem}.pdf"
418
- st.download_button("⬇️ Download PDF", data=buf, file_name=pdf_name, mime="application/pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  st.markdown("---")
420
  st.subheader("📂 Available Assets")
421
  all_assets = glob.glob("*.*")
@@ -455,24 +581,25 @@ with tab1:
455
  if not st.session_state.selected_assets:
456
  st.warning("Please select at least one asset to generate a PDF.")
457
  else:
458
- markdown_texts = []
459
  image_files = []
460
  for a in st.session_state.selected_assets:
461
  ext = a.split('.')[-1].lower()
462
  if ext == 'md':
463
  with open(a, 'r', encoding='utf-8') as f:
464
- markdown_texts.append(f.read())
465
  elif ext in ['png', 'jpg', 'jpeg']:
466
  image_files.append(a)
467
  with st.spinner("Generating PDF from selected assets..."):
468
  pdf_bytes = create_pdf(
469
- markdown_texts=markdown_texts,
470
  image_files=image_files,
471
- base_font_size=14,
472
- num_columns=2,
473
  add_space_before_numbered=True,
474
  headings_to_fonts=True,
475
- doc_title="Combined_Selected_Assets"
 
476
  )
477
  if pdf_bytes:
478
  pdf_images = pdf_to_image(pdf_bytes)
@@ -489,6 +616,7 @@ with tab1:
489
  )
490
  else:
491
  st.error("Failed to generate PDF.")
 
492
  st.markdown("---")
493
  st.subheader("🖼 Image Gallery")
494
  image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
@@ -505,6 +633,7 @@ with tab1:
505
  st.warning(f"Could not load image {image_file}: {e}")
506
  else:
507
  st.info("No images found in the current directory.")
 
508
  st.markdown("---")
509
  st.subheader("🎥 Video Gallery")
510
  video_files = glob.glob("*.mp4")
 
9
  import pandas as pd
10
  from PIL import Image
11
  from reportlab.pdfgen import canvas
12
+ from reportlab.lib.pagesizes import letter, A4, legal, A3, A5, LETTER, LEGAL
13
  from reportlab.lib.utils import ImageReader
14
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image as ReportLabImage
15
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 
130
  total_lines = len(pdf_content)
131
  return pdf_content, total_lines
132
 
133
+ def create_pdf(markdown_texts, image_files, base_font_size=14, num_columns=2, add_space_before_numbered=True, headings_to_fonts=True, doc_title="Combined Document", page_size=A4):
134
  if not markdown_texts and not image_files:
135
  return None
136
  buffer = io.BytesIO()
137
+
138
+ # Use the selected page size
139
+ if page_size == "A4":
140
+ page_dimensions = A4
141
+ elif page_size == "Letter":
142
+ page_dimensions = letter
143
+ elif page_size == "Legal":
144
+ page_dimensions = legal
145
+ elif page_size == "A3":
146
+ page_dimensions = A3
147
+ elif page_size == "A5":
148
+ page_dimensions = A5
149
+ else:
150
+ page_dimensions = A4 # Default fallback
151
+
152
+ page_width = page_dimensions[0] * 2
153
+ page_height = page_dimensions[1]
154
+
155
  doc = SimpleDocTemplate(
156
  buffer,
157
  pagesize=(page_width, page_height),
 
271
  try:
272
  img = Image.open(img_path)
273
  img_width, img_height = img.size
274
+ page_width_img, page_height_img = page_dimensions
275
+ scale = min((page_width_img - 40) / img_width, (page_height_img - 40) / img_height)
276
  new_width = img_width * scale
277
  new_height = img_height * scale
278
  story.append(ReportLabImage(img_path, width=new_width, height=new_height))
 
328
 
329
  with tab1:
330
  st.header("📄 PDF Composer & Voice Generator 🚀")
331
+
332
+ # Sidebar PDF settings
333
+ with st.sidebar:
334
+ st.subheader("📄 PDF Settings")
335
+ columns = st.slider("Text columns", 1, 3, 2)
336
+ font_family = st.selectbox("Font", ["Helvetica", "Times-Roman", "Courier", "DejaVuSans"])
337
+ font_size = st.slider("Font size", 6, 24, 14)
338
+
339
+ # Page size selection
340
+ page_size_options = {
340
+ "A4 (210 × 297 mm)": "A4",
342
+ "Letter (8.5 × 11 in)": "Letter",
343
+ "Legal (8.5 × 14 in)": "Legal",
344
+ "A3 (297 × 420 mm)": "A3",
345
+ "A5 (148 × 210 mm)": "A5"
346
+ }
347
+ selected_page_size = st.selectbox(
348
+ "📐 Page Size",
349
+ options=list(page_size_options.keys()),
350
+ index=0 # Default to A4
351
+ )
352
+ page_size = page_size_options[selected_page_size]
353
+
354
+ # Multiple markdown file upload
355
+ md_files = st.file_uploader("Upload Markdown Files (.md)", type=["md"], accept_multiple_files=True)
356
+ markdown_texts = []
357
+ combined_text = ""
358
+
359
+ if md_files:
360
+ st.subheader(f"📂 Uploaded Files ({len(md_files)})")
361
+ for i, md_file in enumerate(md_files):
362
+ md_text = md_file.getvalue().decode("utf-8")
363
+ markdown_texts.append(md_text)
364
+ combined_text += md_text + "\n\n"
365
+
366
+ with st.expander(f"📄 {md_file.name}"):
367
+ st.markdown(md_text[:500] + "..." if len(md_text) > 500 else md_text)
368
+
369
+ stem = f"combined_{len(md_files)}_files_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
370
  else:
371
+ # Single text area for manual input
372
+ manual_text = st.text_area("Or enter markdown text directly", height=200)
373
+ if manual_text:
374
+ markdown_texts = [manual_text]
375
+ combined_text = manual_text
376
  stem = datetime.now().strftime('%Y%m%d_%H%M%S')
377
+
378
+ # Convert Markdown to plain text for voice generation
379
+ if combined_text:
380
+ renderer = mistune.HTMLRenderer()
381
+ markdown = mistune.create_markdown(renderer=renderer)
382
+ html = markdown(combined_text)
383
+ plain_text = re.sub(r'<[^>]+>', '', html)
384
+
385
+ st.subheader("📊 Content Summary")
386
+ col1, col2, col3, col4 = st.columns(4)
387
+ with col1:
388
+ st.metric("Files", len(md_files) if md_files else 1)
389
+ with col2:
390
+ st.metric("Total Characters", len(combined_text))
391
+ with col3:
392
+ st.metric("Estimated Words", len(combined_text.split()))
393
+ with col4:
394
+ st.metric("Page Size", selected_page_size.split(" (")[0])
395
+ else:
396
+ plain_text = ""
397
+
398
  # Voice settings
399
+ st.subheader("🔊 Text-to-Speech Settings")
400
+ col1, col2 = st.columns(2)
401
+ with col1:
402
+ languages = {"English (US)": "en", "English (UK)": "en-uk", "Spanish": "es"}
403
+ voice_choice = st.selectbox("Voice Language", list(languages.keys()))
404
+ voice_lang = languages[voice_choice]
405
+ slow = st.checkbox("Slow Speech")
406
+
407
+ with col2:
408
+ VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
409
+ selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
410
+
411
  if st.button("🔊 Generate & Download Voice MP3 from Text"):
412
  if plain_text.strip():
413
  voice_file = f"{stem}_{selected_voice}.mp3"
414
  try:
415
+ with st.spinner("Generating audio..."):
416
+ cleaned_text = clean_for_speech(plain_text)
417
+ audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, voice_file))
418
+ st.success("Audio generated successfully!")
419
  st.audio(audio_file)
420
  with open(audio_file, 'rb') as mp3:
421
  st.download_button("📥 Download MP3", data=mp3, file_name=voice_file, mime="audio/mpeg")
 
423
  st.error(f"Error generating voice: {e}")
424
  else:
425
  st.warning("No text to generate voice from.")
426
+
427
  # Image uploads and ordering
428
+ st.subheader("🖼️ Image Management")
429
  imgs = st.file_uploader("Upload Images for PDF", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
430
  ordered_images = []
431
  if imgs:
432
+ st.write(f"📊 Uploaded {len(imgs)} images")
433
  df_imgs = pd.DataFrame([{"name": f.name, "order": i} for i, f in enumerate(imgs)])
434
  edited = st.data_editor(df_imgs, use_container_width=True, num_rows="dynamic")
435
  for _, row in edited.sort_values("order").iterrows():
 
437
  if f.name == row['name']:
438
  ordered_images.append(f)
439
  break
440
+
441
+ # PDF Generation
442
+ st.subheader("📄 PDF Generation")
443
+
444
+ if st.button("🖋️ Generate PDF with Markdown & Images", type="primary"):
445
+ if not markdown_texts and not ordered_images:
446
+ st.warning("Please provide some markdown text or upload images to generate a PDF.")
447
  else:
448
+ with st.spinner(f"Generating PDF with {page_size} page size..."):
449
+ if markdown_texts and not ordered_images:
450
+ # Use the enhanced create_pdf function
451
+ pdf_bytes = create_pdf(
452
+ markdown_texts=markdown_texts,
453
+ image_files=[],
454
+ base_font_size=font_size,
455
+ num_columns=columns,
456
+ add_space_before_numbered=True,
457
+ headings_to_fonts=True,
458
+ doc_title=f"Markdown_Document_{len(markdown_texts)}_files",
459
+ page_size=page_size
460
+ )
461
+
462
+ if pdf_bytes:
463
+ pdf_images = pdf_to_image(pdf_bytes)
464
+ if pdf_images:
465
+ st.subheader("Preview of Generated PDF")
466
+ for i, img in enumerate(pdf_images):
467
+ st.image(img, caption=f"Page {i+1}", use_container_width=True)
468
+
469
+ pdf_name = f"{stem}.pdf"
470
+ st.download_button("⬇️ Download PDF", data=pdf_bytes, file_name=pdf_name, mime="application/pdf")
471
+ else:
472
+ st.error("Failed to generate PDF from markdown.")
473
+ else:
474
+ # Fallback to original simple PDF generation for mixed content
475
+ buf = io.BytesIO()
476
+
477
+ # Get page dimensions for the selected page size
478
+ if page_size == "A4":
479
+ page_dimensions = A4
480
+ elif page_size == "Letter":
481
+ page_dimensions = letter
482
+ elif page_size == "Legal":
483
+ page_dimensions = legal
484
+ elif page_size == "A3":
485
+ page_dimensions = A3
486
+ elif page_size == "A5":
487
+ page_dimensions = A5
488
+ else:
489
+ page_dimensions = A4
490
+
491
+ c = canvas.Canvas(buf, pagesize=page_dimensions)
492
+
493
+ if plain_text.strip():
494
+ page_w, page_h = page_dimensions
495
+ margin = 40
496
+ gutter = 20
497
+ col_w = (page_w - 2*margin - (columns-1)*gutter) / columns
498
+ c.setFont(font_family, font_size)
499
+ line_height = font_size * 1.2
500
+ col = 0
501
+ x = margin
502
+ y = page_h - margin
503
+ avg_char_width = font_size * 0.6
504
+ wrap_width = int(col_w / avg_char_width) if avg_char_width > 0 else 100
505
+ for paragraph in plain_text.split("\n"):
506
+ if not paragraph.strip():
507
+ y -= line_height
508
+ if y < margin:
509
+ col += 1
510
+ if col >= columns:
511
+ c.showPage()
512
+ c.setFont(font_family, font_size)
513
+ col = 0
514
+ x = margin + col*(col_w+gutter)
515
+ y = page_h - margin
516
+ continue
517
+ for line in textwrap.wrap(paragraph, wrap_width):
518
+ if y < margin:
519
+ col += 1
520
+ if col >= columns:
521
+ c.showPage()
522
+ c.setFont(font_family, font_size)
523
+ col = 0
524
+ x = margin + col*(col_w+gutter)
525
+ y = page_h - margin
526
+ c.drawString(x, y, line)
527
+ y -= line_height
528
+ y -= line_height
529
+ for img_f in ordered_images:
530
+ try:
531
+ img = Image.open(img_f)
532
+ w, h = img.size
533
+ c.showPage()
534
+ c.setPageSize((w, h))
535
+ c.drawImage(ImageReader(img), 0, 0, w, h, preserveAspectRatio=False)
536
+ except Exception as e:
537
+ st.warning(f"Could not process image {img_f.name}: {e}")
538
+ continue
539
+ c.save()
540
+ buf.seek(0)
541
+ pdf_name = f"{stem}.pdf"
542
+ st.success(f"PDF generated successfully with {page_size} page size!")
543
+ st.download_button("⬇️ Download PDF", data=buf, file_name=pdf_name, mime="application/pdf")
544
+
545
  st.markdown("---")
546
  st.subheader("📂 Available Assets")
547
  all_assets = glob.glob("*.*")
 
581
  if not st.session_state.selected_assets:
582
  st.warning("Please select at least one asset to generate a PDF.")
583
  else:
584
+ selected_markdown_texts = []
585
  image_files = []
586
  for a in st.session_state.selected_assets:
587
  ext = a.split('.')[-1].lower()
588
  if ext == 'md':
589
  with open(a, 'r', encoding='utf-8') as f:
590
+ selected_markdown_texts.append(f.read())
591
  elif ext in ['png', 'jpg', 'jpeg']:
592
  image_files.append(a)
593
  with st.spinner("Generating PDF from selected assets..."):
594
  pdf_bytes = create_pdf(
595
+ markdown_texts=selected_markdown_texts,
596
  image_files=image_files,
597
+ base_font_size=font_size,
598
+ num_columns=columns,
599
  add_space_before_numbered=True,
600
  headings_to_fonts=True,
601
+ doc_title="Combined_Selected_Assets",
602
+ page_size=page_size
603
  )
604
  if pdf_bytes:
605
  pdf_images = pdf_to_image(pdf_bytes)
 
616
  )
617
  else:
618
  st.error("Failed to generate PDF.")
619
+
620
  st.markdown("---")
621
  st.subheader("🖼 Image Gallery")
622
  image_files = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
 
633
  st.warning(f"Could not load image {image_file}: {e}")
634
  else:
635
  st.info("No images found in the current directory.")
636
+
637
  st.markdown("---")
638
  st.subheader("🎥 Video Gallery")
639
  video_files = glob.glob("*.mp4")