Spaces:

tsphan
/

pdf_to_single_image

Sleeping

App Files Files Community

tsphan committed on Apr 22

Commit

7d83622

1 Parent(s): ca574af

adds requirements and app

Browse files

Files changed (2) hide show

app.py +165 -2
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -1,4 +1,167 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import streamlit as st
+import fitz  # PyMuPDF
+import numpy as np
+from PIL import Image
+import io
+import tempfile
+import os
+import time
+# Page-level configuration and the header shown above the uploader.
+PAGE_ICON = "📄"
+PAGE_TITLE = "PDF to Single Image Converter"
+PAGE_INTRO = "Upload a PDF and convert it into a single image containing all pages."
+st.set_page_config(page_title=PAGE_TITLE, page_icon=PAGE_ICON, layout="centered")
+# The visible heading is the icon followed by the same title used for the tab.
+st.title(f"{PAGE_ICON} {PAGE_TITLE}")
+st.write(PAGE_INTRO)
+def _stack_pages_vertically(page_images):
+    """Paste the given page images top to bottom onto one white RGB canvas."""
+    canvas_width = max(image.width for image in page_images)
+    canvas_height = sum(image.height for image in page_images)
+    canvas = Image.new("RGB", (canvas_width, canvas_height), (255, 255, 255))
+    offset_y = 0
+    for image in page_images:
+        canvas.paste(image, (0, offset_y))
+        offset_y += image.height
+    return canvas
+def pdf_to_single_image(pdf_path, output_format="PNG", dpi=300):
+    """Convert all pages of a PDF to a single image file.
+
+    Pages are rendered at ``dpi`` and stacked vertically, left-aligned, on a
+    white background. Progress is reported through a Streamlit progress bar
+    and status line created by this function.
+
+    Args:
+        pdf_path: Path of the PDF file to open.
+        output_format: "PNG" (case-insensitive) saves a PNG; any other value
+            saves a JPEG with quality 95.
+        dpi: Rendering resolution in dots per inch.
+
+    Returns:
+        An ``io.BytesIO`` holding the encoded image, positioned at offset 0.
+
+    Raises:
+        ValueError: If the document has no pages.
+    """
+    # 72 is the default DPI for PDFs, so dpi / 72 is the render scale factor.
+    # It is the same for every page, so it is computed once.
+    zoom = dpi / 72
+    matrix = fitz.Matrix(zoom, zoom)
+    pdf_document = fitz.open(pdf_path)
+    try:
+        num_pages = len(pdf_document)
+        if num_pages == 0:
+            raise ValueError("The PDF contains no pages.")
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        # Render each page exactly once and keep the result. The canvas is
+        # later sized from the real pixmap dimensions rather than from
+        # int(rect * zoom), whose truncation can undershoot the rendered size
+        # and clip the bottom of the last page.
+        # NOTE: all rendered pages are held in memory until they are stacked.
+        page_images = []
+        for page_num, page in enumerate(pdf_document, start=1):
+            status_text.text(f"Processing page {page_num}/{num_pages}")
+            # alpha=False keeps the samples at 3 bytes per pixel, matching "RGB".
+            pix = page.get_pixmap(matrix=matrix, alpha=False)
+            page_images.append(
+                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+            )
+            progress_bar.progress(page_num / num_pages)
+    finally:
+        # Close the document even when rendering fails part-way through.
+        pdf_document.close()
+    result_image = _stack_pages_vertically(page_images)
+    # Encode into an in-memory buffer so the caller can offer it for download.
+    buf = io.BytesIO()
+    if output_format.upper() == "PNG":
+        result_image.save(buf, format="PNG")
+    else:
+        result_image.save(buf, format="JPEG", quality=95)
+    buf.seek(0)
+    status_text.text("Processing complete!")
+    return buf
+# UI Components
+# Sidebar: conversion settings plus a short "About" blurb.
+with st.sidebar:
+    st.header("Settings")
+    dpi = st.slider("Resolution (DPI)", min_value=72, max_value=600, value=300, step=1,
+                   help="Higher DPI means better quality but larger file size")
+    output_format = st.radio("Output Format", ["PNG", "JPG"],
+                            help="PNG provides better quality but larger file size")
+    st.write("---")
+    st.write("### About")
+    st.write("This app converts multi-page PDFs into a single image file.")
+    st.write("Made with ❤️ using Streamlit and PyMuPDF")
+# File uploader
+uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+if uploaded_file is not None:
+    # Display file info
+    file_details = {
+        "Filename": uploaded_file.name,
+        "File size": f"{uploaded_file.size / 1024:.2f} KB"
+    }
+    st.write("### File Details")
+    for k, v in file_details.items():
+        st.write(f"**{k}:** {v}")
+    # Process on button click
+    if st.button("Convert to Image"):
+        # The temp file is created only once a conversion is requested.
+        # Streamlit reruns the script on every interaction, so writing it
+        # before the button check would leave one orphaned file per rerun.
+        pdf_path = None
+        try:
+            # Save uploaded file to temp file
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+                # Record the path first so cleanup still runs if the write fails.
+                pdf_path = tmp_file.name
+                tmp_file.write(uploaded_file.getvalue())
+            with st.spinner("Converting PDF to image..."):
+                start_time = time.time()
+                # Process the PDF
+                img_buffer = pdf_to_single_image(pdf_path, output_format, dpi)
+                # Calculate processing time
+                processing_time = time.time() - start_time
+                st.success(f"Conversion completed in {processing_time:.2f} seconds!")
+                # Get file extension and media type. The registered media type
+                # for JPEG is "image/jpeg"; "image/jpg" is not a valid type.
+                if output_format == "PNG":
+                    ext, mime_type = "png", "image/png"
+                else:
+                    ext, mime_type = "jpg", "image/jpeg"
+                # Create download button
+                output_filename = f"{os.path.splitext(uploaded_file.name)[0]}.{ext}"
+                st.download_button(
+                    label=f"Download {output_format} Image",
+                    data=img_buffer,
+                    file_name=output_filename,
+                    mime=mime_type
+                )
+                # Preview (with warning for large files)
+                img = Image.open(img_buffer)
+                width, height = img.size
+                st.write("### Image Preview")
+                if height > 10000:
+                    st.warning("This is a very tall image. Preview is scaled down.")
+                    st.image(img, caption=f"Output Image ({width}x{height} pixels)", width=min(width, 800))
+                else:
+                    st.image(img, caption=f"Output Image ({width}x{height} pixels)")
+                st.write(f"**Image dimensions:** {width}x{height} pixels")
+        except Exception as e:
+            # Top-level UI boundary: show the failure instead of crashing the app.
+            st.error(f"An error occurred: {e}")
+        finally:
+            # Clean up temp file
+            if pdf_path is not None and os.path.exists(pdf_path):
+                os.unlink(pdf_path)
+else:
+    st.info("👆 Please upload a PDF file to get started.")
+    # Example image
+    st.write("### Example Output")
+    st.image("https://via.placeholder.com/800x600?text=PDF+to+Single+Image+Example",
+             caption="Example of converted PDF")
+# Add requirements info at the bottom
+st.write("---")
+with st.expander("Installation Requirements"):
+    st.code("""
+    pip install streamlit PyMuPDF Pillow
+    """)
+    st.write("Run the app with: `streamlit run app.py`")

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+PyMuPDF
+numpy