Spaces:

SlouchyBuffalo
/

pages-converter-pro

Running on Zero

App Files Files Community

SlouchyBuffalo committed on May 12

Commit

8c7667b

verified ·

1 Parent(s): 495a855

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -382

app.py DELETED Viewed

@@ -1,382 +0,0 @@
-# app.py - Complete PWA Pages Converter
-import gradio as gr
-import os
-import spaces
-import tempfile
-import zipfile
-import json
-from pathlib import Path
-from huggingface_hub import InferenceClient
-import time
-# Debug token
-# Read the Hugging Face API token from the HF_TOKEN environment variable and
-# print only whether it is set and how long it is; the value itself is not printed.
-# NOTE(review): the token length is still written to stdout on every import -
-# consider removing these prints once start-up debugging is finished.
-token = os.getenv("HF_TOKEN")
-print(f"Debug: Token exists = {token is not None}")
-print(f"Debug: Token length = {len(token) if token else 0}")
-# Initialize the client with Cerebras
-# Module-level InferenceClient for meta-llama/Llama-3.3-70B-Instruct routed
-# through the "cerebras" provider; convert_pages_document calls
-# client.chat_completion on it. token may be None when HF_TOKEN is unset.
-client = InferenceClient(
-    "meta-llama/Llama-3.3-70B-Instruct",
-    provider="cerebras",
-    token=token
-)
-@spaces.GPU
-def extract_pages_content(file_path):
-    """Extract readable text from an Apple Pages (.pages) archive.
-
-    The file is opened as a zip archive and unpacked into a temporary
-    directory. Every ``*.iwa`` file directly under ``Index/`` is read as raw
-    bytes, decoded as UTF-8 with undecodable bytes dropped, and scanned for
-    runs of printable ASCII. Runs longer than five characters (after
-    stripping) are kept, de-duplicated in first-seen order and joined with
-    blank lines.
-
-    NOTE(review): .iwa payloads are reportedly Snappy-compressed protobuf, so
-    this raw byte scan presumably recovers only fragments that happen to be
-    stored uncompressed - confirm against real documents.
-
-    Args:
-        file_path: Path to the uploaded .pages file.
-
-    Returns:
-        The extracted text. When nothing readable is found, or when opening or
-        unpacking the archive fails, a human-readable message string is
-        returned instead of raising.
-    """
-    # Function-scope import: the module's top-level imports do not include re.
-    import re
-    print(f"DEBUG: Processing file: {file_path}")
-    print(f"DEBUG: File exists: {os.path.exists(file_path)}")
-    # Compiled once instead of once per .iwa file.
-    printable_run = re.compile(r'[\x20-\x7E]+')
-    try:
-        content_parts = []
-        with zipfile.ZipFile(file_path, 'r') as zip_ref, tempfile.TemporaryDirectory() as temp_dir:
-            zip_ref.extractall(temp_dir)
-            temp_path = Path(temp_dir)
-            index_path = temp_path / "Index"
-            print(f"DEBUG: Extracted files: {list(temp_path.iterdir())}")
-            print(f"DEBUG: Index folder contents: {list(index_path.iterdir()) if index_path.is_dir() else 'No Index folder'}")
-            # Strategy 1: Look for iwa files in Index folder
-            if index_path.is_dir():
-                # Sorted so the output order does not depend on filesystem listing order.
-                for iwa_file in sorted(index_path.glob("*.iwa")):
-                    try:
-                        binary_content = iwa_file.read_bytes()
-                    except OSError as e:
-                        # Best effort: one unreadable member must not abort the whole extraction.
-                        print(f"DEBUG: Skipping unreadable file {iwa_file.name}: {e}")
-                        continue
-                    # errors='ignore' drops undecodable bytes, so this cannot raise.
-                    text_content = binary_content.decode('utf-8', errors='ignore')
-                    for run in printable_run.findall(text_content):
-                        stripped = run.strip()
-                        if len(stripped) > 5:
-                            content_parts.append(stripped)
-        if content_parts:
-            # dict.fromkeys removes duplicates while keeping first-seen order.
-            unique_content = list(dict.fromkeys(content_parts))
-            return "\n\n".join(unique_content)
-        return "Could not extract readable content from .pages file"
-    except Exception as e:
-        # Boundary handler: callers expect a message string rather than an exception.
-        return f"Error extracting content: {str(e)}"
-@spaces.GPU
-def convert_pages_document(file, output_format, progress=gr.Progress()):
-    """Convert an uploaded .pages file to ``output_format`` using the Cerebras-hosted model.
-
-    Steps: extract text with ``extract_pages_content``, build a prompt with
-    ``create_conversion_prompt``, send it through ``client.chat_completion``
-    and write the reply to disk with ``create_output_file``.
-
-    Args:
-        file: The upload from the file component; either a filesystem path
-            (``str`` / ``os.PathLike``) or an object exposing the path as ``.name``.
-        output_format: Target format name, e.g. "PDF", "DOCX", "TXT", "HTML"
-            or "Markdown".
-        progress: Progress callback; called with a fraction and a ``desc`` label.
-
-    Returns:
-        A ``(output_path, status_message)`` tuple. ``output_path`` is ``None``
-        whenever the conversion did not produce a file.
-    """
-    if not file:
-        return None, "❌ Please upload a .pages file"
-    # Strings extract_pages_content returns in place of document text when it
-    # fails. They are longer than the 10-character minimum below, so without
-    # this check they would be sent to the model as if they were the document.
-    extraction_failures = (
-        "Could not extract readable content from .pages file",
-        "Error extracting content:",
-    )
-    try:
-        progress(0.1, desc="📖 Extracting content from .pages file...")
-        # Accept both a plain path and a file-like wrapper with a .name attribute.
-        file_path = file if isinstance(file, (str, os.PathLike)) else file.name
-        content = extract_pages_content(file_path)
-        if not content or len(content.strip()) < 10:
-            return None, "❌ Could not extract sufficient content from .pages file"
-        if content.startswith(extraction_failures):
-            return None, f"❌ {content}"
-        progress(0.4, desc="🤖 Preparing conversion with Cerebras...")
-        # Create format-specific prompt
-        prompt = create_conversion_prompt(content, output_format)
-        progress(0.6, desc="⚡ Converting with Cerebras Lightning Speed...")
-        # Convert using Cerebras (chat completion with a single user message)
-        try:
-            completion = client.chat_completion(
-                messages=[{"role": "user", "content": prompt}],
-                max_tokens=4096,
-                temperature=0.1
-            )
-            converted = completion.choices[0].message.content
-        except Exception as e:
-            return None, f"❌ Conversion error: {str(e)}"
-        if not converted or not converted.strip():
-            # Guard before create_output_file, which calls .strip() on the text.
-            return None, "❌ Conversion error: the model returned an empty response"
-        progress(0.9, desc="💫 Creating output file...")
-        # Create output file
-        output_path = create_output_file(converted, output_format)
-        progress(1.0, desc="✅ Conversion complete!")
-        return output_path, f"✅ Successfully converted to {output_format} using ZeroGPU!"
-    except Exception as e:
-        # UI boundary: report any unexpected failure in the status area.
-        return None, f"❌ Error: {str(e)}"
-def create_conversion_prompt(content, output_format):
-    """Build the conversion prompt sent to the model for ``content``.
-
-    The prompt names the target format, lists the fixed conversion rules plus
-    one format-specific hint (a generic hint is used when the format has no
-    entry of its own), and embeds the extracted document text verbatim.
-    """
-    fallback_hint = "Format appropriately for the requested output type"
-    hints_by_format = dict(
-        PDF="Create content suitable for PDF format with proper structure and formatting",
-        DOCX="Format as Microsoft Word document with headers, paragraphs, and proper styling",
-        TXT="Convert to clean, readable plain text preserving structure",
-        HTML="Create well-structured HTML with semantic markup",
-        Markdown="Convert to properly formatted Markdown with headers and structure",
-    )
-    format_hint = hints_by_format.get(output_format, fallback_hint)
-    prompt = f"""You are an expert document converter. Convert the following Apple Pages document content to {output_format} format.
-INSTRUCTIONS:
-1. Preserve the original structure, formatting, and content organization
-2. Maintain headings, paragraphs, lists, and any tables if present
-3. Ensure the output is clean, professional, and well-formatted
-4. {format_hint}
-5. Return ONLY the converted content without explanations or meta-commentary
-ORIGINAL CONTENT:
-{content}
-CONVERTED {output_format.upper()} OUTPUT:"""
-    return prompt
-def create_output_file(content, output_format):
-    """Write ``content`` to a new temporary file in ``output_format`` and return its path.
-
-    "PDF" is rendered with reportlab and "DOCX" with python-docx; every other
-    format is written as UTF-8 text with the matching extension (".txt" when
-    the format is not in the extension table). The file is created with
-    ``delete=False``, so the caller owns it and is responsible for removing it.
-
-    Args:
-        content: The converted document text; surrounding whitespace is stripped.
-        output_format: Target format name ("PDF", "DOCX", "TXT", "HTML", "Markdown").
-
-    Returns:
-        Filesystem path of the file that was written.
-
-    Raises:
-        Whatever the underlying writer raises; the partially written temp file
-        is removed before the exception propagates.
-    """
-    # Clean the content (remove potential prompt artifacts)
-    content = content.strip()
-    extensions = {
-        "PDF": ".pdf",
-        "DOCX": ".docx",
-        "TXT": ".txt",
-        "HTML": ".html",
-        "Markdown": ".md"
-    }
-    suffix = extensions.get(output_format, ".txt")
-    # Reserve a unique path, then close our handle before anything writes to
-    # it: reportlab and python-docx open the path themselves, and a second
-    # open handle fails on platforms that lock open files.
-    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
-        output_path = handle.name
-    try:
-        if output_format == "PDF":
-            _write_pdf_file(content, output_path)
-        elif output_format == "DOCX":
-            _write_docx_file(content, output_path)
-        else:
-            # For TXT, HTML, Markdown
-            with open(output_path, 'w', encoding='utf-8') as f:
-                f.write(content)
-    except Exception:
-        # Do not leave an orphaned temp file behind when writing fails.
-        try:
-            os.remove(output_path)
-        except OSError:
-            pass
-        raise
-    return output_path
-def _write_pdf_file(content, output_path):
-    """Draw ``content`` as wrapped text lines on letter-size pages at ``output_path``."""
-    from reportlab.pdfgen import canvas
-    from reportlab.lib.pagesizes import letter
-    import textwrap
-    margin = 50
-    line_height = 20
-    pdf = canvas.Canvas(output_path, pagesize=letter)
-    _, height = letter
-    y_position = height - margin
-    for paragraph in content.split('\n'):
-        # Wrap long lines at 80 characters; a blank paragraph stays one empty line.
-        wrapped_lines = textwrap.wrap(paragraph, width=80) if paragraph.strip() else ['']
-        for line in wrapped_lines:
-            if y_position < margin:  # Start new page
-                pdf.showPage()
-                y_position = height - margin
-            pdf.drawString(margin, y_position, line)
-            y_position -= line_height
-    pdf.save()
-def _write_docx_file(content, output_path):
-    """Write each blank-line-separated chunk of ``content`` as one paragraph of a .docx file."""
-    from docx import Document
-    doc = Document()
-    for para in content.split('\n\n'):
-        if para.strip():
-            doc.add_paragraph(para.strip())
-    doc.save(output_path)
-# Custom CSS for professional appearance.
-# Passed to gr.Blocks(css=...) below. The class names defined here are attached
-# to components through elem_classes (main-content, upload-section,
-# format-selector, convert-button) or used inside the gr.HTML snippets
-# (hero-section, zerogpu-badge, pro-features).
-css = """
-.gradio-container {
-    background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
-    min-height: 100vh;
-}
-.main-content {
-    max-width: 1000px;
-    margin: 0 auto;
-    padding: 2rem;
-}
-.hero-section {
-    background: white;
-    border-radius: 1rem;
-    padding: 2rem;
-    text-align: center;
-    box-shadow: 0 10px 30px rgba(0,0,0,0.1);
-    margin-bottom: 2rem;
-}
-.upload-section {
-    background: white;
-    border-radius: 1rem;
-    padding: 2rem;
-    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
-}
-.format-selector {
-    background: #f8f9fa;
-    border-radius: 0.5rem;
-    padding: 1rem;
-    margin: 1rem 0;
-}
-.convert-button {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    color: white;
-    border: none;
-    padding: 1rem 2rem;
-    border-radius: 0.5rem;
-    font-size: 1.1rem;
-    font-weight: bold;
-    width: 100%;
-    cursor: pointer;
-    transition: all 0.3s ease;
-}
-.convert-button:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 5px 15px rgba(102, 126, 234, 0.3);
-}
-.zerogpu-badge {
-    display: inline-block;
-    background: linear-gradient(45deg, #ff6b6b, #feca57);
-    color: white;
-    padding: 0.5rem 1rem;
-    border-radius: 2rem;
-    font-weight: bold;
-    font-size: 0.9rem;
-}
-.pro-features {
-    background: #e8f5e9;
-    border-radius: 0.5rem;
-    padding: 1rem;
-    margin-top: 1rem;
-}
-"""
-# Create the Gradio interface.
-# Layout, top to bottom: hero banner, "Pro benefits" panel, a two-column row
-# (upload + format choice + convert button on the left, static feature list on
-# the right), the download component, a status line, and a footer.
-with gr.Blocks(css=css, title="Pages Converter Pro - ZeroGPU", theme=gr.themes.Soft()) as app:
-    with gr.Column(elem_classes=["main-content"]):
-        # Hero section
-        gr.HTML("""
-        <div class="hero-section">
-            <h1>📄 Pages Converter Pro</h1>
-            <span class="zerogpu-badge">⚡ ZeroGPU Accelerated</span>
-            <p style="margin-top: 1rem; color: #666;">
-                Convert Apple Pages documents with lightning-fast Cerebras Llama-3.3-70B
-            </p>
-        </div>
-        """)
-        # Pro benefits showcase (static marketing text; nothing here is computed)
-        gr.HTML("""
-        <div class="pro-features">
-            <h3>🚀 HuggingFace Pro Benefits Active</h3>
-            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-top: 1rem;">
-                <div>✅ 5x Usage Quota</div>
-                <div>🔥 Priority Queue Access</div>
-                <div>💎 H200 GPU Hardware</div>
-                <div>⚡ Zero-GPU Acceleration</div>
-            </div>
-        </div>
-        """)
-        # Main conversion interface
-        with gr.Row():
-            with gr.Column(scale=2, elem_classes=["upload-section"]):
-                gr.HTML("<h3>📎 Upload Your Document</h3>")
-                # Upload restricted to the .pages extension.
-                file_input = gr.File(
-                    label="Select .pages file",
-                    file_types=[".pages"],
-                    elem_id="file-upload"
-                )
-                # These choices are the format names that create_conversion_prompt
-                # and create_output_file key their lookup tables on.
-                output_format = gr.Radio(
-                    choices=["PDF", "DOCX", "TXT", "HTML", "Markdown"],
-                    value="PDF",
-                    label="🎯 Output Format",
-                    elem_classes=["format-selector"]
-                )
-                convert_btn = gr.Button(
-                    "⚡ Convert with ZeroGPU",
-                    variant="primary",
-                    elem_classes=["convert-button"]
-                )
-            with gr.Column(scale=1):
-                # Static side panel describing features and supported formats.
-                gr.HTML("""
-                <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 5px 15px rgba(0,0,0,0.1);">
-                    <h3>⚡ ZeroGPU Features</h3>
-                    <ul style="color: #666;">
-                        <li>Lightning-fast processing</li>
-                        <li>H200 hardware acceleration</li>
-                        <li>Priority queue access</li>
-                        <li>Cerebras optimization</li>
-                    </ul>
-                    <h3>📋 Supported Formats</h3>
-                    <ul style="color: #666;">
-                        <li>📄 PDF (best quality)</li>
-                        <li>📝 Microsoft Word (DOCX)</li>
-                        <li>📋 Plain Text (TXT)</li>
-                        <li>🌐 Web Page (HTML)</li>
-                        <li>✏️ Markdown (MD)</li>
-                    </ul>
-                </div>
-                """)
-        # Output section: download slot for the converted file
-        with gr.Row():
-            output_file = gr.File(
-                label="📁 Download Your Converted File",
-                elem_id="output-download"
-            )
-        # Status line; initially shows a "ready" message
-        with gr.Row():
-            status_html = gr.HTML(
-                value="<div style='text-align: center; padding: 1rem; color: #666;'>Ready to convert your Pages document</div>",
-                elem_id="status-display"
-            )
-        # Connect the interface: clicking the button calls convert_pages_document
-        # with the uploaded file and the chosen format; its two return values
-        # fill the download component and the status HTML, in that order.
-        convert_btn.click(
-            fn=convert_pages_document,
-            inputs=[file_input, output_format],
-            outputs=[output_file, status_html],
-            show_progress=True
-        )
-        # Footer
-        gr.HTML("""
-        <div style="text-align: center; margin-top: 3rem; padding: 2rem; color: white;">
-            <p>💎 Built exclusively for HuggingFace Pro users</p>
-            <p><small>Powered by Cerebras • Accelerated by ZeroGPU • Made with ❤️</small></p>
-        </div>
-        """)
-# Launch the app only when this file is executed as a script
-if __name__ == "__main__":
-    app.launch()