# app.py - Corrected CloudConvert API Integration
"""Gradio app that converts Apple Pages documents to other formats.

Pipeline: CloudConvert extracts plain text from the uploaded ``.pages`` file,
a Llama 3.3 70B model served through the Cerebras provider reformats that text
for the requested output format, and the result is written to a temporary file
that Gradio offers for download.
"""

import os
import tempfile
import time
from pathlib import Path

import gradio as gr
import spaces
import requests
from huggingface_hub import InferenceClient

# Debug tokens
hf_token = os.getenv("HF_TOKEN")
# Strip stray whitespace that tends to sneak into pasted secrets; an unset or
# blank key is normalised to None so the checks below agree with each other.
cloudconvert_token = (os.getenv("CLOUDCONVERT_API_KEY") or "").strip() or None

print(f"Debug: HF Token exists = {hf_token is not None}")
print(f"Debug: CloudConvert Token exists = {cloudconvert_token is not None}")

# Per-request timeout (seconds) for every CloudConvert HTTP call. Without it,
# `requests` waits indefinitely on a stalled connection.
HTTP_TIMEOUT_SECONDS = 60

# Initialize the client with Cerebras
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=hf_token,
)


def convert_pages_to_text(file_path, api_key, max_attempts=30, poll_interval=2):
    """Convert a .pages file to plain text using the CloudConvert API.

    Creates a three-task job (upload -> convert -> export URL), uploads the
    file, polls the job until it finishes, then downloads the exported text.

    Args:
        file_path: Path of the local .pages file to upload.
        api_key: CloudConvert API key, sent as a Bearer token.
        max_attempts: Maximum number of status polls before giving up.
        poll_interval: Seconds to sleep between polls. With the defaults the
            job is given roughly one minute to complete.

    Returns:
        The converted document as a string.

    Raises:
        Exception: On any failure. HTTP-level errors are reported with a
            "CloudConvert HTTP error: " prefix, everything else with
            "CloudConvert error: "; the original exception is chained.
    """
    base_url = "https://api.cloudconvert.com/v2"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    try:
        # Step 1: Create a job with correct task structure
        job_data = {
            "tasks": {
                "import-file": {
                    "operation": "import/upload",
                },
                "convert-file": {
                    "operation": "convert",
                    "input": "import-file",
                    "input_format": "pages",
                    "output_format": "txt",
                },
                "export-file": {
                    "operation": "export/url",
                    "input": "convert-file",
                },
            }
        }

        print("Creating CloudConvert job...")
        response = requests.post(
            f"{base_url}/jobs",
            headers=headers,
            json=job_data,
            timeout=HTTP_TIMEOUT_SECONDS,
        )
        print(f"Job creation response: {response.status_code}")
        if not response.ok:
            print(f"Job creation failed: {response.text}")
        response.raise_for_status()

        job = response.json()
        job_id = job["data"]["id"]
        print(f"Job created successfully: {job_id}")

        # Step 2: Upload the file
        upload_task = next(
            (
                task
                for task in job["data"]["tasks"]
                if task["operation"] == "import/upload"
            ),
            None,
        )
        if not upload_task:
            raise Exception("Upload task not found in job")

        upload_url = upload_task["result"]["form"]["url"]
        form_data = upload_task["result"]["form"]["parameters"]

        print("Uploading file to CloudConvert...")
        # The upload goes to the form URL returned by the job, using the form
        # parameters it supplied; the API auth headers are not sent here.
        with open(file_path, "rb") as f:
            upload_response = requests.post(
                upload_url,
                data=form_data,
                files={"file": f},
                timeout=HTTP_TIMEOUT_SECONDS,
            )
        if not upload_response.ok:
            print(f"Upload failed: {upload_response.text}")
        upload_response.raise_for_status()
        print("File uploaded successfully")

        # Step 3: Wait for conversion to complete
        print(f"Waiting for job {job_id} to complete...")
        for _ in range(max_attempts):
            status_response = requests.get(
                f"{base_url}/jobs/{job_id}",
                headers=headers,
                timeout=HTTP_TIMEOUT_SECONDS,
            )
            status_response.raise_for_status()
            job_status = status_response.json()
            status = job_status["data"]["status"]
            print(f"Job status: {status}")

            if status == "finished":
                print("Conversion completed successfully")
                break

            if status == "error":
                error_msg = job_status["data"].get("message", "Unknown error")
                print(f"Conversion failed: {error_msg}")
                # Check task-level errors
                for task in job_status.get("data", {}).get("tasks", []):
                    if task.get("status") == "error":
                        task_error = task.get("message", "Unknown task error")
                        print(f"Task {task.get('operation')} error: {task_error}")
                raise Exception(f"Conversion failed: {error_msg}")

            time.sleep(poll_interval)  # Wait before checking again
        else:
            # Loop ran out of attempts without seeing "finished".
            raise Exception("Conversion timeout - job took too long")

        # Step 4: Download the converted text
        for task in job_status["data"]["tasks"]:
            if task["operation"] == "export/url" and task["status"] == "finished":
                download_url = task["result"]["files"][0]["url"]
                print(f"Downloading result from: {download_url}")
                download_response = requests.get(
                    download_url, timeout=HTTP_TIMEOUT_SECONDS
                )
                download_response.raise_for_status()
                # `Response.text` falls back to ISO-8859-1 for text/* replies
                # that declare no charset, which garbles UTF-8 output. Try
                # UTF-8 first and only then defer to requests' own guess.
                try:
                    text_content = download_response.content.decode("utf-8-sig")
                except UnicodeDecodeError:
                    text_content = download_response.text
                print(f"Downloaded {len(text_content)} characters")
                return text_content

        raise Exception("No converted file found in completed job")

    except requests.exceptions.RequestException as e:
        print(f"HTTP error: {e}")
        raise Exception(f"CloudConvert HTTP error: {str(e)}") from e
    except Exception as e:
        print(f"General error: {e}")
        raise Exception(f"CloudConvert error: {str(e)}") from e


@spaces.GPU
def convert_pages_document(file, output_format, progress=gr.Progress()):
    """Convert an uploaded Pages document using CloudConvert + Cerebras.

    Args:
        file: Value delivered by the ``gr.File`` input. Either an object with
            a ``name`` attribute holding the local path, or the path itself.
        output_format: One of the choices offered by the format radio
            ("PDF", "DOCX", "TXT", "HTML", "Markdown").
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is ``None``
        whenever the conversion did not produce a file; errors are reported
        through ``status_message`` rather than raised.
    """
    if not file:
        return None, "❌ Please upload a .pages file"

    if not cloudconvert_token:
        return None, "❌ CloudConvert API key not configured. Please add CLOUDCONVERT_API_KEY to secrets."

    try:
        progress(0.1, desc="📤 Converting with CloudConvert...")

        # Use CloudConvert to extract text from .pages file.
        # Accept both a file-like wrapper (`.name`) and a bare path string.
        file_path = getattr(file, "name", file)
        print(f"Converting file: {file_path}")
        text_content = convert_pages_to_text(file_path, cloudconvert_token)

        if not text_content or len(text_content.strip()) < 10:
            return None, "❌ Could not extract content from .pages file"

        print(f"Extracted text preview: {text_content[:200]}...")

        progress(0.5, desc="🤖 Converting format with Cerebras AI...")

        # Create format-specific prompt
        prompt = create_conversion_prompt(text_content, output_format)

        progress(0.7, desc="⚡ Processing with ZeroGPU...")

        # Convert using Cerebras
        try:
            messages = [{"role": "user", "content": prompt}]
            response = client.chat_completion(
                messages=messages,
                max_tokens=4096,
                temperature=0.1,
            )
            converted_text = response.choices[0].message.content
        except Exception as e:
            print(f"Cerebras error: {e}")
            return None, f"❌ AI conversion error: {str(e)}"

        # A missing or blank completion would otherwise yield an empty
        # document that is reported as a success.
        if not converted_text or not converted_text.strip():
            return None, "❌ AI conversion error: model returned no content"

        progress(0.9, desc="💾 Creating output file...")

        # Create output file
        output_path = create_output_file(converted_text, output_format)

        progress(1.0, desc="✅ Conversion complete!")

        return output_path, f"✅ Successfully converted to {output_format}!"

    except Exception as e:
        # UI boundary: surface any failure as a status message.
        print(f"Conversion error: {e}")
        return None, f"❌ Error: {str(e)}"


def create_conversion_prompt(content, output_format):
    """Build the LLM prompt asking for *content* reformatted as *output_format*.

    Args:
        content: Text extracted from the source document.
        output_format: Name of the target format, inserted verbatim.

    Returns:
        The prompt string.
    """
    return f"""You are a document formatter. Convert the following text to {output_format} format.

IMPORTANT:
1. Keep ALL original content - do not summarize or remove text
2. Only adjust formatting for {output_format}
3. Preserve all important information, names, and details

Original text:
{content}

Formatted {output_format} output:"""


def _reserve_temp_path(suffix):
    """Create an empty temp file with *suffix*, close it, and return its path."""
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        return handle.name


def create_output_file(content, output_format):
    """Write *content* to a new temporary file in the requested format.

    Args:
        content: Text to write; surrounding whitespace is stripped and
            ``None`` is treated as empty text.
        output_format: "PDF" and "DOCX" are rendered with reportlab and
            python-docx respectively; "TXT", "HTML" and "Markdown" are written
            as UTF-8 text. Any other value falls back to a ``.txt`` file.

    Returns:
        Path of the created file. The file is not deleted automatically.
    """
    content = (content or "").strip()

    if output_format == "PDF":
        # Imported lazily so the optional dependency is only needed for PDF.
        from reportlab.pdfgen import canvas
        from reportlab.lib.pagesizes import letter
        import textwrap

        # Reserve the path and close the handle first: the library opens the
        # file by name, which fails on platforms that forbid a second open.
        output_path = _reserve_temp_path(".pdf")
        pdf = canvas.Canvas(output_path, pagesize=letter)
        width, height = letter

        y = height - 50
        # Better paragraph handling
        for paragraph in content.split("\n\n"):
            paragraph = paragraph.strip()
            if not paragraph:
                continue
            for line in textwrap.wrap(paragraph, width=90):
                if y < 50:
                    # Out of vertical space: start a new page at the top.
                    pdf.showPage()
                    y = height - 50
                pdf.drawString(50, y, line)
                y -= 20
            y -= 10  # Space between paragraphs

        pdf.save()
        return output_path

    if output_format == "DOCX":
        # Imported lazily so the optional dependency is only needed for DOCX.
        from docx import Document

        output_path = _reserve_temp_path(".docx")
        doc = Document()

        # Add paragraphs
        for paragraph in content.split("\n\n"):
            paragraph = paragraph.strip()
            if paragraph:
                doc.add_paragraph(paragraph)

        doc.save(output_path)
        return output_path

    # For TXT, HTML, Markdown
    ext_map = {"TXT": ".txt", "HTML": ".html", "Markdown": ".md"}
    ext = ext_map.get(output_format, ".txt")

    with tempfile.NamedTemporaryFile(
        mode="w", suffix=ext, delete=False, encoding="utf-8"
    ) as f:
        f.write(content)
        return f.name


# Create the Gradio interface
# NOTE(review): the gr.HTML bodies below contain text only, with no tags.
# The markup looks like it was stripped at some point; confirm against the
# intended layout.
with gr.Blocks(title="Pages Converter Pro - CloudConvert", theme=gr.themes.Soft()) as app:

    # Header
    gr.HTML("""

📄 Pages Converter Pro

Convert Apple Pages documents using CloudConvert + Cerebras AI

✨ Professional .pages parsing + AI-powered format conversion

""")

    # Status indicator
    with gr.Row():
        gr.HTML(f"""
CloudConvert API: {'✅ Connected and Ready' if cloudconvert_token else '❌ API Key Missing'}
""")

    # Main interface
    with gr.Row():
        with gr.Column(scale=2):
            gr.HTML("""

📎 Upload & Convert

""")

            file_input = gr.File(
                label="Select .pages file",
                file_types=[".pages"],
            )

            output_format = gr.Radio(
                choices=["PDF", "DOCX", "TXT", "HTML", "Markdown"],
                value="PDF",
                label="🎯 Output Format",
            )

            convert_btn = gr.Button(
                "🚀 Convert Document",
                variant="primary",
                size="lg",
            )

        with gr.Column(scale=1):
            gr.HTML("""

✨ Features

💡 How it works:

  1. CloudConvert extracts text from .pages
  2. Cerebras AI formats for your chosen output
  3. Download your professionally converted file
""")

    # Output section
    with gr.Row():
        output_file = gr.File(
            label="📁 Download Your Converted File",
        )

    with gr.Row():
        status_html = gr.HTML(
            value="""
Upload a .pages file to get started
""",
        )

    # Connect the interface
    convert_btn.click(
        fn=convert_pages_document,
        inputs=[file_input, output_format],
        outputs=[output_file, status_html],
        show_progress=True,
    )

    # Footer
    gr.HTML("""

🔧 Technical Stack:

CloudConvert API for reliable .pages parsing • HuggingFace ZeroGPU for AI processing • Cerebras for lightning-fast inference

""")

# Launch the app
if __name__ == "__main__":
    app.launch()