SlouchyBuffalo committed on
Commit
8c7667b
·
verified ·
1 Parent(s): 495a855

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -382
app.py DELETED
@@ -1,382 +0,0 @@
1
- # app.py - Complete PWA Pages Converter
2
- import gradio as gr
3
- import os
4
- import spaces
5
- import tempfile
6
- import zipfile
7
- import json
8
- from pathlib import Path
9
- from huggingface_hub import InferenceClient
10
- import time
11
-
12
# Report whether an HF token is configured (presence and length only,
# never the secret value itself).
token = os.getenv("HF_TOKEN")
token_length = len(token) if token else 0
print(f"Debug: Token exists = {token is not None}")
print(f"Debug: Token length = {token_length}")

# Shared inference client: Llama 3.3 70B Instruct served through the
# Cerebras provider, authenticated with the token read above.
client = InferenceClient(
    model="meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=token,
)
23
-
24
@spaces.GPU
def extract_pages_content(file_path):
    """Extract readable text from an Apple Pages (.pages) archive.

    The archive is unzipped into a temporary directory and every ``*.iwa``
    file found directly inside its ``Index`` folder is scanned for runs of
    printable ASCII characters. Runs longer than five characters (after
    stripping) are kept, de-duplicated in first-seen order, and joined with
    blank lines.

    Args:
        file_path: Path to the uploaded .pages file (a zip archive).

    Returns:
        str: The extracted text. On failure a human-readable message is
        returned instead of raising: either
        "Could not extract readable content from .pages file" or a string
        starting with "Error extracting content: ".
    """
    # Function-scope import: the module's import block does not provide `re`.
    import re

    print(f"DEBUG: Processing file: {file_path}")
    print(f"DEBUG: File exists: {os.path.exists(file_path)}")

    # Compiled once instead of on every loop iteration.
    printable_run = re.compile(r'[\x20-\x7E]+')

    try:
        content_parts = []

        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            with tempfile.TemporaryDirectory() as temp_dir:
                zip_ref.extractall(temp_dir)
                temp_path = Path(temp_dir)
                print(f"DEBUG: Extracted files: {list(temp_path.iterdir())}")
                print(f"DEBUG: Index folder contents: {list((temp_path / 'Index').iterdir()) if (temp_path / 'Index').is_dir() else 'No Index folder'}")

                # Strategy 1: look for .iwa files in the Index folder.
                index_path = temp_path / "Index"
                if index_path.is_dir():
                    # Sorted so the output order does not depend on
                    # filesystem enumeration order.
                    for iwa_file in sorted(index_path.glob("*.iwa")):
                        try:
                            binary_content = iwa_file.read_bytes()
                        except OSError as exc:
                            # Best effort: skip files that cannot be read,
                            # but say so instead of failing silently.
                            print(f"DEBUG: Skipping unreadable {iwa_file.name}: {exc}")
                            continue

                        # .iwa files are binary archives; decoding with
                        # errors='ignore' never raises and leaves only the
                        # bytes that form valid UTF-8.
                        text_content = binary_content.decode('utf-8', errors='ignore')
                        for run in printable_run.findall(text_content):
                            fragment = run.strip()
                            if len(fragment) > 5:
                                content_parts.append(fragment)

        if content_parts:
            # dict.fromkeys de-duplicates while preserving first-seen order.
            unique_content = list(dict.fromkeys(content_parts))
            return "\n\n".join(unique_content)
        return "Could not extract readable content from .pages file"

    except Exception as e:
        # Top-level boundary for the UI: report the problem as text.
        return f"Error extracting content: {str(e)}"
66
-
67
# Messages extract_pages_content returns in place of document text when it
# fails; they must never be forwarded to the model as if they were content.
_EXTRACTION_FAILURE_PREFIXES = (
    "Error extracting content:",
    "Could not extract readable content",
)


@spaces.GPU
def convert_pages_document(file, output_format, progress=gr.Progress()):
    """Convert an uploaded Pages document to the requested output format.

    Pipeline: extract text from the .pages archive, build a conversion
    prompt, send it to the chat model via the module-level ``client``, and
    write the reply to a temporary file.

    Args:
        file: Value delivered by the ``gr.File`` input. Either an object
            exposing the upload path as ``.name`` or the path string itself.
        output_format: One of the format names offered by the UI
            ("PDF", "DOCX", "TXT", "HTML", "Markdown").
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        tuple: ``(output_path, status_message)``. ``output_path`` is ``None``
        whenever the conversion did not succeed; errors are reported through
        the status message rather than raised.
    """
    if not file:
        return None, "❌ Please upload a .pages file"

    # Accept both a file wrapper with .name and a bare path string.
    file_path = getattr(file, "name", file)

    try:
        progress(0.1, desc="📖 Extracting content from .pages file...")

        # Extract content
        content = extract_pages_content(file_path)

        if not content or len(content.strip()) < 10:
            return None, "❌ Could not extract sufficient content from .pages file"

        # The extractor signals failure with a message string; surface it to
        # the user instead of asking the model to "convert" an error message.
        if content.startswith(_EXTRACTION_FAILURE_PREFIXES):
            return None, f"❌ {content}"

        progress(0.4, desc="🤖 Preparing conversion with Cerebras...")

        # Create format-specific prompt
        prompt = create_conversion_prompt(content, output_format)

        progress(0.6, desc="⚡ Converting with Cerebras Lightning Speed...")

        # Convert using Cerebras (chat completion API)
        try:
            completion = client.chat_completion(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=4096,
                temperature=0.1,
            )
            converted_text = completion.choices[0].message.content
        except Exception as e:
            return None, f"❌ Conversion error: {str(e)}"

        if not converted_text or not converted_text.strip():
            # Guard against an empty/None reply, which would otherwise
            # surface as an opaque AttributeError further down.
            return None, "❌ Conversion error: the model returned an empty response"

        progress(0.9, desc="💫 Creating output file...")

        # Create output file
        output_path = create_output_file(converted_text, output_format)

        progress(1.0, desc="✅ Conversion complete!")

        return output_path, f"✅ Successfully converted to {output_format} using ZeroGPU!"

    except Exception as e:
        # UI boundary: never let an exception escape to the Gradio handler.
        return None, f"❌ Error: {str(e)}"
114
-
115
def create_conversion_prompt(content, output_format):
    """Build the conversion prompt sent to the chat model.

    Args:
        content: Text extracted from the Pages document.
        output_format: Target format name (e.g. "PDF", "DOCX", "TXT",
            "HTML", "Markdown"). The format-specific instruction is looked up
            case-insensitively; unknown formats get a generic instruction.

    Returns:
        str: The full prompt, ending with a "CONVERTED <FORMAT> OUTPUT:" cue.
    """
    # Keys are lower-case so that "pdf", "Pdf" and "PDF" all select the same
    # instruction instead of silently falling back to the generic one.
    format_instructions = {
        "pdf": "Create content suitable for PDF format with proper structure and formatting",
        "docx": "Format as Microsoft Word document with headers, paragraphs, and proper styling",
        "txt": "Convert to clean, readable plain text preserving structure",
        "html": "Create well-structured HTML with semantic markup",
        "markdown": "Convert to properly formatted Markdown with headers and structure",
    }
    format_hint = format_instructions.get(
        output_format.strip().lower(),
        "Format appropriately for the requested output type",
    )

    return f"""You are an expert document converter. Convert the following Apple Pages document content to {output_format} format.

INSTRUCTIONS:
1. Preserve the original structure, formatting, and content organization
2. Maintain headings, paragraphs, lists, and any tables if present
3. Ensure the output is clean, professional, and well-formatted
4. {format_hint}
5. Return ONLY the converted content without explanations or meta-commentary

ORIGINAL CONTENT:
{content}

CONVERTED {output_format.upper()} OUTPUT:"""
138
-
139
def _reserve_temp_path(suffix):
    """Create an empty temporary file ending in *suffix* and return its path.

    The descriptor is closed immediately so that other writers can open the
    path on every platform; the caller owns (and must delete) the file.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _write_pdf(path, content):
    """Render *content* as simple wrapped text pages into the PDF at *path*."""
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    import textwrap

    pdf = canvas.Canvas(path, pagesize=letter)
    _, height = letter
    y_position = height - 50

    # Split content into lines, wrapping long ones at 80 characters and
    # keeping empty lines as vertical spacing.
    lines = []
    for paragraph in content.split('\n'):
        if paragraph.strip():
            lines.extend(textwrap.wrap(paragraph, width=80) or [''])
        else:
            lines.append('')

    for line in lines:
        if y_position < 50:  # bottom margin reached: start a new page
            pdf.showPage()
            y_position = height - 50
        pdf.drawString(50, y_position, line)
        y_position -= 20

    pdf.save()


def _write_docx(path, content):
    """Write *content* to a Word document at *path*, one paragraph per block."""
    from docx import Document

    doc = Document()
    for para in content.split('\n\n'):
        if para.strip():
            doc.add_paragraph(para.strip())
    doc.save(path)


def _write_text(path, content):
    """Write *content* verbatim as UTF-8 text to *path*."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)


def create_output_file(content, output_format):
    """Write converted content to a temporary file of the requested format.

    Args:
        content: Converted document text; surrounding whitespace is stripped.
        output_format: "PDF" and "DOCX" are rendered with reportlab and
            python-docx respectively; "TXT", "HTML" and "Markdown" are
            written as UTF-8 text. Any other value is written as ``.txt``.

    Returns:
        str: Path of the created file. The file is not deleted
        automatically; the caller is responsible for it.

    Raises:
        Exception: Whatever the underlying writer raises (including
            ImportError when reportlab/python-docx is missing). The temporary
            file is removed before the error propagates.
    """
    # Clean the content (remove potential prompt artifacts)
    content = content.strip()

    extensions = {
        "PDF": ".pdf",
        "DOCX": ".docx",
        "TXT": ".txt",
        "HTML": ".html",
        "Markdown": ".md",
    }
    writers = {
        "PDF": _write_pdf,
        "DOCX": _write_docx,
    }

    suffix = extensions.get(output_format, ".txt")
    writer = writers.get(output_format, _write_text)

    # Reserve the path first and close the handle, so third-party writers are
    # not writing to a file this process still holds open.
    path = _reserve_temp_path(suffix)
    try:
        writer(path, content)
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return path
205
-
206
# Stylesheet handed to gr.Blocks; the class names below are attached to
# components through elem_classes and to the raw HTML fragments.
css = """
.gradio-container {
background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
min-height: 100vh;
}

.main-content {
max-width: 1000px;
margin: 0 auto;
padding: 2rem;
}

.hero-section {
background: white;
border-radius: 1rem;
padding: 2rem;
text-align: center;
box-shadow: 0 10px 30px rgba(0,0,0,0.1);
margin-bottom: 2rem;
}

.upload-section {
background: white;
border-radius: 1rem;
padding: 2rem;
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}

.format-selector {
background: #f8f9fa;
border-radius: 0.5rem;
padding: 1rem;
margin: 1rem 0;
}

.convert-button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
padding: 1rem 2rem;
border-radius: 0.5rem;
font-size: 1.1rem;
font-weight: bold;
width: 100%;
cursor: pointer;
transition: all 0.3s ease;
}

.convert-button:hover {
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(102, 126, 234, 0.3);
}

.zerogpu-badge {
display: inline-block;
background: linear-gradient(45deg, #ff6b6b, #feca57);
color: white;
padding: 0.5rem 1rem;
border-radius: 2rem;
font-weight: bold;
font-size: 0.9rem;
}

.pro-features {
background: #e8f5e9;
border-radius: 0.5rem;
padding: 1rem;
margin-top: 1rem;
}
"""

# Static HTML fragments, kept out of the layout code so the component tree
# below stays readable.
_HERO_HTML = """
<div class="hero-section">
<h1>📄 Pages Converter Pro</h1>
<span class="zerogpu-badge">⚡ ZeroGPU Accelerated</span>
<p style="margin-top: 1rem; color: #666;">
Convert Apple Pages documents with lightning-fast Cerebras Llama-3.3-70B
</p>
</div>
"""

_PRO_FEATURES_HTML = """
<div class="pro-features">
<h3>🚀 HuggingFace Pro Benefits Active</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-top: 1rem;">
<div>✅ 5x Usage Quota</div>
<div>🔥 Priority Queue Access</div>
<div>💎 H200 GPU Hardware</div>
<div>⚡ Zero-GPU Acceleration</div>
</div>
</div>
"""

_SIDEBAR_HTML = """
<div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 5px 15px rgba(0,0,0,0.1);">
<h3>⚡ ZeroGPU Features</h3>
<ul style="color: #666;">
<li>Lightning-fast processing</li>
<li>H200 hardware acceleration</li>
<li>Priority queue access</li>
<li>Cerebras optimization</li>
</ul>

<h3>📋 Supported Formats</h3>
<ul style="color: #666;">
<li>📄 PDF (best quality)</li>
<li>📝 Microsoft Word (DOCX)</li>
<li>📋 Plain Text (TXT)</li>
<li>🌐 Web Page (HTML)</li>
<li>✏️ Markdown (MD)</li>
</ul>
</div>
"""

_FOOTER_HTML = """
<div style="text-align: center; margin-top: 3rem; padding: 2rem; color: white;">
<p>💎 Built exclusively for HuggingFace Pro users</p>
<p><small>Powered by Cerebras • Accelerated by ZeroGPU • Made with ❤️</small></p>
</div>
"""

# Build the Gradio interface: banners, upload/convert controls with an info
# sidebar, then the download slot, the status line and the footer.
with gr.Blocks(css=css, title="Pages Converter Pro - ZeroGPU", theme=gr.themes.Soft()) as app:
    with gr.Column(elem_classes=["main-content"]):
        # Banners
        gr.HTML(_HERO_HTML)
        gr.HTML(_PRO_FEATURES_HTML)

        # Conversion controls (left) and feature sidebar (right)
        with gr.Row():
            with gr.Column(scale=2, elem_classes=["upload-section"]):
                gr.HTML("<h3>📎 Upload Your Document</h3>")

                file_input = gr.File(
                    label="Select .pages file",
                    file_types=[".pages"],
                    elem_id="file-upload",
                )

                output_format = gr.Radio(
                    choices=["PDF", "DOCX", "TXT", "HTML", "Markdown"],
                    value="PDF",
                    label="🎯 Output Format",
                    elem_classes=["format-selector"],
                )

                convert_btn = gr.Button(
                    "⚡ Convert with ZeroGPU",
                    variant="primary",
                    elem_classes=["convert-button"],
                )

            with gr.Column(scale=1):
                gr.HTML(_SIDEBAR_HTML)

        # Result download
        with gr.Row():
            output_file = gr.File(
                label="📁 Download Your Converted File",
                elem_id="output-download",
            )

        # Status line, overwritten with the message returned by the handler
        with gr.Row():
            status_html = gr.HTML(
                value="<div style='text-align: center; padding: 1rem; color: #666;'>Ready to convert your Pages document</div>",
                elem_id="status-display",
            )

        # Wire the button to the conversion pipeline
        convert_btn.click(
            fn=convert_pages_document,
            inputs=[file_input, output_format],
            outputs=[output_file, status_html],
            show_progress=True,
        )

        gr.HTML(_FOOTER_HTML)

# Start the server only when executed as a script
if __name__ == "__main__":
    app.launch()