Eaz123 committed on
Commit
d5cda59
·
verified ·
1 Parent(s): f6279fe

Update app.py

Files changed (1)
  1. app.py +104 -306
app.py CHANGED
@@ -6,338 +6,136 @@ import tempfile
  from pathlib import Path
  import difflib
  import time
- from typing import Optional, Tuple
  import logging
  from concurrent.futures import ThreadPoolExecutor
 
- # ========== LOGGING SETUP ==========
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
- )
- logger = logging.getLogger(__name__)
 
- # ========== MODEL SETUP ==========
- def load_model() -> Tuple[T5ForConditionalGeneration, T5Tokenizer]:
-     """Load model with error handling and progress tracking"""
      device = "cuda" if torch.cuda.is_available() else "cpu"
      model_name = "ramsrigouthamg/t5_paraphraser"
-
-     try:
-         logger.info("Loading tokenizer...")
-         # First try with legacy=False (newer versions)
-         try:
-             tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
-         except:
-             # Fallback to legacy mode if needed
-             tokenizer = T5Tokenizer.from_pretrained(model_name)
-
-         logger.info("Loading model...")
-         model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
-         model.eval()
-
-         logger.info("Model loaded successfully")
-         return model, tokenizer
-     except Exception as e:
-         logger.error(f"Model loading failed: {str(e)}")
-         raise gr.Error(f"Failed to initialize the AI model. Please ensure all dependencies are installed. Error: {str(e)}")
 
  try:
-     model, tokenizer = load_model()
-     device = next(model.parameters()).device
  except Exception as e:
-     logger.error(f"Initial model loading failed: {str(e)}")
-     model, tokenizer = None, None
-     device = "cuda" if torch.cuda.is_available() else "cpu"
 
- # ========== UTILITIES ==========
- def cleanup_file(file_path: Optional[str]) -> None:
-     """Securely delete temporary files with error handling"""
-     if file_path and Path(file_path).exists():
-         try:
-             Path(file_path).unlink()
-             logger.info(f"Cleaned up temporary file: {file_path}")
-         except Exception as e:
-             logger.warning(f"File cleanup error: {e}")
 
- def extract_text(file_obj) -> Tuple[str, Optional[str]]:
-     """Handle file uploads with comprehensive error handling"""
-     temp_path = None
-     try:
-         if file_obj.name.endswith('.pdf'):
-             # Create temp file with secure permissions
-             with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp:
-                 temp_path = tmp.name
-                 tmp.write(file_obj.read())
-
-             with pdfplumber.open(temp_path) as pdf:
-                 text = "\n".join(
-                     page.extract_text() or ""
-                     for page in pdf.pages[:3]  # Limit to 3 pages for performance
-                 )
-             return text[:5000], temp_path  # Limit to 5000 chars
-
-         # Handle text files
-         text = file_obj.read().decode('utf-8')[:5000]
-         return text, None
 
-     except Exception as e:
-         logger.error(f"File processing error: {str(e)}")
-         if temp_path:
-             cleanup_file(temp_path)
-         raise gr.Error(f"File processing failed: {str(e)}")
 
- # ========== CORE FUNCTION ==========
- def process_request(
-     file_obj,
-     text_input: str,
-     creativity: int = 3,
-     tone: str = "professional"
- ) -> Tuple[str, int, int, int, list]:
-     """Main processing pipeline with enhanced error handling"""
-     start_time = time.time()
-     temp_file = None
-     progress = []
-
-     try:
-         # Check if model is loaded
-         if model is None or tokenizer is None:
-             raise gr.Error("AI model failed to load. Please check the logs and try again later.")
-
-         # Input validation
-         if not (file_obj or text_input):
-             raise gr.Error("Please provide either text or a file")
-
-         # Process input
-         if file_obj:
-             text, temp_file = extract_text(file_obj)
-             progress.append("📄 File processed successfully")
-         else:
-             text = text_input[:5000]
-             progress.append("📝 Text input received")
-
-         if not text.strip():
-             return "", 0, 0, 0, progress
-
-         # Chunk processing with parallelization
-         chunks = [text[i:i+400] for i in range(0, len(text), 400)]
-         outputs = []
-
-         def process_chunk(chunk: str) -> str:
-             """Process a single text chunk"""
-             inputs = tokenizer(
-                 f"paraphrase: {chunk} </s>",
-                 max_length=256,
-                 padding="max_length",
-                 return_tensors="pt",
-                 truncation=True
-             ).to(device)
-
-             outputs = model.generate(
-                 **inputs,
-                 max_length=256,
-                 num_beams=3 + creativity,
-                 temperature=0.7 + (creativity * 0.15),
-                 early_stopping=True,
-                 num_return_sequences=1
-             )
-             return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-         # Process chunks in parallel (limited threads)
-         with ThreadPoolExecutor(max_workers=2) as executor:
-             outputs = list(executor.map(process_chunk, chunks))
-             progress.extend(f"✍️ Processed chunk {i+1}/{len(chunks)}"
-                             for i in range(len(chunks)))
-
-         result = " ".join(outputs)
-         similarity = int(difflib.SequenceMatcher(None, text, result).ratio() * 100)
-         elapsed = time.time() - start_time
-
-         progress.append(f"✅ Completed in {elapsed:.1f} seconds")
-         logger.info(f"Processed {len(text.split())} words in {elapsed:.2f}s")
-
-         return result, len(text.split()), len(result.split()), similarity, progress
-
-     except Exception as e:
-         logger.error(f"Processing error: {str(e)}")
-         progress.append(f"❌ Error: {str(e)}")
-         raise gr.Error(f"Processing failed: {str(e)}")
-
-     finally:
-         if temp_file:
-             cleanup_file(temp_file)
 
- # ========== UI COMPONENTS ==========
 
  custom_css = """
- :root {
-     --primary: #2563eb;
-     --primary-dark: #1d4ed8;
-     --text: #1e293b;
-     --light-bg: #f8fafc;
- }
- .gradio-container {
-     font-family: 'Inter', system-ui;
-     max-width: 1200px !important;
-     margin: 0 auto !important;
- }
- .header {
-     background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%);
-     border-radius: 12px 12px 0 0;
-     padding: 2rem 1rem;
-     color: white;
- }
- .card {
-     background: white;
-     border-radius: 12px;
-     box-shadow: 0 4px 24px rgba(0,0,0,0.08);
-     padding: 1.5rem;
-     margin-bottom: 1.5rem;
- }
- .progress-log {
-     font-size: 0.9em;
-     color: #64748b;
-     max-height: 120px;
-     overflow-y: auto;
-     background: #f8fafc;
-     padding: 0.75rem;
-     border-radius: 8px;
- }
- .file-upload {
-     border: 2px dashed #e2e8f0 !important;
-     border-radius: 8px !important;
-     padding: 1.5rem !important;
- }
- footer {
-     text-align: center;
-     padding: 1rem;
-     color: #64748b;
-     font-size: 0.9em;
- }
  """
 
- with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro") as demo:
-     # ========== HEADER ==========
-     with gr.Column(elem_classes=["header"]):
-         gr.Markdown("""
-         <div style="text-align: center">
-             <h1 style="font-weight: 700; margin-bottom: 0.5rem">AI Paraphraser Pro</h1>
-             <p style="opacity: 0.9">Enterprise-grade text transformation with semantic preservation</p>
-         </div>
-         """)
-
-     # ========== MAIN INTERFACE ==========
      with gr.Row():
-         # Input Panel
          with gr.Column(scale=1):
-             with gr.Column(elem_classes=["card"]):
-                 gr.Markdown("### Input Content")
-
                  with gr.Tabs():
-                     with gr.TabItem("Text Input"):
-                         text_input = gr.Textbox(
-                             placeholder="Paste your text here...",
-                             lines=8,
-                             max_lines=12,
-                             label="Direct Input",
-                             elem_id="text-input"
-                         )
-
-                     with gr.TabItem("File Upload"):
-                         file_upload = gr.File(
-                             label="Upload PDF/TXT (Auto-deleted after processing)",
-                             file_types=[".pdf", ".txt"],
-                             elem_classes=["file-upload"]
-                         )
-
-                 with gr.Row():
-                     creativity = gr.Slider(
-                         1, 5, value=3,
-                         label="Creativity Level",
-                         info="1=Conservative, 5=Highly Creative"
-                     )
-                     tone = gr.Dropdown(
-                         ["professional", "academic", "casual"],
-                         value="professional",
-                         label="Output Style"
-                     )
-
-                 submit_btn = gr.Button(
-                     "Paraphrase Now",
-                     variant="primary",
-                     size="lg"
-                 )
-
-         # Output Panel
          with gr.Column(scale=1):
-             with gr.Column(elem_classes=["card"]):
-                 gr.Markdown("### Paraphrased Output")
-
-                 output_text = gr.Textbox(
-                     lines=8,
-                     max_lines=12,
-                     label="Result",
-                     interactive=True,
-                     elem_id="output-text"
-                 )
 
                  with gr.Row():
-                     copy_btn = gr.Button("Copy to Clipboard", size="sm")
-                     download_btn = gr.Button("Download Result", size="sm")
-
-             with gr.Column(elem_classes=["card"]):
-                 gr.Markdown("**Text Analysis**")
                  with gr.Row():
-                     input_words = gr.Number(label="Original Words", precision=0)
-                     output_words = gr.Number(label="New Words", precision=0)
-                     similarity_score = gr.Number(label="Similarity (%)", precision=0)
-
-                 with gr.Accordion("Processing Log", open=False):
-                     progress_log = gr.HTML(elem_classes=["progress-log"])
-
-     # ========== FOOTER ==========
-     gr.HTML("""
-     <footer>
-         <p>© 2024 AI Paraphraser Pro | Secure Processing | Files Never Stored</p>
-     </footer>
-     """)
 
-     # ========== EVENT HANDLERS ==========
-     submit_btn.click(
-         process_request,
-         [file_upload, text_input, creativity, tone],
-         [output_text, input_words, output_words, similarity_score, progress_log],
-         api_name="paraphrase"
-     )
-
-     copy_btn.click(
-         None,
-         [output_text],
-         None,
-         js="(text) => { navigator.clipboard.writeText(text); alert('Copied to clipboard!'); }"
-     )
 
      download_btn.click(
-         lambda text: (text, "paraphrased_result.txt"),
-         [output_text],
-         [gr.File(label="Downloading...", visible=False)]
      )
 
- # [Previous code remains exactly the same until the launch section]
-
- # ========== LAUNCH SETTINGS ==========
- if __name__ == "__main__":
-     # Simple version without explicit queue
-     demo.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         show_api=False,
-         favicon_path="favicon.ico"
-     )
-
-     # OR for more control:
-     # demo.queue(max_size=2).launch(
-     #     server_name="0.0.0.0",
-     #     server_port=7860,
-     #     show_api=False,
-     #     favicon_path="favicon.ico"
-     # )
  from pathlib import Path
  import difflib
  import time
 
  import logging
  from concurrent.futures import ThreadPoolExecutor
 
+ # Logger Setup
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("ParaphraserPro")
 
+ # Load Model
+ def load_model():
      device = "cuda" if torch.cuda.is_available() else "cpu"
      model_name = "ramsrigouthamg/t5_paraphraser"
+     tokenizer = T5Tokenizer.from_pretrained(model_name)
+     model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
+     return model.eval(), tokenizer, device
 
  try:
+     model, tokenizer, device = load_model()
  except Exception as e:
+     raise gr.Error(f"Model failed to load: {str(e)}")
 
+ # Text Extractor
+ def extract_text(file_obj):
+     if file_obj.name.endswith(".pdf"):
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+             tmp.write(file_obj.read())
+             tmp_path = tmp.name
+         with pdfplumber.open(tmp_path) as pdf:
+             text = "\n".join(page.extract_text() or "" for page in pdf.pages[:3])
+         Path(tmp_path).unlink()
+         return text[:5000]
+     return file_obj.read().decode("utf-8")[:5000]
 
+ # Paraphrasing Core
+ def paraphrase(file, text_input, creativity, tone):
+     # "tone" arrives from the UI dropdown but is not yet used in generation
+     start = time.time()
+     logs = []
+     input_text = ""
+
+     if file:
+         input_text = extract_text(file)
+         logs.append("📄 File processed.")
+     elif text_input.strip():
+         input_text = text_input.strip()[:5000]
+         logs.append("📝 Text input received.")
+     else:
+         raise gr.Error("Please provide text or upload a file.")
+
+     chunks = [input_text[i:i+400] for i in range(0, len(input_text), 400)]
+
+     def paraphrase_chunk(chunk):
+         inputs = tokenizer(f"paraphrase: {chunk} </s>", return_tensors="pt", padding="max_length", truncation=True, max_length=256).to(device)
+         # do_sample=True so the creativity-scaled temperature actually affects generation
+         outputs = model.generate(**inputs, max_length=256, num_beams=3 + creativity, do_sample=True, temperature=0.7 + (creativity * 0.15), num_return_sequences=1)
+         return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     with ThreadPoolExecutor(max_workers=2) as executor:
+         results = list(executor.map(paraphrase_chunk, chunks))
+
+     output_text = " ".join(results)
+     similarity = int(difflib.SequenceMatcher(None, input_text, output_text).ratio() * 100)
+     elapsed = time.time() - start
 
+     logs.append(f"✅ Completed in {elapsed:.1f} seconds.")
+
+     return output_text, len(input_text.split()), len(output_text.split()), similarity, "<br>".join(logs)
+
+ # Custom CSS
  custom_css = """
+ body { background-color: #f8fafc; margin: 0; font-family: 'Inter', sans-serif; }
+ .gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
+ h1, h3 { color: #1e293b; }
+ .header { background: linear-gradient(135deg, #2563eb, #1d4ed8); padding: 2rem 1rem; color: white; text-align: center; border-radius: 1rem 1rem 0 0; }
+ .card { background: white; border-radius: 1rem; padding: 2rem; box-shadow: 0 4px 20px rgba(0,0,0,0.08); margin-bottom: 2rem; }
+ textarea, input, .gr-input { border-radius: 8px !important; }
+ footer { text-align: center; color: #64748b; padding: 1rem; font-size: 0.9em; }
  """
 
+ # Gradio Interface
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
+     with gr.Column(elem_classes="header"):
+         gr.Markdown("# AI Paraphraser Pro")
+         gr.Markdown("### Rewrite like a pro – smarter, faster, and safer")
+
      with gr.Row():
          with gr.Column(scale=1):
+             with gr.Column(elem_classes="card"):
+                 gr.Markdown("### Input")
+
                  with gr.Tabs():
+                     with gr.Tab("Paste Text"):
+                         text_input = gr.Textbox(label="Your Text", lines=10, placeholder="Paste or type your content...")
+
+                     with gr.Tab("Upload File"):
+                         file_input = gr.File(label="Upload .pdf or .txt", file_types=[".pdf", ".txt"])
+
+                 creativity = gr.Slider(1, 5, value=3, label="Creativity (1 = Conservative, 5 = Creative)")
+                 tone = gr.Dropdown(["professional", "academic", "casual"], value="professional", label="Style")
+
+                 submit = gr.Button("Paraphrase Now", variant="primary")
+
          with gr.Column(scale=1):
+             with gr.Column(elem_classes="card"):
+                 gr.Markdown("### Output")
+                 output_text = gr.Textbox(label="Paraphrased Output", lines=10, interactive=True)
 
                  with gr.Row():
+                     copy_btn = gr.Button("📋 Copy")
+                     download_btn = gr.Button("⬇️ Download")
+                     download_file = gr.File(label="Download", visible=False)  # target for the download handler below
+
+                 with gr.Accordion("📊 Analysis", open=False):
                      with gr.Row():
+                         in_words = gr.Number(label="Input Words", interactive=False)
+                         out_words = gr.Number(label="Output Words", interactive=False)
+                         similarity = gr.Number(label="Similarity (%)", interactive=False)
+
+                     logs = gr.HTML(label="Process Logs")
+
+     gr.HTML("<footer>© 2025 AI Paraphraser Pro – No content stored. Privacy-first platform.</footer>")
+
+     # Event Hooks
+     submit.click(paraphrase, inputs=[file_input, text_input, creativity, tone], outputs=[output_text, in_words, out_words, similarity, logs])
 
+     copy_btn.click(None, inputs=[output_text], js="(text) => navigator.clipboard.writeText(text)")
 
+     def save_output(txt):
+         # Write the result to a temp file and return its path so the hidden File component can serve it
+         path = Path(tempfile.gettempdir()) / "paraphrased_result.txt"
+         path.write_text(txt or "", encoding="utf-8")
+         return gr.update(value=str(path), visible=True)
+
      download_btn.click(
+         save_output,
+         inputs=[output_text],
+         outputs=[download_file]
      )
 
+ # Launch on Hugging Face
+ app.launch()
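
Note on the launch change: the previous revision left a queued variant commented out, and on a shared Space a request queue is still worth enabling, since each request runs beam search across two worker threads. A minimal sketch of a queued launch, reusing the host, port, and max_size=2 values from that commented-out block (`app` is the Blocks object defined above; the bare app.launch() this commit uses also works on Spaces):

    # Sketch only: queued launch mirroring the queue variant the old code commented out
    if __name__ == "__main__":
        app.queue(max_size=2).launch(
            server_name="0.0.0.0",  # listen on all interfaces, as the old launch block did
            server_port=7860,       # the port the old launch block used
            show_api=False,
        )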