import difflib
import logging
import re
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import gradio as gr
import pdfplumber
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
|
|
|
|
|
# Send INFO-and-above records to the default handler and expose one named
# logger for the rest of this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ParaphraserPro")
|
|
|
|
|
def load_model():
    """Load the T5 paraphrasing checkpoint and its tokenizer.

    The model is placed on CUDA when ``torch.cuda.is_available()`` reports a
    GPU and on the CPU otherwise.

    Returns:
        A ``(model, tokenizer, device)`` tuple: the model already switched to
        evaluation mode, the matching tokenizer, and the device string
        (``"cuda"`` or ``"cpu"``) the model was moved to.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_name = "ramsrigouthamg/t5_paraphraser"
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
    # eval() returns the module itself, so it can be returned directly.
    return model.eval(), tokenizer, device
|
|
|
# Load the model once at import time; the request handler below reads the
# resulting module-level `model`, `tokenizer` and `device`.
try:
    model, tokenizer, device = load_model()
except Exception as e:
    # Keep the full traceback in the log and chain the cause, so the startup
    # failure can be diagnosed from more than the one-line message.
    logger.exception("Model failed to load")
    raise gr.Error(f"Model failed to load: {e}") from e
|
|
|
|
|
def _read_pdf_text(pdf_path, max_pages):
    """Return the text of the first *max_pages* pages of the PDF at *pdf_path*."""
    with pdfplumber.open(pdf_path) as pdf:
        # A page may yield no text; substitute "" so join() never sees None.
        return "\n".join(page.extract_text() or "" for page in pdf.pages[:max_pages])


def extract_text(file_obj, max_chars=5000, max_pages=3):
    """Extract plain text from an uploaded ``.pdf`` or text file.

    Args:
        file_obj: The upload. Either an object with a ``name`` attribute and a
            ``read()`` method returning the file content, or a path (a ``str``,
            a ``pathlib.Path``, or an object whose ``name`` is the path) that
            is read from disk.
        max_chars: Maximum number of characters to return.
        max_pages: Maximum number of leading PDF pages to read.

    Returns:
        The extracted text, truncated to ``max_chars`` characters. PDF pages
        are joined with newlines; other files are decoded as UTF-8 with
        undecodable bytes replaced rather than raising.
    """
    if isinstance(file_obj, Path):
        # Path.name is only the basename, so use the full path instead.
        source_name = str(file_obj)
    else:
        source_name = str(getattr(file_obj, "name", file_obj))

    # Compare case-insensitively so "REPORT.PDF" is handled as a PDF too.
    is_pdf = source_name.lower().endswith(".pdf")
    reader = getattr(file_obj, "read", None)

    if reader is None:
        # Path-style upload: the content already lives on disk.
        if is_pdf:
            return _read_pdf_text(source_name, max_pages)[:max_chars]
        raw = Path(source_name).read_bytes()
    else:
        raw = reader()
        if isinstance(raw, str):
            # Text-mode reader; normalise to bytes so both branches below work.
            raw = raw.encode("utf-8")
        if is_pdf:
            # Spool the bytes to a real file for pdfplumber and always remove
            # it afterwards, even when parsing fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(raw)
                tmp_path = Path(tmp.name)
            try:
                return _read_pdf_text(tmp_path, max_pages)[:max_chars]
            finally:
                tmp_path.unlink()

    return raw.decode("utf-8", errors="replace")[:max_chars]
|
|
|
|
|
def _split_into_chunks(text, limit=400):
    """Split *text* into chunks of at most *limit* characters.

    Sentence boundaries are preferred; a sentence longer than *limit* is
    broken on whitespace, and a single word longer than *limit* is sliced.
    """
    pieces = []
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if len(sentence) <= limit:
            pieces.append(sentence)
            continue
        for word in sentence.split():
            pieces.extend(word[i:i + limit] for i in range(0, len(word), limit))

    chunks = []
    current = ""
    for piece in pieces:
        if not piece:
            continue
        candidate = f"{current} {piece}" if current else piece
        if len(candidate) <= limit:
            current = candidate
        else:
            chunks.append(current)
            current = piece
    if current:
        chunks.append(current)
    return chunks


def paraphrase(file, text_input, creativity, tone):
    """Paraphrase an uploaded file or pasted text and report basic statistics.

    Args:
        file: Uploaded file; when truthy it takes precedence over
            ``text_input`` and is read through ``extract_text``.
        text_input: Pasted text; used (stripped, first 5000 characters) when
            no file is given.
        creativity: Slider value; raises both the beam count and the sampling
            temperature.
        tone: Selected style. Accepted for interface compatibility; the
            generation step does not currently use it.

    Returns:
        ``(output_text, input_word_count, output_word_count, similarity_percent,
        logs_html)`` where ``similarity_percent`` is the integer
        ``difflib.SequenceMatcher`` ratio between input and output and
        ``logs_html`` is the log lines joined with ``<br>``.

    Raises:
        gr.Error: If neither a file nor non-blank text is supplied, or the
            uploaded file yields no text.
    """
    start = time.time()
    logs = []

    # NOTE(review): the leading emoji of the first two log messages were
    # mojibake in the source; the glyphs used here are best-guess replacements.
    if file:
        input_text = extract_text(file)
        if not input_text.strip():
            # E.g. an image-only PDF: nothing to paraphrase, so say so.
            raise gr.Error("No readable text was found in the uploaded file.")
        logs.append("📄 File processed.")
    elif text_input and text_input.strip():
        input_text = text_input.strip()[:5000]
        logs.append("📝 Text input received.")
    else:
        raise gr.Error("Please provide text or upload a file.")

    # Feed the model whole sentences/words instead of arbitrary 400-character
    # slices that cut words in half.
    chunks = _split_into_chunks(input_text, 400)

    # num_beams must be an integer even if the slider delivers a float.
    beam_count = 3 + int(creativity)
    # temperature only takes effect in sampling mode, hence do_sample=True.
    temperature = 0.7 + (creativity * 0.15)

    def paraphrase_chunk(chunk):
        inputs = tokenizer(
            f"paraphrase: {chunk} </s>",
            return_tensors="pt",
            truncation=True,
            max_length=256,
        ).to(device)
        outputs = model.generate(
            **inputs,
            max_length=256,
            num_beams=beam_count,
            do_sample=True,
            temperature=temperature,
            num_return_sequences=1,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    # executor.map preserves input order, so the chunks rejoin in sequence.
    with ThreadPoolExecutor(max_workers=2) as executor:
        results = list(executor.map(paraphrase_chunk, chunks))

    output_text = " ".join(results)
    similarity = int(difflib.SequenceMatcher(None, input_text, output_text).ratio() * 100)
    elapsed = time.time() - start

    logs.append(f"✅ Completed in {elapsed:.1f} seconds.")

    return (
        output_text,
        len(input_text.split()),
        len(output_text.split()),
        similarity,
        "<br>".join(logs),
    )
|
|
|
|
|
# Stylesheet injected into the Gradio page; the .header and .card classes are
# attached to layout columns via elem_classes.
custom_css = """
body { background-color: #f8fafc; margin: 0; font-family: 'Inter', sans-serif; }
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
h1, h3 { color: #1e293b; }
.header { background: linear-gradient(135deg, #2563eb, #1d4ed8); padding: 2rem 1rem; color: white; text-align: center; border-radius: 1rem 1rem 0 0; }
.card { background: white; border-radius: 1rem; padding: 2rem; box-shadow: 0 4px 20px rgba(0,0,0,0.08); margin-bottom: 2rem; }
textarea, input, .gr-input { border-radius: 8px !important; }
footer { text-align: center; color: #64748b; padding: 1rem; font-size: 0.9em; }
"""
|
|
|
|
|
def _write_download_file(text):
    """Write *text* to a temporary ``.txt`` file and reveal it for download.

    Returns a component update carrying the file path and ``visible=True``.
    Raises ``gr.Error`` when there is no text to save.
    """
    if not text:
        raise gr.Error("Nothing to download yet.")
    # delete=False: the file has to outlive this call so it can be served.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".txt", encoding="utf-8"
    ) as handle:
        handle.write(text)
    return gr.update(value=handle.name, visible=True)


# NOTE(review): several emoji labels below were mojibake in the source; the
# copy / analysis glyphs are best-guess replacements.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
    # Page header.
    with gr.Column(elem_classes="header"):
        gr.Markdown("# AI Paraphraser Pro")
        gr.Markdown("### Rewrite like a pro — smarter, faster, and safer")

    with gr.Row():
        # Left column: input card.
        with gr.Column(scale=1):
            with gr.Column(elem_classes="card"):
                gr.Markdown("### Input")

                with gr.Tabs():
                    with gr.Tab("Paste Text"):
                        text_input = gr.Textbox(
                            label="Your Text",
                            lines=10,
                            placeholder="Paste or type your content...",
                        )

                    with gr.Tab("Upload File"):
                        file_input = gr.File(
                            label="Upload .pdf or .txt",
                            file_types=[".pdf", ".txt"],
                        )

                # step=1 keeps the value integral; it feeds the beam count.
                creativity = gr.Slider(
                    1,
                    5,
                    value=3,
                    step=1,
                    label="Creativity (1 = Conservative, 5 = Creative)",
                )
                tone = gr.Dropdown(
                    ["professional", "academic", "casual"],
                    value="professional",
                    label="Style",
                )

                submit = gr.Button("Paraphrase Now", variant="primary")

        # Right column: output card.
        with gr.Column(scale=1):
            with gr.Column(elem_classes="card"):
                gr.Markdown("### Output")
                output_text = gr.Textbox(
                    label="Paraphrased Output", lines=10, interactive=True
                )

                with gr.Row():
                    copy_btn = gr.Button("📋 Copy")
                    download_btn = gr.Button("⬇️ Download")

                # Hidden until the download button produces a file.
                download_file = gr.File(
                    label="Download", visible=False, interactive=False
                )

                with gr.Accordion("📊 Analysis", open=False):
                    with gr.Row():
                        in_words = gr.Number(label="Input Words", interactive=False)
                        out_words = gr.Number(label="Output Words", interactive=False)
                        similarity = gr.Number(label="Similarity (%)", interactive=False)

                    logs = gr.HTML(label="Process Logs")

    gr.HTML(
        "<footer>© 2025 AI Paraphraser Pro — No content stored. "
        "Privacy-first platform.</footer>"
    )

    # Event listeners are registered inside the Blocks context.
    submit.click(
        paraphrase,
        inputs=[file_input, text_input, creativity, tone],
        outputs=[output_text, in_words, out_words, similarity, logs],
    )

    # Copying happens entirely in the browser; no Python callback is needed.
    copy_btn.click(
        None,
        inputs=[output_text],
        js="(text) => navigator.clipboard.writeText(text)",
    )

    download_btn.click(
        _write_download_file,
        inputs=[output_text],
        outputs=[download_file],
    )


app.launch()
|
|