import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import pdfplumber
import torch
import tempfile
from pathlib import Path
import difflib
import time
import logging
from concurrent.futures import ThreadPoolExecutor
# Logger Setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ParaphraserPro")
# Load Model
def load_model():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_name = "ramsrigouthamg/t5_paraphraser"
    logger.info(f"Loading {model_name} on {device}")
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
    return model.eval(), tokenizer, device

try:
    model, tokenizer, device = load_model()
except Exception as e:
    # gr.Error subclasses Exception, so raising it here simply aborts startup
    # with a readable message.
    raise gr.Error(f"Model failed to load: {e}")
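
# Design note: loading the model once at import time means all requests share the
# same weights; on Hugging Face Spaces the download/initialization cost is paid at
# container start-up rather than on the first user request.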
# Text Extractor
def extract_text(file_obj):
    # Gradio passes an uploaded file either as a path string or as an object
    # whose .name attribute points at the cached copy on disk.
    file_path = file_obj if isinstance(file_obj, str) else file_obj.name
    if file_path.endswith(".pdf"):
        with pdfplumber.open(file_path) as pdf:
            # Only the first three pages are read to keep latency predictable.
            text = "\n".join(page.extract_text() or "" for page in pdf.pages[:3])
        return text[:5000]
    return Path(file_path).read_text(encoding="utf-8", errors="ignore")[:5000]
# Paraphrasing Core
def paraphrase(file, text_input, creativity, tone):
    # `tone` is collected from the UI, but this T5 paraphraser has no style
    # control, so it is currently informational only.
    start = time.time()
    logs = []
    input_text = ""
    if file:
        input_text = extract_text(file)
        logs.append("📄 File processed.")
    elif text_input.strip():
        input_text = text_input.strip()[:5000]
        logs.append("📝 Text input received.")
    else:
        raise gr.Error("Please provide text or upload a file.")
    # Split into ~400-character chunks so each piece fits within the 256-token limit below.
    chunks = [input_text[i:i+400] for i in range(0, len(input_text), 400)]

    def paraphrase_chunk(chunk):
        inputs = tokenizer(f"paraphrase: {chunk}", return_tensors="pt", truncation=True, max_length=256).to(device)
        # do_sample=True is needed for temperature to take effect; both beams and
        # temperature scale with the creativity slider.
        outputs = model.generate(**inputs, max_length=256, num_beams=3 + int(creativity), do_sample=True,
                                 temperature=0.7 + (creativity * 0.15), num_return_sequences=1)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    with ThreadPoolExecutor(max_workers=2) as executor:
        results = list(executor.map(paraphrase_chunk, chunks))
    output_text = " ".join(results)
    similarity = int(difflib.SequenceMatcher(None, input_text, output_text).ratio() * 100)
    elapsed = time.time() - start
    logs.append(f"✅ Completed in {elapsed:.1f} seconds.")
    # Logs are rendered by a gr.HTML component, so join them with <br> tags.
    return output_text, len(input_text.split()), len(output_text.split()), similarity, "<br>".join(logs)
# Custom CSS
custom_css = """
body { background-color: #f8fafc; margin: 0; font-family: 'Inter', sans-serif; }
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
h1, h3 { color: #1e293b; }
.header { background: linear-gradient(135deg, #2563eb, #1d4ed8); padding: 2rem 1rem; color: white; text-align: center; border-radius: 1rem 1rem 0 0; }
.card { background: white; border-radius: 1rem; padding: 2rem; box-shadow: 0 4px 20px rgba(0,0,0,0.08); margin-bottom: 2rem; }
textarea, input, .gr-input { border-radius: 8px !important; }
footer { text-align: center; color: #64748b; padding: 1rem; font-size: 0.9em; }
"""
# Gradio Interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
    with gr.Column(elem_classes="header"):
        gr.Markdown("# AI Paraphraser Pro")
        gr.Markdown("### Rewrite like a pro — smarter, faster, and safer")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Column(elem_classes="card"):
                gr.Markdown("### Input")
                with gr.Tabs():
                    with gr.Tab("Paste Text"):
                        text_input = gr.Textbox(label="Your Text", lines=10, placeholder="Paste or type your content...")
                    with gr.Tab("Upload File"):
                        file_input = gr.File(label="Upload .pdf or .txt", file_types=[".pdf", ".txt"])
                creativity = gr.Slider(1, 5, value=3, label="Creativity (1 = Conservative, 5 = Creative)")
                tone = gr.Dropdown(["professional", "academic", "casual"], value="professional", label="Style")
                submit = gr.Button("Paraphrase Now", variant="primary")
        with gr.Column(scale=1):
            with gr.Column(elem_classes="card"):
                gr.Markdown("### Output")
                output_text = gr.Textbox(label="Paraphrased Output", lines=10, interactive=True)
                with gr.Row():
                    copy_btn = gr.Button("📋 Copy")
                    download_btn = gr.Button("⬇️ Download")
                # Hidden until the download handler fills it with a .txt file.
                download_file = gr.File(label="Download Result", visible=False)
                with gr.Accordion("📊 Analysis", open=False):
                    with gr.Row():
                        in_words = gr.Number(label="Input Words", interactive=False)
                        out_words = gr.Number(label="Output Words", interactive=False)
                        similarity = gr.Number(label="Similarity (%)", interactive=False)
                logs = gr.HTML(label="Process Logs")
    gr.HTML("")
    # Event Hooks
    submit.click(paraphrase, inputs=[file_input, text_input, creativity, tone],
                 outputs=[output_text, in_words, out_words, similarity, logs])
    copy_btn.click(None, inputs=[output_text], js="(text) => navigator.clipboard.writeText(text)")

    def prepare_download(txt):
        # Write the current output to a temporary .txt file and reveal the hidden file component.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as tmp:
            tmp.write(txt or "")
        return gr.File(value=tmp.name, visible=True)

    download_btn.click(prepare_download, inputs=[output_text], outputs=[download_file])
# Launch on Hugging Face
app.launch()
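# If concurrent users are expected, calling app.queue() before launch() would queue
# GPU-bound requests instead of running them in parallel (left as an option here).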