File size: 5,736 Bytes
0b71231 fe5d04a 1e32c00 dda7a4a 1e32c00 d31a082 d5cda59 0b71231 d5cda59 d31a082 d5cda59 d31a082 9d60491 d5cda59 9d60491 d5cda59 658ed6e d5cda59 1e32c00 d5cda59 fe5d04a d5cda59 658ed6e d5cda59 f0fbbe7 d5cda59 1e32c00 d5cda59 7c97293 d5cda59 1e32c00 7c97293 d5cda59 dda7a4a d5cda59 7c97293 d5cda59 1e32c00 d5cda59 1e32c00 d5cda59 80f4701 d5cda59 1e32c00 d5cda59 80f4701 f0fbbe7 d5cda59 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import pdfplumber
import torch
import tempfile
from pathlib import Path
import difflib
import time
import logging
from concurrent.futures import ThreadPoolExecutor
# Logger setup: configure the root logger at INFO level and create the
# named logger used by this application.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ParaphraserPro")
# Load Model
def load_model():
    """Load the T5 paraphrasing checkpoint together with its tokenizer.

    Returns:
        A ``(model, tokenizer, device)`` tuple: the model moved to the chosen
        device and switched to eval mode, the matching tokenizer, and the
        device string (``"cuda"`` when CUDA is available, otherwise ``"cpu"``).
    """
    if torch.cuda.is_available():
        target = "cuda"
    else:
        target = "cpu"
    checkpoint = "ramsrigouthamg/t5_paraphraser"
    tok = T5Tokenizer.from_pretrained(checkpoint)
    net = T5ForConditionalGeneration.from_pretrained(checkpoint)
    net = net.to(target)
    return net.eval(), tok, target
# Load the model once at import time so every request reuses the same objects.
try:
    model, tokenizer, device = load_model()
except Exception as e:
    # Keep the full traceback in the server log, then surface a short
    # message while chaining the original exception as the cause.
    logger.exception("Model failed to load")
    raise gr.Error(f"Model failed to load: {e}") from e
# Text Extractor
def extract_text(file_obj, max_chars=5000, max_pages=3):
    """Extract plain text from an uploaded ``.pdf`` or UTF-8 text file.

    Args:
        file_obj: Either an open binary file-like object exposing ``.name``
            and ``.read()``, or a filesystem path (``str`` / ``Path``) to
            the uploaded file.
        max_chars: Maximum number of characters returned.
        max_pages: Maximum number of leading PDF pages that are read.

    Returns:
        The extracted text, truncated to ``max_chars`` characters.
    """
    if callable(getattr(file_obj, "read", None)):
        source_name = str(getattr(file_obj, "name", ""))
        payload = file_obj.read()
    else:
        # A bare path was supplied instead of an open handle.
        source_name = str(file_obj)
        payload = Path(source_name).read_bytes()

    # Case-insensitive so that "REPORT.PDF" is not mistaken for a text file.
    if not source_name.lower().endswith(".pdf"):
        if isinstance(payload, bytes):
            # Undecodable bytes become U+FFFD instead of aborting the request.
            payload = payload.decode("utf-8", errors="replace")
        return payload[:max_chars]

    # pdfplumber is given a real file on disk, so spill the bytes to a
    # scratch file that is removed again below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(payload)
        tmp_path = tmp.name
    try:
        with pdfplumber.open(tmp_path) as pdf:
            leading_pages = pdf.pages[:max_pages]
            # extract_text() may yield None for a page; treat that as empty.
            text = "\n".join(page.extract_text() or "" for page in leading_pages)
    finally:
        # Remove the scratch copy even when parsing fails.
        Path(tmp_path).unlink()
    return text[:max_chars]
# Paraphrasing Core
def paraphrase(file, text_input, creativity, tone):
    """Paraphrase uploaded or pasted text and report simple statistics.

    Args:
        file: Uploaded file passed to ``extract_text``; takes precedence
            over ``text_input`` when truthy.
        text_input: Pasted text; used (stripped, first 5000 characters)
            when no file is given.
        creativity: Slider value; larger values widen the beam and raise
            the temperature passed to ``model.generate``.
        tone: Selected style. Accepted for interface compatibility but not
            used by the generation call.

    Returns:
        A tuple ``(output_text, input_word_count, output_word_count,
        similarity_percent, logs_html)``.

    Raises:
        gr.Error: If neither the file nor the text box yields any text.
    """
    start = time.perf_counter()
    logs = []
    if file:
        input_text = extract_text(file)
        logs.append("📄 File processed.")
    elif text_input and text_input.strip():
        input_text = text_input.strip()[:5000]
        logs.append("📝 Text input received.")
    else:
        raise gr.Error("Please provide text or upload a file.")

    # An upload with no extractable text would otherwise produce an empty
    # result that is reported as 100% similar.
    if not input_text.strip():
        raise gr.Error("No readable text was found in the uploaded file.")

    # The slider may deliver a float, but the beam count must be an integer.
    num_beams = 3 + int(round(creativity))
    # NOTE(review): do_sample is not set, so temperature is presumably
    # ignored by beam search -- confirm against the transformers docs.
    temperature = 0.7 + (creativity * 0.15)

    # Fixed 400-character windows; NOTE(review): this can cut through words.
    chunks = [input_text[i:i + 400] for i in range(0, len(input_text), 400)]

    def paraphrase_chunk(chunk):
        """Run one text chunk through the model and decode the best beam."""
        inputs = tokenizer(
            f"paraphrase: {chunk} </s>",
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=256,
        ).to(device)
        outputs = model.generate(
            **inputs,
            max_length=256,
            num_beams=num_beams,
            temperature=temperature,
            num_return_sequences=1,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    # executor.map preserves input order, so the chunks rejoin in sequence.
    with ThreadPoolExecutor(max_workers=2) as executor:
        results = list(executor.map(paraphrase_chunk, chunks))

    output_text = " ".join(results)
    ratio = difflib.SequenceMatcher(None, input_text, output_text).ratio()
    similarity = int(ratio * 100)
    elapsed = time.perf_counter() - start
    logs.append(f"✅ Completed in {elapsed:.1f} seconds.")
    return (
        output_text,
        len(input_text.split()),
        len(output_text.split()),
        similarity,
        "<br>".join(logs),
    )
# Custom CSS: page background and font, container width, heading colour,
# and the ".header", ".card", input and footer styles.
custom_css = """
body { background-color: #f8fafc; margin: 0; font-family: 'Inter', sans-serif; }
.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
h1, h3 { color: #1e293b; }
.header { background: linear-gradient(135deg, #2563eb, #1d4ed8); padding: 2rem 1rem; color: white; text-align: center; border-radius: 1rem 1rem 0 0; }
.card { background: white; border-radius: 1rem; padding: 2rem; box-shadow: 0 4px 20px rgba(0,0,0,0.08); margin-bottom: 2rem; }
textarea, input, .gr-input { border-radius: 8px !important; }
footer { text-align: center; color: #64748b; padding: 1rem; font-size: 0.9em; }
"""
# Gradio Interface
def _write_download_file(txt):
    """Save *txt* to a temporary ``.txt`` file and reveal it for download.

    Returns a component update carrying the file path (not the byte count
    returned by ``write``) so the hidden file slot can offer the download.
    """
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".txt", encoding="utf-8"
    ) as tmp:
        tmp.write(txt or "")
    # The handle is closed here, so the content is flushed to disk.
    return gr.update(value=tmp.name, visible=True)


with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
    # Page header
    with gr.Column(elem_classes="header"):
        gr.Markdown("# AI Paraphraser Pro")
        gr.Markdown("### Rewrite like a pro — smarter, faster, and safer")

    with gr.Row():
        # Left card: input source and generation settings
        with gr.Column(scale=1):
            with gr.Column(elem_classes="card"):
                gr.Markdown("### Input")
                with gr.Tabs():
                    with gr.Tab("Paste Text"):
                        text_input = gr.Textbox(
                            label="Your Text",
                            lines=10,
                            placeholder="Paste or type your content...",
                        )
                    with gr.Tab("Upload File"):
                        file_input = gr.File(
                            label="Upload .pdf or .txt",
                            file_types=[".pdf", ".txt"],
                        )
                creativity = gr.Slider(
                    1, 5, value=3,
                    label="Creativity (1 = Conservative, 5 = Creative)",
                )
                tone = gr.Dropdown(
                    ["professional", "academic", "casual"],
                    value="professional",
                    label="Style",
                )
                submit = gr.Button("Paraphrase Now", variant="primary")

        # Right card: result, copy/download actions and statistics
        with gr.Column(scale=1):
            with gr.Column(elem_classes="card"):
                gr.Markdown("### Output")
                output_text = gr.Textbox(
                    label="Paraphrased Output", lines=10, interactive=True
                )
                with gr.Row():
                    copy_btn = gr.Button("📋 Copy")
                    download_btn = gr.Button("⬇️ Download")
                # Hidden until the download handler fills it with a file.
                download_file = gr.File(
                    label="Download", visible=False, interactive=False
                )
                with gr.Accordion("📊 Analysis", open=False):
                    with gr.Row():
                        in_words = gr.Number(label="Input Words", interactive=False)
                        out_words = gr.Number(label="Output Words", interactive=False)
                        similarity = gr.Number(label="Similarity (%)", interactive=False)
                    logs = gr.HTML(label="Process Logs")

    gr.HTML("<footer>© 2025 AI Paraphraser Pro — No content stored. Privacy-first platform.</footer>")

    # Event Hooks (registered inside the Blocks context)
    submit.click(
        paraphrase,
        inputs=[file_input, text_input, creativity, tone],
        outputs=[output_text, in_words, out_words, similarity, logs],
    )
    # Clipboard copy runs purely in the browser; no Python callback.
    copy_btn.click(
        None,
        inputs=[output_text],
        js="(text) => navigator.clipboard.writeText(text)",
    )
    download_btn.click(
        _write_download_file,
        inputs=[output_text],
        outputs=[download_file],
    )
# Launch on Hugging Face: start the Gradio server with default settings.
app.launch()
|