Spaces:

CamiloVega
/

cuentos

Sleeping

App Files Files Community

cuentos / app.py

CamiloVega

Update app.py

1193412 verified 8 months ago

raw

history blame contribute delete

1.94 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import fitz # PyMuPDF
	from docx import Document

	# Load the model and tokenizer once, at import time; convert_to_story reads
	# these module-level objects on every call instead of reloading them.
	model_name = "microsoft/phi-2"
	# trust_remote_code=True is passed to both loaders.
	# NOTE(review): this presumably allows code shipped with the hub repository
	# to run locally -- confirm the repository is trusted.
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
	# Weights are requested as torch.float16 and no device is chosen here.
	# NOTE(review): confirm half-precision generation works on the hardware this
	# script is deployed on.
	model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16)

	def extract_text_from_pdf(file):
	    """Return the plain text of every page of a PDF, in page order.

	    Args:
	        file: Either a binary file-like object whose ``read()`` returns the
	            PDF bytes, or a filesystem path to the PDF.

	    Returns:
	        The text of all pages concatenated without a separator.
	    """
	    if hasattr(file, "read"):
	        # In-memory route: hand the raw bytes to PyMuPDF and tell it the type,
	        # since there is no file name to infer it from.
	        source = {"stream": file.read(), "filetype": "pdf"}
	    else:
	        source = {"filename": file, "filetype": "pdf"}

	    # The context manager closes the document even when a page fails to
	    # parse; the original left it open.
	    with fitz.open(**source) as doc:
	        return "".join(page.get_text() for page in doc)

	def extract_text_from_docx(file):
	    """Return the text of a DOCX file with one paragraph per line.

	    Args:
	        file: The document source, passed unchanged to ``Document``.

	    Returns:
	        The ``text`` of each entry in the document's ``paragraphs``, joined
	        with newline characters.
	    """
	    paragraphs = Document(file).paragraphs
	    return "\n".join(entry.text for entry in paragraphs)

	def _upload_path(file):
	    """Return the path or name attached to an upload, or an empty string."""
	    if isinstance(file, str):
	        return file
	    return str(getattr(file, "name", "") or "")


	def convert_to_story(file):
	    """Turn an uploaded PDF or DOCX news article into a short children's story.

	    Args:
	        file: The upload from the Gradio ``File`` input: a filesystem path
	            string, or an object exposing ``name`` (and optionally
	            ``read``). ``None`` means nothing was uploaded.

	    Returns:
	        The generated story, or a short user-facing message when no file was
	        given, the extension is not ``pdf``/``docx``, or no text could be
	        extracted.

	    Raises:
	        OSError: If the upload is only a path and that path cannot be opened.
	    """
	    if file is None:
	        return "Please upload a file."

	    path = _upload_path(file)
	    # rpartition keeps a dot-less name such as "pdf" from being mistaken for
	    # an extension.
	    _, dot, suffix = path.rpartition(".")
	    file_extension = suffix.lower() if dot else ""

	    if file_extension == "pdf":
	        extract = extract_text_from_pdf
	    elif file_extension == "docx":
	        extract = extract_text_from_docx
	    else:
	        return "Unsupported file format. Please upload a PDF or DOCX file."

	    # Prefer the copy on disk: it works whether the upload arrived as a path
	    # string or as a wrapper object.
	    # NOTE(review): some Gradio versions appear to hand over a wrapper whose
	    # ``name`` is the real upload while the wrapper itself reads as empty --
	    # confirm against the deployed version.
	    try:
	        handle = open(path, "rb")
	    except OSError:
	        if not hasattr(file, "read"):
	            raise
	        # Nothing on disk under that name: read the object itself.
	        text = extract(file)
	    else:
	        with handle:
	            text = extract(handle)

	    if not text.strip():
	        return "No readable text was found in the uploaded file."

	    instruction = "Convert the following news article into a short children's story (maximum 200 words):\n\n"
	    cue = "\n\nChildren's story:"
	    max_prompt_tokens = 1024

	    # Truncate the article, not the whole prompt. Cutting the prompt's tail
	    # would drop the trailing cue, so the model would just continue the
	    # article. A few spare tokens absorb re-tokenisation differences.
	    reserved = len(tokenizer(instruction + cue)["input_ids"]) + 8
	    article_budget = max_prompt_tokens - reserved
	    article_ids = tokenizer(text)["input_ids"]
	    if len(article_ids) > article_budget:
	        text = tokenizer.decode(article_ids[:article_budget], skip_special_tokens=True)

	    prompt = f"{instruction}{text}{cue}"

	    # truncation stays on only as a backstop for the hard input limit.
	    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_prompt_tokens)
	    # Keep the inputs on whatever device the model lives on.
	    inputs = inputs.to(model.device)

	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=200,
	            temperature=0.7,
	            top_p=0.95,
	            do_sample=True,
	        )

	    # The output starts with the prompt tokens; decode only what was
	    # generated rather than searching the text for the cue string.
	    prompt_length = inputs["input_ids"].shape[1]
	    story = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
	    return story.strip()

	# Gradio UI: one file-upload input wired to convert_to_story, with the
	# returned string shown as plain text.
	iface = gr.Interface(
	fn=convert_to_story,
	inputs=gr.File(label="Upload PDF or DOCX file"),
	outputs="text",
	title="News to Children's Story Converter",
	description="Upload a news article in PDF or DOCX format to convert it into a short children's story."
	)

	# Launch the interface as soon as this module is executed (no __main__ guard).
	iface.launch()