CamiloVega committed on
Commit
058e66c
·
verified ·
1 Parent(s): 7796d4c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+ import fitz # PyMuPDF
5
+ from docx import Document
6
+
7
# Load model and tokenizer.
# Half precision is only used when a CUDA device is present: many CPU kernels
# are missing or very slow in float16, so CPU inference falls back to float32.
model_name = "microsoft/phi-2"
device = "cuda" if torch.cuda.is_available() else "cpu"
model_dtype = torch.float16 if device == "cuda" else torch.float32

# NOTE(review): trust_remote_code=True executes code shipped with the model
# repository; keep it only if the installed transformers version needs it.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=model_dtype,
).to(device)
model.eval()  # inference only: disables dropout
11
+
12
def extract_text_from_pdf(file):
    """Extract the plain text of every page of a PDF.

    Args:
        file: A binary file-like object exposing ``read()``, a filesystem
            path, or an object whose ``name`` attribute holds the path
            (upload widgets may hand over any of these forms).

    Returns:
        The text of all pages concatenated in page order.
    """
    import os

    if hasattr(file, "read"):
        # File-like upload: hand the raw bytes to PyMuPDF as an in-memory stream.
        pdf = fitz.open(stream=file.read(), filetype="pdf")
    elif isinstance(file, (str, os.PathLike)):
        # Already a path; do not look at ``.name`` here because for
        # ``pathlib.Path`` that is only the base name, not the full path.
        pdf = fitz.open(os.fspath(file))
    else:
        # Wrapper object that carries the on-disk path in ``.name``.
        pdf = fitz.open(file.name)

    # The context manager closes the document even if extraction raises.
    with pdf:
        return "".join(page.get_text() for page in pdf)
18
+
19
def extract_text_from_docx(file):
    """Return the text of every paragraph in a DOCX file, one paragraph per line.

    Args:
        file: Whatever ``docx.Document`` accepts as its source argument.

    Returns:
        The paragraph texts joined with newline characters.
    """
    document = Document(file)
    paragraph_texts = (para.text for para in document.paragraphs)
    return "\n".join(paragraph_texts)
22
+
23
def convert_to_story(file):
    """Turn an uploaded PDF or DOCX news article into a short children's story.

    Args:
        file: The upload handed over by the file input: ``None`` when nothing
            was uploaded, otherwise a path string or an object whose ``name``
            attribute holds the file name.

    Returns:
        The generated story, or a user-facing message when no file was
        uploaded, the format is unsupported, or no text could be extracted.
    """
    if file is None:
        return "Please upload a file."

    # Works for both path strings and wrapper objects exposing ``.name``.
    file_name = str(getattr(file, "name", file))
    file_extension = file_name.split('.')[-1].lower()

    if file_extension == 'pdf':
        text = extract_text_from_pdf(file)
    elif file_extension == 'docx':
        text = extract_text_from_docx(file)
    else:
        return "Unsupported file format. Please upload a PDF or DOCX file."

    if not text.strip():
        # E.g. a scanned PDF with no text layer: nothing to rewrite.
        return "No text could be extracted from the uploaded file."

    max_prompt_tokens = 1024
    instruction = "Convert the following news article into a short children's story (maximum 200 words):\n\n"
    marker = "\n\nChildren's story:"

    # Truncate the ARTICLE, not the whole prompt: cutting the prompt from the
    # right would drop the trailing "Children's story:" cue for long articles
    # and the model would simply continue the news text.
    overhead = len(tokenizer(instruction + marker)["input_ids"])
    # Small margin because re-tokenizing the joined prompt can differ by a
    # few tokens from the sum of its separately tokenized parts.
    article_budget = max(max_prompt_tokens - overhead - 8, 1)
    article_ids = tokenizer(text)["input_ids"]
    if len(article_ids) > article_budget:
        text = tokenizer.decode(article_ids[:article_budget], skip_special_tokens=True)

    prompt = f"{instruction}{text}{marker}"

    # truncation stays on purely as a safety net for the margin above.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_prompt_tokens)
    inputs = inputs.to(model.device)  # tensors must live where the model does
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            # Explicit pad id for tokenizers that define no pad token.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens instead of string-splitting the
    # full output, which breaks if the marker is missing or repeated.
    story = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return story.strip()
51
+
52
# Gradio UI: a single file upload wired to convert_to_story, with the
# result shown as plain text.
upload_input = gr.File(label="Upload PDF or DOCX file")

iface = gr.Interface(
    fn=convert_to_story,
    inputs=upload_input,
    outputs="text",
    title="News to Children's Story Converter",
    description="Upload a news article in PDF or DOCX format to convert it into a short children's story.",
)

# Start the web app.
iface.launch()