sagar007 committed on
Commit
c9e151d
1 Parent(s): 4048302

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ from PIL import Image
4
+ import requests
5
+ from transformers import AutoModelForCausalLM, AutoProcessor
6
+ import torch
7
+
8
# Checkpoint shared by the model and its processor.
model_id = "microsoft/Phi-3.5-vision-instruct"

# Load the weights once at import time, in half precision.
# trust_remote_code=True lets transformers run the custom modelling code
# that ships with the checkpoint repository.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# The processor turns a prompt plus image into model inputs.
# NOTE(review): num_crops=16 is forwarded to the checkpoint's own processor
# code; its exact effect is defined there — confirm against the model card.
processor = AutoProcessor.from_pretrained(
    model_id,
    num_crops=16,
    trust_remote_code=True,
)
16
+
17
@spaces.GPU(duration=120)  # Adjust the duration as needed
def solve_math_problem(image):
    """Ask the vision model to solve the math problem shown in ``image``.

    A single-turn chat prompt referencing the image is built, run through
    the module-level ``processor`` and ``model``, and only the newly
    generated tokens are decoded into text.

    Args:
        image: The image to analyse, passed straight to ``processor``.
            ``None`` (nothing uploaded) is handled explicitly.

    Returns:
        The decoded model answer as a string, or a short hint asking for an
        image when ``image`` is ``None``.
    """
    # Without an image the processor call below would fail with an opaque
    # error; return a readable hint to the text output instead.
    if image is None:
        return "Please upload an image of a math problem."

    try:
        # Move model to GPU for this function call
        model.to('cuda')

        # Prepare the input
        messages = [
            {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
        ]
        prompt = processor.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Process the input
        inputs = processor(prompt, image, return_tensors="pt").to("cuda")

        # Generate the response
        generation_args = {
            "max_new_tokens": 1000,
            "temperature": 0.2,
            "do_sample": True,
        }
        generate_ids = model.generate(
            **inputs,
            eos_token_id=processor.tokenizer.eos_token_id,
            **generation_args,
        )

        # Decode the response: drop the prompt tokens so that only the newly
        # generated continuation is turned back into text.
        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
        response = processor.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]

        return response
    finally:
        # Move model back to CPU to free up GPU memory. Doing this in
        # ``finally`` means a failed request does not leave the model on
        # the GPU.
        model.to('cpu')
57
+
58
# Input widget: hands the uploaded picture to the callback as a PIL image.
image_input = gr.Image(type="pil")

# Example images offered in the UI; each inner list is one example row.
example_images = [
    ["example_math_problem1.jpg"],
    ["example_math_problem2.jpg"],
]

# Wire the solver function to an image-in / text-out Gradio interface.
iface = gr.Interface(
    fn=solve_math_problem,
    inputs=image_input,
    outputs="text",
    examples=example_images,
    title="Visual Math Problem Solver",
    description="Upload an image of a math problem, and I'll try to solve it step by step!",
)

# Start serving the app.
iface.launch()