sagar007 committed on
Commit
63b02f7
·
verified ·
1 Parent(s): 11a9884

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -24
app.py CHANGED
@@ -5,11 +5,12 @@ import requests
5
  from transformers import AutoModelForCausalLM, AutoProcessor
6
  import torch
7
  import subprocess
 
 
8
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
9
 
10
  # Load the model and processor
11
  model_id = "microsoft/Phi-3.5-vision-instruct"
12
-
13
  model = AutoModelForCausalLM.from_pretrained(
14
  model_id,
15
  trust_remote_code=True,
@@ -28,9 +29,7 @@ def solve_math_problem(image):
28
  {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
29
  ]
30
  prompt = processor.tokenizer.apply_chat_template(
31
- messages,
32
- tokenize=False,
33
- add_generation_prompt=True
34
  )
35
 
36
  # Process the input
@@ -42,35 +41,208 @@ def solve_math_problem(image):
42
  "temperature": 0.2,
43
  "do_sample": True,
44
  }
45
- generate_ids = model.generate(**inputs,
46
- eos_token_id=processor.tokenizer.eos_token_id,
47
- **generation_args
48
- )
49
 
50
  # Decode the response
51
  generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
52
- response = processor.batch_decode(generate_ids,
53
- skip_special_tokens=True,
54
- clean_up_tokenization_spaces=False
55
- )[0]
56
 
57
  # Move model back to CPU to free up GPU memory
58
  model.to('cpu')
59
-
60
  return response
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # Create the Gradio interface
63
- iface = gr.Interface(
64
- fn=solve_math_problem,
65
- inputs=gr.Image(type="pil"),
66
- outputs="text",
67
- title="Visual Math Problem Solver",
68
- description="Upload an image of a math problem, and I'll try to solve it step by step!",
69
- examples=[
70
- ["example_math_problem1.jpg"],
71
- ["example_math_problem2.jpg"]
72
- ]
73
- )
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Launch the app
76
  iface.launch()
 
5
  from transformers import AutoModelForCausalLM, AutoProcessor
6
  import torch
7
  import subprocess
8
+
9
+ # Install flash-attn
10
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
11
 
12
  # Load the model and processor
13
  model_id = "microsoft/Phi-3.5-vision-instruct"
 
14
  model = AutoModelForCausalLM.from_pretrained(
15
  model_id,
16
  trust_remote_code=True,
 
29
  {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
30
  ]
31
  prompt = processor.tokenizer.apply_chat_template(
32
+ messages, tokenize=False, add_generation_prompt=True
 
 
33
  )
34
 
35
  # Process the input
 
41
  "temperature": 0.2,
42
  "do_sample": True,
43
  }
44
+ generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)
 
 
 
45
 
46
  # Decode the response
47
  generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
48
+ response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
 
 
 
49
 
50
  # Move model back to CPU to free up GPU memory
51
  model.to('cpu')
 
52
  return response
53
 
54
+ # Custom CSS
55
+ custom_css = """
56
+ <style>
57
+ body {
58
+ background: linear-gradient(135deg, #1a1c2c, #4a4e69, #9a8c98);
59
+ font-family: 'Arial', sans-serif;
60
+ color: #f2e9e4;
61
+ margin: 0;
62
+ padding: 0;
63
+ min-height: 100vh;
64
+ }
65
+ #app-header {
66
+ text-align: center;
67
+ background: rgba(255, 255, 255, 0.1);
68
+ padding: 30px;
69
+ border-radius: 20px;
70
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
71
+ position: relative;
72
+ overflow: hidden;
73
+ margin: 20px auto;
74
+ max-width: 800px;
75
+ }
76
+ #app-header::before {
77
+ content: "";
78
+ position: absolute;
79
+ top: -50%;
80
+ left: -50%;
81
+ width: 200%;
82
+ height: 200%;
83
+ background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
84
+ animation: shimmer 15s infinite linear;
85
+ }
86
+ @keyframes shimmer {
87
+ 0% { transform: rotate(0deg); }
88
+ 100% { transform: rotate(360deg); }
89
+ }
90
+ #app-header h1 {
91
+ color: #f2e9e4;
92
+ font-size: 2.5em;
93
+ margin-bottom: 15px;
94
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
95
+ }
96
+ #app-header p {
97
+ font-size: 1.2em;
98
+ color: #c9ada7;
99
+ }
100
+ .concept-container {
101
+ display: flex;
102
+ justify-content: center;
103
+ gap: 20px;
104
+ margin-top: 30px;
105
+ flex-wrap: wrap;
106
+ }
107
+ .concept {
108
+ position: relative;
109
+ transition: transform 0.3s, box-shadow 0.3s;
110
+ border-radius: 15px;
111
+ overflow: hidden;
112
+ background: rgba(255, 255, 255, 0.1);
113
+ box-shadow: 0 5px 15px rgba(0,0,0,0.2);
114
+ width: 150px;
115
+ height: 150px;
116
+ display: flex;
117
+ flex-direction: column;
118
+ justify-content: center;
119
+ align-items: center;
120
+ }
121
+ .concept:hover {
122
+ transform: translateY(-10px) rotate(3deg);
123
+ box-shadow: 0 15px 30px rgba(0,0,0,0.4);
124
+ }
125
+ .concept-emoji {
126
+ font-size: 60px;
127
+ margin-bottom: 10px;
128
+ }
129
+ .concept-description {
130
+ background-color: rgba(110, 72, 170, 0.8);
131
+ color: white;
132
+ padding: 10px;
133
+ font-size: 0.9em;
134
+ text-align: center;
135
+ width: 100%;
136
+ position: absolute;
137
+ bottom: 0;
138
+ }
139
+ .artifact {
140
+ position: absolute;
141
+ background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
142
+ border-radius: 50%;
143
+ opacity: 0.5;
144
+ pointer-events: none;
145
+ }
146
+ .artifact.large {
147
+ width: 400px;
148
+ height: 400px;
149
+ top: -100px;
150
+ left: -200px;
151
+ animation: float 20s infinite ease-in-out;
152
+ }
153
+ .artifact.medium {
154
+ width: 300px;
155
+ height: 300px;
156
+ bottom: -150px;
157
+ right: -150px;
158
+ animation: float 15s infinite ease-in-out reverse;
159
+ }
160
+ .artifact.small {
161
+ width: 150px;
162
+ height: 150px;
163
+ top: 50%;
164
+ left: 50%;
165
+ transform: translate(-50%, -50%);
166
+ animation: pulse 5s infinite alternate;
167
+ }
168
+ @keyframes float {
169
+ 0%, 100% { transform: translateY(0) rotate(0deg); }
170
+ 50% { transform: translateY(-20px) rotate(10deg); }
171
+ }
172
+ @keyframes pulse {
173
+ 0% { transform: translate(-50%, -50%) scale(1); opacity: 0.5; }
174
+ 100% { transform: translate(-50%, -50%) scale(1.1); opacity: 0.8; }
175
+ }
176
+ /* Gradio component styling */
177
+ .gr-box {
178
+ background-color: rgba(255, 255, 255, 0.1) !important;
179
+ border: 1px solid rgba(255, 255, 255, 0.2) !important;
180
+ }
181
+ .gr-input, .gr-button {
182
+ background-color: rgba(255, 255, 255, 0.1) !important;
183
+ color: #f2e9e4 !important;
184
+ border: 1px solid rgba(255, 255, 255, 0.2) !important;
185
+ }
186
+ .gr-button:hover {
187
+ background-color: rgba(255, 255, 255, 0.2) !important;
188
+ }
189
+ .gr-form {
190
+ background-color: transparent !important;
191
+ }
192
+ </style>
193
+ """
194
+
195
+ # Custom HTML
196
+ custom_html = """
197
+ <div id="app-header">
198
+ <div class="artifact large"></div>
199
+ <div class="artifact medium"></div>
200
+ <div class="artifact small"></div>
201
+ <h1>Visual Math Problem Solver</h1>
202
+ <p>Upload an image of a math problem, and I'll try to solve it step by step!</p>
203
+ <div class="concept-container">
204
+ <div class="concept">
205
+ <div class="concept-emoji">🧮</div>
206
+ <div class="concept-description">Problem Solving</div>
207
+ </div>
208
+ <div class="concept">
209
+ <div class="concept-emoji">📷</div>
210
+ <div class="concept-description">Image Recognition</div>
211
+ </div>
212
+ <div class="concept">
213
+ <div class="concept-emoji">🤖</div>
214
+ <div class="concept-description">AI-Powered</div>
215
+ </div>
216
+ <div class="concept">
217
+ <div class="concept-emoji">📝</div>
218
+ <div class="concept-description">Step-by-Step</div>
219
+ </div>
220
+ </div>
221
+ </div>
222
+ """
223
+
224
  # Create the Gradio interface
225
+ with gr.Blocks(css=custom_css) as iface:
226
+ gr.HTML(custom_html)
227
+ with gr.Row():
228
+ with gr.Column(scale=1):
229
+ input_image = gr.Image(type="pil", label="Upload Math Problem Image")
230
+ submit_btn = gr.Button("Solve Problem")
231
+ with gr.Column(scale=1):
232
+ output_text = gr.Textbox(label="Solution", lines=10)
233
+
234
+ submit_btn.click(fn=solve_math_problem, inputs=input_image, outputs=output_text)
235
+
236
+ gr.Examples(
237
+ examples=[
238
+ ["example_math_problem1.jpg"],
239
+ ["example_math_problem2.jpg"]
240
+ ],
241
+ inputs=input_image,
242
+ outputs=output_text,
243
+ fn=solve_math_problem,
244
+ cache_examples=True,
245
+ )
246
 
247
  # Launch the app
248
  iface.launch()