# Spaces: Running on Zero
import os
import subprocess
import sys
from io import BytesIO

# Keep `spaces` ahead of torch/transformers, as in the original import order.
import gradio as gr
import requests
import spaces
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
# Install flash-attn at startup.
# The child environment is built on top of os.environ: passing a bare dict as
# `env` would replace the whole environment (PATH, HOME, proxy settings, ...).
# `sys.executable -m pip` targets the interpreter running this app, and the
# argument list avoids going through a shell.
# Best-effort, as before: a failed install does not stop the app.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)
# Load the model and processor
model_id = "microsoft/Phi-3.5-vision-instruct"

# Keyword arguments for the model load, kept in one place for readability.
_model_load_kwargs = {
    "trust_remote_code": True,
    "torch_dtype": torch.float16,
    "use_flash_attention_2": False,  # Explicitly disable Flash Attention 2
}
model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_kwargs)

# The processor is loaded from the same checkpoint id as the model.
_processor_load_kwargs = {"trust_remote_code": True, "num_crops": 16}
processor = AutoProcessor.from_pretrained(model_id, **_processor_load_kwargs)
# NOTE(review): `spaces` is imported but never used in the visible code, and the
# comment that originally sat here read "Adjust the duration as needed".
# Presumably a `@spaces.GPU(duration=...)` decorator belongs on this function;
# confirm against the deployed app.
def solve_math_problem(image):
    """Ask the vision model to solve the math problem shown in ``image``.

    Args:
        image: The picture of the problem, as delivered by the
            ``gr.Image(type="pil")`` input. May be ``None`` when the button is
            pressed without an upload.

    Returns:
        The decoded model response as a string, or a short message asking for
        an image when ``image`` is ``None``.
    """
    if image is None:
        # Nothing to process; answer before touching the model or the GPU.
        return "Please upload an image of a math problem first."

    # Move model to GPU for this function call
    model.to('cuda')
    try:
        # Prepare the input
        messages = [
            {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
        ]
        prompt = processor.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Process the input
        inputs = processor(prompt, image, return_tensors="pt").to("cuda")

        # Generate the response
        generation_args = {
            "max_new_tokens": 1000,
            "temperature": 0.2,
            "do_sample": True,
        }
        generate_ids = model.generate(
            **inputs,
            eos_token_id=processor.tokenizer.eos_token_id,
            **generation_args,
        )

        # Drop the prompt tokens so only the newly generated part is decoded.
        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
        return processor.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
    finally:
        # Move model back to CPU to free up GPU memory, even when preprocessing
        # or generation raised.
        model.to('cpu')
# Function to load image from URL
def load_image_from_url(url, timeout=30):
    """Download ``url`` and open the response body as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        The ``PIL.Image.Image`` opened from the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of handing an error page to the image decoder.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))
# Custom CSS
# Passed to gr.Blocks(css=...), which expects raw CSS. The string therefore must
# not be wrapped in <style> tags: they would be parsed as part of the first
# selector and invalidate the `body` rule.
custom_css = """
body {
background: linear-gradient(135deg, #1a1c2c, #4a4e69, #9a8c98);
font-family: 'Arial', sans-serif;
color: #f2e9e4;
margin: 0;
padding: 0;
min-height: 100vh;
}
#app-header {
text-align: center;
background: rgba(255, 255, 255, 0.1);
padding: 30px;
border-radius: 20px;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
position: relative;
overflow: hidden;
margin: 20px auto;
max-width: 800px;
}
#app-header::before {
content: "";
position: absolute;
top: -50%;
left: -50%;
width: 200%;
height: 200%;
background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
animation: shimmer 15s infinite linear;
}
@keyframes shimmer {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
#app-header h1 {
color: #f2e9e4;
font-size: 2.5em;
margin-bottom: 15px;
text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
}
#app-header p {
font-size: 1.2em;
color: #c9ada7;
}
.concept-container {
display: flex;
justify-content: center;
gap: 20px;
margin-top: 30px;
flex-wrap: wrap;
}
.concept {
position: relative;
transition: transform 0.3s, box-shadow 0.3s;
border-radius: 15px;
overflow: hidden;
background: rgba(255, 255, 255, 0.1);
box-shadow: 0 5px 15px rgba(0,0,0,0.2);
width: 150px;
height: 150px;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
.concept:hover {
transform: translateY(-10px) rotate(3deg);
box-shadow: 0 15px 30px rgba(0,0,0,0.4);
}
.concept-emoji {
font-size: 60px;
margin-bottom: 10px;
}
.concept-description {
background-color: rgba(110, 72, 170, 0.8);
color: white;
padding: 10px;
font-size: 0.9em;
text-align: center;
width: 100%;
position: absolute;
bottom: 0;
}
.artifact {
position: absolute;
background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
border-radius: 50%;
opacity: 0.5;
pointer-events: none;
}
.artifact.large {
width: 400px;
height: 400px;
top: -100px;
left: -200px;
animation: float 20s infinite ease-in-out;
}
.artifact.medium {
width: 300px;
height: 300px;
bottom: -150px;
right: -150px;
animation: float 15s infinite ease-in-out reverse;
}
.artifact.small {
width: 150px;
height: 150px;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
animation: pulse 5s infinite alternate;
}
@keyframes float {
0%, 100% { transform: translateY(0) rotate(0deg); }
50% { transform: translateY(-20px) rotate(10deg); }
}
@keyframes pulse {
0% { transform: translate(-50%, -50%) scale(1); opacity: 0.5; }
100% { transform: translate(-50%, -50%) scale(1.1); opacity: 0.8; }
}
/* Gradio component styling */
.gr-box {
background-color: rgba(255, 255, 255, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
}
.gr-input, .gr-button {
background-color: rgba(255, 255, 255, 0.1) !important;
color: #f2e9e4 !important;
border: 1px solid rgba(255, 255, 255, 0.2) !important;
}
.gr-button:hover {
background-color: rgba(255, 255, 255, 0.2) !important;
}
.gr-form {
background-color: transparent !important;
}
"""
# Custom HTML
# Header markup rendered through gr.HTML: three decorative "artifact" circles,
# the title, a tagline, and four concept cards (emoji + caption).
# NOTE(review): the emoji had been mis-decoded into Thai-script residue. The
# first three were restored from the bytes that survived plus their captions;
# nothing of the fourth survived, so the memo emoji is a best guess - confirm.
custom_html = """
<div id="app-header">
<div class="artifact large"></div>
<div class="artifact medium"></div>
<div class="artifact small"></div>
<h1>Visual Math Problem Solver</h1>
<p>Upload an image of a math problem, and I'll try to solve it step by step!</p>
<div class="concept-container">
<div class="concept">
<div class="concept-emoji">🧮</div>
<div class="concept-description">Problem Solving</div>
</div>
<div class="concept">
<div class="concept-emoji">📷</div>
<div class="concept-description">Image Recognition</div>
</div>
<div class="concept">
<div class="concept-emoji">🤖</div>
<div class="concept-description">AI-Powered</div>
</div>
<div class="concept">
<div class="concept-emoji">📝</div>
<div class="concept-description">Step-by-Step</div>
</div>
</div>
</div>
"""
# Create the Gradio interface
# Layout: header HTML on top, then one row with the image input and button on
# the left and the solution textbox on the right, followed by example images.
with gr.Blocks(css=custom_css) as iface:
    gr.HTML(custom_html)
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Math Problem Image")
            submit_btn = gr.Button("Solve Problem")
        with gr.Column(scale=1):
            output_text = gr.Textbox(label="Solution", lines=10)
    submit_btn.click(fn=solve_math_problem, inputs=input_image, outputs=output_text)
    gr.Examples(
        examples=[
            "https://i.imgur.com/2Gwd3bN.jpg",  # Replace with actual URLs of math problem images
            "https://i.imgur.com/wPw5YtB.jpg",
        ],
        inputs=input_image,
        outputs=output_text,
        # Example values are routed through `input_image` like any upload, so
        # the callback gets what the component produces (an image), not the URL
        # string. Downloading the URL again inside the callback would pass a
        # non-URL to requests, so the solver is called directly.
        fn=solve_math_problem,
        cache_examples=True,
    )

# Launch the app
iface.launch()