Spaces:

sagar007
/

phi-vision-math-assistant

Paused

App Files Files Community

phi-vision-math-assistant / app.py

sagar007

Update app.py

ae24526 verified 10 months ago

raw

history blame

7.74 kB

	import os
	import subprocess
	import sys
	from io import BytesIO

	import gradio as gr
	import requests
	import spaces
	import torch
	from PIL import Image
	from transformers import AutoModelForCausalLM, AutoProcessor

	# Install flash-attn at startup.
	# The child process gets a copy of the current environment plus
	# FLASH_ATTENTION_SKIP_CUDA_BUILD; passing a dict with only that one key
	# would replace the whole environment (PATH, HOME, proxy settings, ...).
	# pip is invoked through the running interpreter and as an argument list,
	# so no shell is involved and the package lands in this interpreter's
	# environment.
	subprocess.run(
	    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
	    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
	)

	# Load the vision-language model and its processor once, at import time.
	# Both come from the same Hub repository and both run the repository's
	# own code (trust_remote_code=True).
	model_id = "microsoft/Phi-3.5-vision-instruct"
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    torch_dtype=torch.float16,
	    trust_remote_code=True,
	    use_flash_attention_2=False,  # Explicitly disable Flash Attention 2
	)
	processor = AutoProcessor.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	    num_crops=16,
	)

	@spaces.GPU(duration=120)  # Adjust the duration as needed
	def solve_math_problem(image):
	    """Ask the model to solve the math problem shown in ``image``.

	    The model is moved to the GPU for the duration of the call and moved
	    back to the CPU afterwards, even if generation fails.

	    Args:
	        image: The picture of the problem, passed unchanged to the
	            processor. The Gradio input in this file is declared with
	            ``type="pil"``. ``None`` (nothing uploaded) is accepted.

	    Returns:
	        The decoded model answer as a string, or a short hint asking for
	        an upload when ``image`` is ``None``.
	    """
	    # Clicking the button without an upload yields None; answer with a
	    # hint instead of failing inside the processor.
	    if image is None:
	        return "Please upload an image of a math problem first."

	    # Move model to GPU for this function call
	    model.to('cuda')
	    try:
	        # Prepare the input. "<|image_1|>" is the image placeholder; it
	        # must be written without backslashes.
	        messages = [
	            {
	                "role": "user",
	                "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly.",
	            },
	        ]
	        prompt = processor.tokenizer.apply_chat_template(
	            messages, tokenize=False, add_generation_prompt=True
	        )

	        # Process the input
	        inputs = processor(prompt, image, return_tensors="pt").to("cuda")

	        # Generate the response
	        generation_args = {
	            "max_new_tokens": 1000,
	            "temperature": 0.2,
	            "do_sample": True,
	        }
	        # Both mappings are unpacked into keyword arguments; passing them
	        # positionally after a keyword argument is a syntax error.
	        generate_ids = model.generate(
	            **inputs,
	            eos_token_id=processor.tokenizer.eos_token_id,
	            **generation_args,
	        )

	        # Decode the response, dropping the prompt tokens that precede
	        # the newly generated ones.
	        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
	        response = processor.batch_decode(
	            generate_ids,
	            skip_special_tokens=True,
	            clean_up_tokenization_spaces=False,
	        )[0]
	    finally:
	        # Move model back to CPU to free up GPU memory, also on failure
	        model.to('cpu')
	    return response

	# Function to load image from URL
	def load_image_from_url(url):
	    """Download ``url`` and open the response body as a PIL image.

	    Args:
	        url: Address of the image to fetch.

	    Returns:
	        The ``PIL.Image.Image`` opened from the downloaded bytes.

	    Raises:
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	    """
	    # Bound the wait so an unresponsive host cannot hang the app.
	    response = requests.get(url, timeout=30)
	    # Fail on 4xx/5xx here rather than handing an error page to PIL.
	    response.raise_for_status()
	    img = Image.open(BytesIO(response.content))
	    return img

	# Custom CSS
	# Stylesheet text for the page: the gradient body background, the animated
	# "#app-header" banner, the ".concept" cards, the floating ".artifact"
	# circles and overrides for Gradio's ".gr-*" classes. It is passed as the
	# `css` argument of gr.Blocks further down in this file.
	# NOTE(review): the text is wrapped in <style>...</style> tags; if
	# gr.Blocks(css=...) expects bare CSS rules, the opening tag may invalidate
	# the first rule (body) — confirm against the Gradio documentation.
	custom_css = """
	<style>
	body {
	background: linear-gradient(135deg, #1a1c2c, #4a4e69, #9a8c98);
	font-family: 'Arial', sans-serif;
	color: #f2e9e4;
	margin: 0;
	padding: 0;
	min-height: 100vh;
	}
	#app-header {
	text-align: center;
	background: rgba(255, 255, 255, 0.1);
	padding: 30px;
	border-radius: 20px;
	box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
	position: relative;
	overflow: hidden;
	margin: 20px auto;
	max-width: 800px;
	}
	#app-header::before {
	content: "";
	position: absolute;
	top: -50%;
	left: -50%;
	width: 200%;
	height: 200%;
	background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
	animation: shimmer 15s infinite linear;
	}
	@keyframes shimmer {
	0% { transform: rotate(0deg); }
	100% { transform: rotate(360deg); }
	}
	#app-header h1 {
	color: #f2e9e4;
	font-size: 2.5em;
	margin-bottom: 15px;
	text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
	}
	#app-header p {
	font-size: 1.2em;
	color: #c9ada7;
	}
	.concept-container {
	display: flex;
	justify-content: center;
	gap: 20px;
	margin-top: 30px;
	flex-wrap: wrap;
	}
	.concept {
	position: relative;
	transition: transform 0.3s, box-shadow 0.3s;
	border-radius: 15px;
	overflow: hidden;
	background: rgba(255, 255, 255, 0.1);
	box-shadow: 0 5px 15px rgba(0,0,0,0.2);
	width: 150px;
	height: 150px;
	display: flex;
	flex-direction: column;
	justify-content: center;
	align-items: center;
	}
	.concept:hover {
	transform: translateY(-10px) rotate(3deg);
	box-shadow: 0 15px 30px rgba(0,0,0,0.4);
	}
	.concept-emoji {
	font-size: 60px;
	margin-bottom: 10px;
	}
	.concept-description {
	background-color: rgba(110, 72, 170, 0.8);
	color: white;
	padding: 10px;
	font-size: 0.9em;
	text-align: center;
	width: 100%;
	position: absolute;
	bottom: 0;
	}
	.artifact {
	position: absolute;
	background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
	border-radius: 50%;
	opacity: 0.5;
	pointer-events: none;
	}
	.artifact.large {
	width: 400px;
	height: 400px;
	top: -100px;
	left: -200px;
	animation: float 20s infinite ease-in-out;
	}
	.artifact.medium {
	width: 300px;
	height: 300px;
	bottom: -150px;
	right: -150px;
	animation: float 15s infinite ease-in-out reverse;
	}
	.artifact.small {
	width: 150px;
	height: 150px;
	top: 50%;
	left: 50%;
	transform: translate(-50%, -50%);
	animation: pulse 5s infinite alternate;
	}
	@keyframes float {
	0%, 100% { transform: translateY(0) rotate(0deg); }
	50% { transform: translateY(-20px) rotate(10deg); }
	}
	@keyframes pulse {
	0% { transform: translate(-50%, -50%) scale(1); opacity: 0.5; }
	100% { transform: translate(-50%, -50%) scale(1.1); opacity: 0.8; }
	}
	/* Gradio component styling */
	.gr-box {
	background-color: rgba(255, 255, 255, 0.1) !important;
	border: 1px solid rgba(255, 255, 255, 0.2) !important;
	}
	.gr-input, .gr-button {
	background-color: rgba(255, 255, 255, 0.1) !important;
	color: #f2e9e4 !important;
	border: 1px solid rgba(255, 255, 255, 0.2) !important;
	}
	.gr-button:hover {
	background-color: rgba(255, 255, 255, 0.2) !important;
	}
	.gr-form {
	background-color: transparent !important;
	}
	</style>
	"""

	# Custom HTML
	# Markup for the banner shown at the top of the page: three decorative
	# "artifact" divs, the title and tagline, and four emoji "concept" cards.
	# The ids and classes used here are the ones styled by custom_css; the
	# string is rendered through gr.HTML when the interface is built.
	custom_html = """
	<div id="app-header">
	<div class="artifact large"></div>
	<div class="artifact medium"></div>
	<div class="artifact small"></div>
	<h1>Visual Math Problem Solver</h1>
	<p>Upload an image of a math problem, and I'll try to solve it step by step!</p>
	<div class="concept-container">
	<div class="concept">
	<div class="concept-emoji">🧮</div>
	<div class="concept-description">Problem Solving</div>
	</div>
	<div class="concept">
	<div class="concept-emoji">📷</div>
	<div class="concept-description">Image Recognition</div>
	</div>
	<div class="concept">
	<div class="concept-emoji">🤖</div>
	<div class="concept-description">AI-Powered</div>
	</div>
	<div class="concept">
	<div class="concept-emoji">📝</div>
	<div class="concept-description">Step-by-Step</div>
	</div>
	</div>
	</div>
	"""


	# Create the Gradio interface
	def _solve_example(example):
	    """Run the solver for one entry of the examples list.

	    The entries are URLs, but the examples are bound to an image input
	    declared with type="pil", so the value may reach this function either
	    as the URL string or as an already-loaded image (presumably the
	    latter — confirm against the Gradio version in use). Both are handled.
	    """
	    if isinstance(example, str):
	        example = load_image_from_url(example)
	    return solve_math_problem(example)


	# Layout: banner on top, then one row with the upload column (image +
	# button) next to the solution column. The nesting below is reconstructed
	# from the statement order.
	with gr.Blocks(css=custom_css) as iface:
	    gr.HTML(custom_html)
	    with gr.Row():
	        with gr.Column(scale=1):
	            input_image = gr.Image(type="pil", label="Upload Math Problem Image")
	            submit_btn = gr.Button("Solve Problem")
	        with gr.Column(scale=1):
	            output_text = gr.Textbox(label="Solution", lines=10)

	    # The button feeds the uploaded image to the solver and shows the answer.
	    submit_btn.click(fn=solve_math_problem, inputs=input_image, outputs=output_text)

	    gr.Examples(
	        examples=[
	            "https://i.imgur.com/2Gwd3bN.jpg",  # Replace with actual URLs of math problem images
	            "https://i.imgur.com/wPw5YtB.jpg",
	        ],
	        inputs=input_image,
	        outputs=output_text,
	        fn=_solve_example,
	        cache_examples=True,
	    )

	# Launch the app
	iface.launch()