Spaces:

sagar007
/

phi-vision-math-assistant

Paused

App Files Files Community

sagar007 committed on Aug 23, 2024

Commit

63b02f7

verified ·

1 Parent(s): 11a9884

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -24

app.py CHANGED Viewed

@@ -5,11 +5,12 @@ import requests
 from transformers import AutoModelForCausalLM, AutoProcessor
 import torch
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 # Load the model and processor
 model_id = "microsoft/Phi-3.5-vision-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     trust_remote_code=True,
@@ -28,9 +29,7 @@ def solve_math_problem(image):
         {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
     ]
     prompt = processor.tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
     )
     # Process the input
@@ -42,35 +41,208 @@ def solve_math_problem(image):
         "temperature": 0.2,
         "do_sample": True,
     }
-    generate_ids = model.generate(**inputs,
-        eos_token_id=processor.tokenizer.eos_token_id,
-        **generation_args
-    )
     # Decode the response
     generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
-    response = processor.batch_decode(generate_ids,
-        skip_special_tokens=True,
-        clean_up_tokenization_spaces=False
-    )[0]
     # Move model back to CPU to free up GPU memory
     model.to('cpu')
     return response
 # Create the Gradio interface
-iface = gr.Interface(
-    fn=solve_math_problem,
-    inputs=gr.Image(type="pil"),
-    outputs="text",
-    title="Visual Math Problem Solver",
-    description="Upload an image of a math problem, and I'll try to solve it step by step!",
-    examples=[
-        ["example_math_problem1.jpg"],
-        ["example_math_problem2.jpg"]
-    ]
-)
 # Launch the app
 iface.launch()

 from transformers import AutoModelForCausalLM, AutoProcessor
 import torch
 import subprocess
+# Install flash-attn
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 # Load the model and processor
 model_id = "microsoft/Phi-3.5-vision-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     trust_remote_code=True,
         {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
     ]
     prompt = processor.tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
     )
     # Process the input
         "temperature": 0.2,
         "do_sample": True,
     }
+    generate_ids = model.generate(**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args)
     # Decode the response
     generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
+    response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
     # Move model back to CPU to free up GPU memory
     model.to('cpu')
     return response
+# Custom CSS handed to gr.Blocks(css=...). The value must be plain CSS rules:
+# literal <style>/</style> tags inside the string are not valid CSS and can
+# cause the rules next to them (here the `body` rule) to be dropped, so the
+# string contains only the rules themselves.
+custom_css = """
+    body {
+        background: linear-gradient(135deg, #1a1c2c, #4a4e69, #9a8c98);
+        font-family: 'Arial', sans-serif;
+        color: #f2e9e4;
+        margin: 0;
+        padding: 0;
+        min-height: 100vh;
+    }
+    #app-header {
+        text-align: center;
+        background: rgba(255, 255, 255, 0.1);
+        padding: 30px;
+        border-radius: 20px;
+        box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
+        position: relative;
+        overflow: hidden;
+        margin: 20px auto;
+        max-width: 800px;
+    }
+    #app-header::before {
+        content: "";
+        position: absolute;
+        top: -50%;
+        left: -50%;
+        width: 200%;
+        height: 200%;
+        background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
+        animation: shimmer 15s infinite linear;
+    }
+    @keyframes shimmer {
+        0% { transform: rotate(0deg); }
+        100% { transform: rotate(360deg); }
+    }
+    #app-header h1 {
+        color: #f2e9e4;
+        font-size: 2.5em;
+        margin-bottom: 15px;
+        text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
+    }
+    #app-header p {
+        font-size: 1.2em;
+        color: #c9ada7;
+    }
+    .concept-container {
+        display: flex;
+        justify-content: center;
+        gap: 20px;
+        margin-top: 30px;
+        flex-wrap: wrap;
+    }
+    .concept {
+        position: relative;
+        transition: transform 0.3s, box-shadow 0.3s;
+        border-radius: 15px;
+        overflow: hidden;
+        background: rgba(255, 255, 255, 0.1);
+        box-shadow: 0 5px 15px rgba(0,0,0,0.2);
+        width: 150px;
+        height: 150px;
+        display: flex;
+        flex-direction: column;
+        justify-content: center;
+        align-items: center;
+    }
+    .concept:hover {
+        transform: translateY(-10px) rotate(3deg);
+        box-shadow: 0 15px 30px rgba(0,0,0,0.4);
+    }
+    .concept-emoji {
+        font-size: 60px;
+        margin-bottom: 10px;
+    }
+    .concept-description {
+        background-color: rgba(110, 72, 170, 0.8);
+        color: white;
+        padding: 10px;
+        font-size: 0.9em;
+        text-align: center;
+        width: 100%;
+        position: absolute;
+        bottom: 0;
+    }
+    .artifact {
+        position: absolute;
+        background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 70%);
+        border-radius: 50%;
+        opacity: 0.5;
+        pointer-events: none;
+    }
+    .artifact.large {
+        width: 400px;
+        height: 400px;
+        top: -100px;
+        left: -200px;
+        animation: float 20s infinite ease-in-out;
+    }
+    .artifact.medium {
+        width: 300px;
+        height: 300px;
+        bottom: -150px;
+        right: -150px;
+        animation: float 15s infinite ease-in-out reverse;
+    }
+    .artifact.small {
+        width: 150px;
+        height: 150px;
+        top: 50%;
+        left: 50%;
+        transform: translate(-50%, -50%);
+        animation: pulse 5s infinite alternate;
+    }
+    @keyframes float {
+        0%, 100% { transform: translateY(0) rotate(0deg); }
+        50% { transform: translateY(-20px) rotate(10deg); }
+    }
+    @keyframes pulse {
+        0% { transform: translate(-50%, -50%) scale(1); opacity: 0.5; }
+        100% { transform: translate(-50%, -50%) scale(1.1); opacity: 0.8; }
+    }
+    /* Gradio component styling */
+    .gr-box {
+        background-color: rgba(255, 255, 255, 0.1) !important;
+        border: 1px solid rgba(255, 255, 255, 0.2) !important;
+    }
+    .gr-input, .gr-button {
+        background-color: rgba(255, 255, 255, 0.1) !important;
+        color: #f2e9e4 !important;
+        border: 1px solid rgba(255, 255, 255, 0.2) !important;
+    }
+    .gr-button:hover {
+        background-color: rgba(255, 255, 255, 0.2) !important;
+    }
+    .gr-form {
+        background-color: transparent !important;
+    }
+"""
+# Custom HTML header shown at the top of the app via gr.HTML(custom_html):
+# a title, a tagline, three decorative "artifact" divs and four feature cards.
+# The id/classes used here (#app-header, .artifact, .concept-*) are the ones
+# targeted by the selectors in custom_css.
+custom_html = """
+<div id="app-header">
+    <div class="artifact large"></div>
+    <div class="artifact medium"></div>
+    <div class="artifact small"></div>
+    <h1>Visual Math Problem Solver</h1>
+    <p>Upload an image of a math problem, and I'll try to solve it step by step!</p>
+    <div class="concept-container">
+        <div class="concept">
+            <div class="concept-emoji">🧮</div>
+            <div class="concept-description">Problem Solving</div>
+        </div>
+        <div class="concept">
+            <div class="concept-emoji">📷</div>
+            <div class="concept-description">Image Recognition</div>
+        </div>
+        <div class="concept">
+            <div class="concept-emoji">🤖</div>
+            <div class="concept-description">AI-Powered</div>
+        </div>
+        <div class="concept">
+            <div class="concept-emoji">📝</div>
+            <div class="concept-description">Step-by-Step</div>
+        </div>
+    </div>
+</div>
+"""
 # Create the Gradio interface
+# Build the UI: the custom header on top, then one row with two equally
+# scaled columns (image upload + button on the left, solution text on the right).
+with gr.Blocks(css=custom_css) as iface:
+    gr.HTML(custom_html)
+    with gr.Row():
+        with gr.Column(scale=1):
+            # type="pil" so solve_math_problem receives a PIL image object.
+            input_image = gr.Image(type="pil", label="Upload Math Problem Image")
+            submit_btn = gr.Button("Solve Problem")
+        with gr.Column(scale=1):
+            output_text = gr.Textbox(label="Solution", lines=10)
+    # Clicking the button runs the solver on the uploaded image and writes
+    # the returned text into the solution box.
+    submit_btn.click(fn=solve_math_problem, inputs=input_image, outputs=output_text)
+    # Clickable sample inputs wired to the same function and components.
+    # NOTE(review): the image paths are relative; presumably the files ship
+    # next to app.py, and cache_examples=True presumably runs the solver on
+    # them ahead of time - confirm both.
+    gr.Examples(
+        examples=[
+            ["example_math_problem1.jpg"],
+            ["example_math_problem2.jpg"]
+        ],
+        inputs=input_image,
+        outputs=output_text,
+        fn=solve_math_problem,
+        cache_examples=True,
+    )
 # Launch the app
 iface.launch()