Update app.py
app.py
CHANGED
@@ -1,33 +1,31 @@
-import os
 import torch
 import streamlit as st
 from PIL import Image
 from deepseek_vl2.serve.inference import load_model, deepseek_generate, convert_conversation_to_prompts
-from deepseek_vl2.models.conversation import SeparatorStyle
 from deepseek_vl2.serve.app_modules.utils import configure_logger, strip_stop_words, pil_to_base64
 
-#
+# Set up logging
 logger = configure_logger()
 
-#
+# Models and deployment
 MODELS = ["deepseek-ai/deepseek-vl2-tiny"]
 DEPLOY_MODELS = {}
 IMAGE_TOKEN = "<image>"
 
-#
+# Fetch model
 def fetch_model(model_name: str, dtype=torch.bfloat16):
     global DEPLOY_MODELS
     if model_name not in DEPLOY_MODELS:
-
+        logger.info(f"Loading {model_name}...")
         model_info = load_model(model_name, dtype=dtype)
         tokenizer, model, vl_chat_processor = model_info
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         model = model.to(device)
         DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
-
+        logger.info(f"Loaded {model_name} on {device}")
     return DEPLOY_MODELS[model_name]
 
-# Generate prompt with
+# Generate prompt with history
 def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
     conversation = vl_chat_processor.new_chat_template()
     if history:
@@ -39,7 +37,7 @@ def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
     conversation.append_message(conversation.roles[1], "")
     return conversation
 
-# Convert conversation to
+# Convert conversation to gradio format
 def to_gradio_chatbot(conv):
     ret = []
     for i, (role, msg) in enumerate(conv.messages[conv.offset:]):
@@ -54,12 +52,15 @@ def to_gradio_chatbot(conv):
            ret[-1][-1] = msg
     return ret
 
-#
+# Predict function
 def predict(text, images, chatbot, history, model_name="deepseek-ai/deepseek-vl2-tiny"):
+    logger.info("Starting predict function...")
     tokenizer, vl_gpt, vl_chat_processor = fetch_model(model_name)
     if not text:
+        logger.warning("Empty text input detected.")
         return chatbot, history, "Empty context."
 
+    logger.info("Processing images...")
     pil_images = [Image.open(img).convert("RGB") for img in images] if images else []
     conversation = generate_prompt_with_history(
         text, pil_images, history, vl_chat_processor, tokenizer
@@ -69,6 +70,7 @@ def predict(text, images, chatbot, history, model_name="deepseek-ai/deepseek-vl2-tiny"):
     gradio_chatbot_output = to_gradio_chatbot(conversation)
 
     full_response = ""
+    logger.info("Generating response...")
     try:
         with torch.no_grad():
             for x in deepseek_generate(
@@ -86,31 +88,35 @@ def predict(text, images, chatbot, history, model_name="deepseek-ai/deepseek-vl2-tiny"):
                response = strip_stop_words(full_response, stop_words)
                conversation.update_last_message(response)
                gradio_chatbot_output[-1][1] = response
+                logger.info(f"Yielding partial response: {response[:50]}...")
                yield gradio_chatbot_output, conversation.messages, "Generating..."
 
+        logger.info("Generation complete.")
         torch.cuda.empty_cache()
         yield gradio_chatbot_output, conversation.messages, "Success"
     except Exception as e:
+        logger.error(f"Error in generation: {str(e)}")
         yield gradio_chatbot_output, conversation.messages, f"Error: {str(e)}"
 
-# Streamlit
-
-
-
-
-image_input = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
-
-# Output text
-output_text = st.text_area("Extracted Text", "")
-
-# Handle the image upload and processing
-if image_input:
-    prompt = "Extract all text from this image exactly as it appears, ensuring the output is in English only."
+# Streamlit OCR app interface
+def upload_and_process(image):
+    if image is None:
+        return "Please upload an image.", []
+    prompt = "Extract all text from this image exactly as it appears, ensuring the output is in English only. Preserve spaces, bullets, numbers, and all formatting. Do not translate, generate, or include text in any other language. Stop at the last character of the image text."
     chatbot = []
     history = []
-
+    logger.info("Starting upload_and_process...")
+    for chatbot_output, history_output, status in predict(prompt, [image], chatbot, history):
+        logger.info(f"Status: {status}")
        if status == "Success":
-
-
-
-
+            return chatbot_output[-1][1], history_output
+    return "Processing failed.", []
+
+# Streamlit UI
+st.title("DeepSeek-VL2 OCR with Streamlit")
+image_input = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
+output_text = st.text_area("Extracted Text", height=300)
+if image_input:
+    output, _ = upload_and_process(image_input)
+    output_text.write(output)
+
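A note on the caching used by fetch_model: Streamlit re-executes app.py from top to bottom on every interaction, so the module-level DEPLOY_MODELS = {} is rebuilt on each rerun and the checkpoint ends up being reloaded. A minimal rerun-safe sketch, assuming the same load_model helper from deepseek_vl2.serve.inference (the name fetch_model_cached is illustrative and not part of this commit):

import torch
import streamlit as st
from deepseek_vl2.serve.inference import load_model

# Sketch: keep one loaded copy per model name across Streamlit reruns.
@st.cache_resource
def fetch_model_cached(model_name: str):
    # dtype fixed to bfloat16 here, matching the default used by fetch_model above
    tokenizer, model, vl_chat_processor = load_model(model_name, dtype=torch.bfloat16)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return tokenizer, model.to(device), vl_chat_processor

st.cache_resource memoizes on the model_name argument, so repeated calls during later reruns return the already-loaded model instead of hitting load_model again.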
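On the UI tail of the new file: st.text_area returns the widget's current string value, so output_text.write(output) operates on a plain str and would fail at runtime rather than display the OCR result. A minimal sketch of one way to render the extracted text, reusing the upload_and_process helper defined above (the spinner label and layout are illustrative assumptions, not part of the commit):

import streamlit as st

st.title("DeepSeek-VL2 OCR with Streamlit")
image_input = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])

extracted = ""
if image_input:
    with st.spinner("Extracting text..."):
        # upload_and_process is defined earlier in app.py
        extracted, _ = upload_and_process(image_input)

# Show the result as the text area's value instead of writing to st.text_area's return value.
st.text_area("Extracted Text", value=extracted, height=300)

With app.py laid out this way, the app is launched as usual with streamlit run app.py.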