Anuji committed on
Commit 09a5bc7 · verified · 1 Parent(s): e59e1f3
Files changed (1)
  1. app.py +27 -52
app.py CHANGED
@@ -1,49 +1,33 @@
  import os
- import subprocess
- import sys
  import torch
- import gradio as gr
+ import streamlit as st
  from PIL import Image
  from deepseek_vl2.serve.inference import load_model, deepseek_generate, convert_conversation_to_prompts
  from deepseek_vl2.models.conversation import SeparatorStyle
  from deepseek_vl2.serve.app_modules.utils import configure_logger, strip_stop_words, pil_to_base64
- from google.colab import files
 
- # Clone the DeepSeek-VL2 repository if not already present
- repo_dir = "/content/DeepSeek-VL2"
- if not os.path.exists(repo_dir):
-     subprocess.run(["git", "clone", "https://github.com/deepseek-ai/DeepSeek-VL2"], check=True)
-     sys.path.append(repo_dir)  # Add the DeepSeek-VL2 repository to the Python path
- else:
-     print("Repository already cloned.")
-
- # Step 2: Install dependencies if not already installed
- subprocess.run([sys.executable, "-m", "pip", "install", "-r", "/content/DeepSeek-VL2/requirements.txt"])
-
- # Step 3: Verify GPU (Optional)
- print("CUDA Available:", torch.cuda.is_available())
- print("Device:", torch.cuda.current_device() if torch.cuda.is_available() else "CPU")
- print("Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A")
-
- # Step 4: Define your model and prediction functions
+ # Initialize logger
  logger = configure_logger()
 
+ # Global variables for model loading
  MODELS = ["deepseek-ai/deepseek-vl2-tiny"]
  DEPLOY_MODELS = {}
  IMAGE_TOKEN = "<image>"
 
+ # Load model function
  def fetch_model(model_name: str, dtype=torch.bfloat16):
      global DEPLOY_MODELS
      if model_name not in DEPLOY_MODELS:
-         print(f"Loading {model_name}...")
+         st.write(f"Loading {model_name}...")
          model_info = load_model(model_name, dtype=dtype)
          tokenizer, model, vl_chat_processor = model_info
          device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
          model = model.to(device)
          DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
-         print(f"Loaded {model_name} on {device}")
+         st.write(f"Loaded {model_name} on {device}")
      return DEPLOY_MODELS[model_name]
 
+ # Generate prompt with conversation history
  def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
      conversation = vl_chat_processor.new_chat_template()
      if history:
@@ -55,6 +39,7 @@ def generate_prompt_with_history(text, images, history, vl_chat_processor, token
      conversation.append_message(conversation.roles[1], "")
      return conversation
 
+ # Convert conversation to Gradio-compatible format
  def to_gradio_chatbot(conv):
      ret = []
      for i, (role, msg) in enumerate(conv.messages[conv.offset:]):
@@ -69,14 +54,12 @@ def to_gradio_chatbot(conv):
              ret[-1][-1] = msg
      return ret
 
+ # Prediction function
  def predict(text, images, chatbot, history, model_name="deepseek-ai/deepseek-vl2-tiny"):
-     print("Starting predict function...")
      tokenizer, vl_gpt, vl_chat_processor = fetch_model(model_name)
      if not text:
-         print("Empty text input detected.")
          return chatbot, history, "Empty context."
 
-     print("Processing images...")
      pil_images = [Image.open(img).convert("RGB") for img in images] if images else []
      conversation = generate_prompt_with_history(
          text, pil_images, history, vl_chat_processor, tokenizer
@@ -86,7 +69,6 @@ def predict(text, images, chatbot, history, model_name="deepseek-ai/deepseek-vl2
      gradio_chatbot_output = to_gradio_chatbot(conversation)
 
      full_response = ""
-     print("Generating response...")
      try:
          with torch.no_grad():
              for x in deepseek_generate(
@@ -104,38 +86,31 @@ def predict(text, images, chatbot, history, model_name="deepseek-ai/deepseek-vl2
                  response = strip_stop_words(full_response, stop_words)
                  conversation.update_last_message(response)
                  gradio_chatbot_output[-1][1] = response
-                 print(f"Yielding partial response: {response[:50]}...")
                  yield gradio_chatbot_output, conversation.messages, "Generating..."
 
-         print("Generation complete.")
          torch.cuda.empty_cache()
          yield gradio_chatbot_output, conversation.messages, "Success"
      except Exception as e:
-         print(f"Error in generation: {str(e)}")
          yield gradio_chatbot_output, conversation.messages, f"Error: {str(e)}"
 
- # Gradio interface for OCR
- def upload_and_process(image):
-     if image is None:
-         return "Please upload an image.", []
-     prompt = "Extract all text from this image exactly as it appears, ensuring the output is in English only. Preserve spaces, bullets, numbers, and all formatting. Do not translate, generate, or include text in any other language. Stop at the last character of the image text."
-     chatbot = []
-     history = []
-     print("Starting upload_and_process...")
-     for chatbot_output, history_output, status in predict(prompt, [image], chatbot, history):
-         print(f"Status: {status}")
-         if status == "Success":
-             return chatbot_output[-1][1], history_output
-     return "Processing failed.", []
+ # Streamlit UI setup
+ st.title("DeepSeek-VL2 OCR in Colab")
+ st.write("Upload an image and get the extracted text.")
 
- # Launch Gradio app
- with gr.Blocks() as demo:
-     gr.Markdown("### DeepSeek-VL2 OCR in Colab")
-     image_input = gr.Image(type="filepath", label="Upload Image")
-     output_text = gr.Textbox(label="Extracted Text")
-     history_state = gr.State([])
-     submit_btn = gr.Button("Extract Text")
-     submit_btn.click(upload_and_process, inputs=image_input, outputs=[output_text, history_state])
+ # Image upload
+ image_input = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
 
- demo.launch(share=True, debug=True)  # Added debug=True for more Gradio logs
+ # Output text
+ output_text = st.text_area("Extracted Text", "")
 
+ # Handle the image upload and processing
+ if image_input:
+     prompt = "Extract all text from this image exactly as it appears, ensuring the output is in English only."
+     chatbot = []
+     history = []
+     for chatbot_output, history_output, status in predict(prompt, [image_input], chatbot, history):
+         if status == "Success":
+             output_text = chatbot_output[-1][1]
+             st.write("Extracted Text:", output_text)
+         else:
+             st.error(f"Error: {status}")
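
For quick sanity-checking, here is a minimal sketch of driving the new predict generator directly, outside the Streamlit UI. It assumes the DeepSeek-VL2 dependencies are installed and this file is importable as app.py; "sample.png" is a hypothetical placeholder path, not part of the commit.

# Minimal sketch: exercise predict() without the Streamlit front end.
# "sample.png" is a hypothetical placeholder; predict() opens each entry with PIL.Image.open().
from app import predict

extracted = None
for chatbot_output, history, status in predict(
    "Extract all text from this image exactly as it appears.",
    ["sample.png"],
    [],   # empty chatbot state
    [],   # empty history
):
    if status == "Success":
        extracted = chatbot_output[-1][1]

print(extracted)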