ved1beta committed on
Commit
5b195c1
·
1 Parent(s): fa73fe7
Files changed (1) hide show
  1. app.py +7 -26
app.py CHANGED
@@ -1,6 +1,3 @@
1
- import subprocess
2
- subprocess.run('pip install bitsandbytes', shell=True)
3
-
4
  import gradio as gr
5
  from PIL import Image
6
  from transformers import AutoModelForCausalLM
@@ -15,29 +12,19 @@ model = AutoModelForCausalLM.from_pretrained(
15
  model_id,
16
  device_map="cpu",
17
  trust_remote_code=True,
18
- torch_dtype=torch.float16, # Reduced precision
19
- load_in_8bit=True, # 8-bit quantization
20
  _attn_implementation="eager"
21
  )
22
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
23
 
24
- PLACEHOLDER = """
25
- <div style="padding: 30px; text-align: center;">
26
- <h1>Phi3 Vision Model</h1>
27
- <p>Upload an image and ask a question</p>
28
- </div>
29
- """
30
-
31
  @spaces.CPU
32
  def bot_streaming(message, history):
33
  try:
34
- # Image extraction
35
  image = (message["files"][-1]["path"] if isinstance(message["files"][-1], dict) else message["files"][-1]) if message["files"] else None
36
 
37
  if not image:
38
  raise ValueError("No image uploaded")
39
 
40
- # Conversation preparation
41
  conversation = []
42
  for user, assistant in history:
43
  conversation.extend([
@@ -47,17 +34,15 @@ def bot_streaming(message, history):
47
 
48
  conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
49
 
50
- # Prompt and image processing
51
  prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
52
  image = Image.open(image)
53
  inputs = processor(prompt, image, return_tensors="pt")
54
 
55
- # Streaming generation with reduced tokens
56
  streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
57
  generation_kwargs = dict(
58
  inputs,
59
  streamer=streamer,
60
- max_new_tokens=256, # Reduced token generation
61
  do_sample=False,
62
  temperature=0.1,
63
  eos_token_id=processor.tokenizer.eos_token_id
@@ -74,20 +59,16 @@ def bot_streaming(message, history):
74
  except Exception as e:
75
  yield f"Error: {str(e)}"
76
 
77
- # Gradio Interface Configuration
78
- chatbot = gr.Chatbot(scale=1, placeholder=PLACEHOLDER)
79
- chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Upload image and ask a question")
80
-
81
  demo = gr.Blocks()
82
  with demo:
83
  gr.ChatInterface(
84
  fn=bot_streaming,
85
  title="Phi3 Vision 128K",
86
  description="Multimodal AI Vision Model",
87
- multimodal=True,
88
- textbox=chat_input,
89
- chatbot=chatbot
90
  )
91
 
92
- demo.queue(max_size=10) # Limit queue size
93
- demo.launch(debug=False, show_error=True)
 
 
 
 
1
  import gradio as gr
2
  from PIL import Image
3
  from transformers import AutoModelForCausalLM
 
12
  model_id,
13
  device_map="cpu",
14
  trust_remote_code=True,
15
+ torch_dtype=torch.float32,
 
16
  _attn_implementation="eager"
17
  )
18
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
19
 
 
 
 
 
 
 
 
20
  @spaces.CPU
21
  def bot_streaming(message, history):
22
  try:
 
23
  image = (message["files"][-1]["path"] if isinstance(message["files"][-1], dict) else message["files"][-1]) if message["files"] else None
24
 
25
  if not image:
26
  raise ValueError("No image uploaded")
27
 
 
28
  conversation = []
29
  for user, assistant in history:
30
  conversation.extend([
 
34
 
35
  conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
36
 
 
37
  prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
38
  image = Image.open(image)
39
  inputs = processor(prompt, image, return_tensors="pt")
40
 
 
41
  streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
42
  generation_kwargs = dict(
43
  inputs,
44
  streamer=streamer,
45
+ max_new_tokens=256,
46
  do_sample=False,
47
  temperature=0.1,
48
  eos_token_id=processor.tokenizer.eos_token_id
 
59
  except Exception as e:
60
  yield f"Error: {str(e)}"
61
 
 
 
 
 
62
  demo = gr.Blocks()
63
  with demo:
64
  gr.ChatInterface(
65
  fn=bot_streaming,
66
  title="Phi3 Vision 128K",
67
  description="Multimodal AI Vision Model",
68
+ examples=[
69
+ {"text": "Describe this image", "files": ["./example.jpg"]},
70
+ ]
71
  )
72
 
73
+ demo.queue()
74
+ demo.launch(debug=True)