Prashant26am committed
Commit 1ea681e · Parent: 33fd881

Move app.py to root directory for Hugging Face Space deployment

Files changed (3):
  README.md         +1 -1
  app.py            +51 -61
  requirements.txt  +2 -2
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: blue
 colorTo: indigo
 sdk: gradio
 sdk_version: 4.19.2
-app_file: llava-chat/app.py
+app_file: app.py
 pinned: false
 license: mit
 ---
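
For context, Spaces resolves the app entry point from this YAML front matter, so app_file must be a path relative to the repository root; that is why app_file changes from llava-chat/app.py to app.py when the script moves to the root. After the change the visible part of the header reads (lines 1-4, typically the opening ---, title and emoji, fall outside the hunk):

    colorFrom: blue      # hunk context line
    colorTo: indigo
    sdk: gradio
    sdk_version: 4.19.2
    app_file: app.py
    pinned: false
    license: mit
    ---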
app.py CHANGED
@@ -81,25 +81,51 @@ def initialize_model():
             del model
             torch.cuda.empty_cache()
 
-        # Initialize new model
+        # Set device
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {device}")
+
+        # Initialize new model with Hugging Face specific parameters
         model = LLaVA(
             vision_model_path="openai/clip-vit-base-patch32",
             language_model_path="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-            device="cpu",
-            projection_hidden_dim=2048
+            device=device,
+            projection_hidden_dim=2048,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+            low_cpu_mem_usage=True,
+            use_safetensors=True,
+            load_in_8bit=True if device == "cuda" else False,
+            trust_remote_code=True
         )
 
         # Configure model for inference
         if hasattr(model, 'language_model'):
             model.language_model.config.use_cache = False
             model.language_model.eval()
+
+            # Set generation config
+            if hasattr(model.language_model, 'generation_config'):
+                model.language_model.generation_config.do_sample = True
+                model.language_model.generation_config.max_new_tokens = 256
+                model.language_model.generation_config.temperature = 0.7
+                model.language_model.generation_config.top_p = 0.9
+                model.language_model.generation_config.pad_token_id = model.language_model.config.eos_token_id
+
+        # Move model to device
+        model = model.to(device)
 
         model_status.update({
             "initialized": True,
             "device": str(model.device),
-            "error": None
+            "error": None,
+            "model_info": {
+                "vision_model": "openai/clip-vit-base-patch32",
+                "language_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                "dtype": str(model.dtype),
+                "device": str(model.device)
+            }
         })
-        logger.info(f"Model successfully initialized on {model.device}")
+        logger.info(f"Model successfully initialized on {model.device} with dtype {model.dtype}")
         return True
 
     except Exception as e:
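
The hunk above replaces the hard-coded CPU setup with runtime device selection, half precision on GPU, and optional 8-bit loading. A minimal sketch of that loading policy against a plain transformers model (the kwargs mirror the diff; that the custom LLaVA wrapper forwards them to from_pretrained is an assumption):

    # Sketch of the device/dtype policy from the hunk above, applied to a plain
    # transformers model rather than the app's LLaVA wrapper (illustrative only).
    import torch
    from transformers import AutoModelForCausalLM

    device = "cuda" if torch.cuda.is_available() else "cpu"

    lm = AutoModelForCausalLM.from_pretrained(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        low_cpu_mem_usage=True,  # stream weights in rather than materializing a full fp32 copy
        use_safetensors=True,    # prefer the safetensors checkpoint format
    )
    lm.to(device).eval()

One caveat on the diff itself: load_in_8bit=True requires the bitsandbytes package, and an 8-bit model is already placed on the GPU at load time; recent transformers releases raise an error if .to(device) is called on it afterwards, so the later model = model.to(device) line is only safe on the CPU path.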
@@ -167,16 +193,25 @@ def process_image(
         # Clear memory
         torch.cuda.empty_cache()
 
-        # Process image
+        # Process image with Hugging Face specific settings
         with torch.inference_mode():
             try:
                 logger.info("Generating response...")
+                # Update generation config if available
+                if hasattr(model, 'language_model') and hasattr(model.language_model, 'generation_config'):
+                    model.language_model.generation_config.max_new_tokens = max_new_tokens
+                    model.language_model.generation_config.temperature = temperature
+                    model.language_model.generation_config.top_p = top_p
+
                 response = model.generate_from_image(
                     image_path=temp_path,
                     prompt=prompt,
                     max_new_tokens=max_new_tokens,
                     temperature=temperature,
-                    top_p=top_p
+                    top_p=top_p,
+                    do_sample=True,
+                    num_beams=1,
+                    pad_token_id=model.language_model.config.eos_token_id if hasattr(model, 'language_model') else None
                 )
 
                 if not response:
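
The extra kwargs passed to generate_from_image are standard transformers generate() sampling controls. A self-contained, text-only sketch of what they do (generate_from_image is this repo's own API, so the one-to-one mapping is an assumption):

    # What the sampling kwargs from the hunk mean on a plain transformers model.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tok = AutoTokenizer.from_pretrained(name)
    lm = AutoModelForCausalLM.from_pretrained(name).eval()

    inputs = tok("Describe the image.", return_tensors="pt")
    with torch.inference_mode():
        out = lm.generate(
            **inputs,
            max_new_tokens=64,              # hard cap on generated length
            do_sample=True,                 # sample instead of greedy decoding
            temperature=0.7,                # < 1.0 sharpens the next-token distribution
            top_p=0.9,                      # nucleus sampling: smallest set with cumulative prob >= 0.9
            num_beams=1,                    # no beam search while sampling
            pad_token_id=tok.eos_token_id,  # Llama-family checkpoints ship without a pad token
        )
    print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))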
@@ -217,25 +252,8 @@ def process_image(
     except Exception as e:
         logger.warning(f"Failed to clear CUDA cache: {str(e)}")
 
-def get_status_text() -> str:
-    """Get a formatted status text for display."""
-    try:
-        status = {
-            "Model Initialized": "Yes" if model is not None else "No",
-            "Device": str(model.device) if model is not None else "None",
-            "Last Error": model_status.get("last_error", "None"),
-            "Memory Usage": {
-                "CUDA Available": "Yes" if torch.cuda.is_available() else "No",
-                "Memory Allocated": f"{torch.cuda.memory_allocated() / 1024**2:.2f} MB" if torch.cuda.is_available() else "N/A",
-                "Memory Reserved": f"{torch.cuda.memory_reserved() / 1024**2:.2f} MB" if torch.cuda.is_available() else "N/A"
-            }
-        }
-        return "\n".join(f"{k}: {v}" for k, v in status.items())
-    except Exception as e:
-        return f"Error getting status: {str(e)}"
-
 def create_interface():
-    """Create the Gradio interface with proper error handling."""
+    """Create a simplified Gradio interface."""
     try:
         with gr.Blocks(title="LLaVA Chat", theme=gr.themes.Soft()) as demo:
             gr.Markdown("""
@@ -252,19 +270,13 @@ def create_interface():
             """)
 
             with gr.Row():
-                with gr.Column(scale=1):
-                    # Input components with explicit types and validation
-                    image_input = gr.Image(
-                        type="pil",
-                        label="Upload Image",
-                        image_mode="RGB",
-                        format="PNG"
-                    )
+                with gr.Column():
+                    # Input components
+                    image_input = gr.Image(type="pil", label="Upload Image")
                     prompt_input = gr.Textbox(
                         label="Ask about the image",
                         placeholder="What can you see in this image?",
-                        lines=3,
-                        max_lines=5
+                        lines=3
                     )
 
                     with gr.Accordion("Advanced Settings", open=False):
@@ -291,31 +303,17 @@ def create_interface():
                         )
 
                     submit_btn = gr.Button("Generate Response", variant="primary")
-                    status_btn = gr.Button("Check Status", variant="secondary")
 
-                with gr.Column(scale=1):
+                with gr.Column():
                     output = gr.Textbox(
                         label="Model Response",
                         lines=10,
                         show_copy_button=True
                     )
-                    status_output = gr.Textbox(
-                        label="System Status",
-                        lines=5,
-                        show_copy_button=True
-                    )
-
-            # Set up event handlers with proper error handling
-            def safe_process_image(*args):
-                try:
-                    return process_image(*args)
-                except Exception as e:
-                    logger.error(f"Interface error: {str(e)}")
-                    logger.error(traceback.format_exc())
-                    return f"Error: {str(e)}"
 
+            # Set up event handler
             submit_btn.click(
-                fn=safe_process_image,
+                fn=process_image,
                 inputs=[
                     image_input,
                     prompt_input,
@@ -323,15 +321,7 @@ def create_interface():
                     temperature,
                     top_p
                 ],
-                outputs=output,
-                api_name="process_image"
-            )
-
-            status_btn.click(
-                fn=get_status_text,
-                inputs=[],
-                outputs=status_output,
-                api_name="check_status"
+                outputs=output
             )
 
             logger.info("Successfully created Gradio interface")
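
Taken together, the interface hunks drop the status panel and the safe_process_image wrapper and converge on a two-column layout wired directly to process_image. A runnable skeleton of that layout with a stub handler (the slider ranges are assumptions, since the Accordion body sits outside the hunks):

    # Skeleton of the simplified interface; the stub stands in for process_image.
    import gradio as gr

    def process_image(image, prompt, max_new_tokens, temperature, top_p):
        return f"(stub) prompt={prompt!r}, max_new_tokens={max_new_tokens}"

    with gr.Blocks(title="LLaVA Chat", theme=gr.themes.Soft()) as demo:
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Image")
                prompt_input = gr.Textbox(
                    label="Ask about the image",
                    placeholder="What can you see in this image?",
                    lines=3,
                )
                with gr.Accordion("Advanced Settings", open=False):
                    max_new_tokens = gr.Slider(32, 512, value=256, step=32, label="Max new tokens")
                    temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
                    top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
                submit_btn = gr.Button("Generate Response", variant="primary")
            with gr.Column():
                output = gr.Textbox(label="Model Response", lines=10, show_copy_button=True)

        submit_btn.click(
            fn=process_image,
            inputs=[image_input, prompt_input, max_new_tokens, temperature, top_p],
            outputs=output,
        )

    demo.launch()

Note that dropping api_name also removes the named /process_image and /check_status API endpoints the old wiring exposed.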
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 transformers>=4.36.0
 torch>=2.1.0
 pillow>=10.0.0
-gradio==4.19.2
+gradio>=4.0.0
 fastapi>=0.100.0
 uvicorn>=0.23.0
 accelerate>=0.25.0
@@ -24,4 +24,4 @@ bcrypt>=4.0.1
 aiofiles>=23.2.0
 httpx>=0.26.0
 # Memory optimization
-optimum>=1.16.0
+optimum>=1.16.0
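
Two notes on this file. First, README.md still declares sdk_version: 4.19.2 while the pin here loosens to gradio>=4.0.0; on Gradio Spaces the README's sdk_version determines which Gradio build actually runs, so the looser pin mostly matters for local installs. Second, the last hunk removes and re-adds optimum>=1.16.0 unchanged, which is how the diff view typically renders a trailing-newline fix. A quick, illustrative check that a local environment satisfies the loosened floors (importlib.metadata and packaging are standard tooling, not project code):

    # Check installed versions against the floors in requirements.txt.
    from importlib.metadata import version
    from packaging.version import parse

    floors = {"gradio": "4.0.0", "transformers": "4.36.0", "torch": "2.1.0", "optimum": "1.16.0"}
    for pkg, floor in floors.items():
        installed = version(pkg)
        assert parse(installed) >= parse(floor), f"{pkg} {installed} is below {floor}"
        print(f"{pkg:12s} {installed} >= {floor}")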