import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch
from PIL import Image

# Load the tokenizer and the int4-quantized MiniCPM-Llama3-V 2.5 model.
# The checkpoint ships custom modeling code (hence trust_remote_code=True)
# that exposes a multimodal `chat` method taking a PIL image directly.
model_name = "openbmb/MiniCPM-Llama3-V-2_5-int4"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
model.eval()


def predict(image, prompt):
    if image is None:
        return "No image uploaded. " + (
            f"You asked: {prompt}"
            if prompt
            else "Please upload an image and optionally provide a prompt."
        )

    # Ensure image is a PIL Image object in RGB mode.
    if isinstance(image, str):
        image = Image.open(image)
    image = image.convert("RGB")

    question = prompt if prompt else "Describe this image."
    msgs = [{"role": "user", "content": question}]

    # Pass the image to the model's chat API. Base64-encoding the image into
    # the text prompt would only be tokenized as opaque text, so the model
    # would never actually see the picture.
    with torch.no_grad():
        result = model.chat(
            image=image,
            msgs=msgs,
            tokenizer=tokenizer,
            sampling=True,
            temperature=0.7,
        )
    return f"Model response: {result}\n\nUser prompt: {question}"


demo = gr.Interface(
    predict,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt (optional)"),
    ],
    outputs=gr.Textbox(label="Result"),
    title="Image Analysis with MiniCPM-Llama3-V-2_5-int4",
    description="Upload an image and optionally provide a prompt for analysis.",
)

if __name__ == "__main__":
    demo.launch()
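
# A minimal sketch of alternative launch options, using standard Gradio
# parameters: queue() serializes requests so concurrent users don't contend
# for the GPU, share=True creates a temporary public URL, and server_name
# binds to all interfaces for access from other machines.
#
# demo.queue()
# demo.launch(share=True, server_name="0.0.0.0")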