srivatsavdamaraju committed on
Commit
f204998
·
verified ·
1 Parent(s): 617fb50

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ from torchvision import transforms
5
+ from llama_cpp import Llama
6
+ from torchvision.models import vgg16
7
+
8
# GGUF language model (LLaMA or similar), loaded through llama-cpp-python.
# NOTE(review): the path below looks like a placeholder - point it at a real
# model file before deploying.
GGUF_MODEL_PATH = "path/to/ggml-model-IQ3_M.gguf"
llm = Llama(model_path=GGUF_MODEL_PATH)

# Pre-trained VGG16 image model, switched to evaluation mode for inference.
vgg_model = vgg16(pretrained=True)
vgg_model.eval()

# Per-channel normalization statistics (the values torchvision documents for
# its ImageNet-pretrained models).
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every uploaded image: resize to 224x224, convert to
# a tensor, then normalize each channel.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
transform = transforms.Compose(_preprocessing_steps)
20
+
21
+ # Function to process the image and extract features
22
def process_image(image: "Image.Image") -> "list[float]":
    """Run ``image`` through the VGG16 model and return its output as a list.

    Args:
        image: PIL image, as delivered by the Gradio image input
            (which is configured with ``type="pil"``).

    Returns:
        The model output for the single-image batch, flattened into a plain
        list of floats.

    Raises:
        ValueError: If ``image`` is ``None`` (no image was supplied).
    """
    if image is None:
        raise ValueError("process_image() requires an image, got None")

    # The Normalize step is configured with three-channel statistics, so
    # grayscale, palette and RGBA uploads must be converted to RGB first;
    # otherwise the channel counts do not match and the transform fails.
    rgb_image = image.convert("RGB")
    batch = transform(rgb_image).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        # vgg_model is called end to end, so this is the network's final
        # output rather than an intermediate convolutional feature map.
        scores = vgg_model(batch)

    return scores.flatten().tolist()
27
+
28
+ # Function to combine the image features with the question and get a chatbot response
29
def _summarize_features(image_features, top_k=5):
    """Return the ``top_k`` largest feature values as ``index:value`` pairs."""
    ranked = sorted(
        range(len(image_features)),
        key=image_features.__getitem__,
        reverse=True,
    )
    return ", ".join(
        f"{index}:{image_features[index]:.3f}" for index in ranked[:top_k]
    )


def chatbot(image, question):
    """Answer ``question`` about ``image`` using the GGUF language model.

    Args:
        image: PIL image from the Gradio image input, or ``None`` when the
            user has not uploaded anything.
        question: Free-text question from the Gradio textbox; may be empty
            or ``None``.

    Returns:
        The stripped text of the model's first completion choice, or a short
        instruction message when the image or the question is missing.
    """
    # Gradio passes None for an empty image input; answer with a hint instead
    # of letting the preprocessing step crash.
    if image is None:
        return "Please upload an image before asking a question."

    question = (question or "").strip()
    if not question:
        return "Please type a question about the image."

    # Process the image to extract features
    image_features = process_image(image)

    # Embedding the whole feature list (one float per model output) yields a
    # prompt far larger than llama-cpp-python's default context window
    # (512 tokens unless n_ctx is raised), which makes the completion call
    # fail. Only a compact summary of the strongest entries is sent.
    feature_summary = _summarize_features(image_features)
    prompt = f"Image features: {feature_summary}\nQuestion: {question}\nAnswer:"

    # Generate response using the gguf model
    response = llm(prompt=prompt, max_tokens=128)

    # Return the chatbot's response
    return response["choices"][0]["text"].strip()
41
+
42
+ # Gradio interface
43
# Web UI: one image input and one text input feed chatbot(); its return value
# is shown as plain text.
iface = gr.Interface(
    fn=chatbot,
    inputs=[
        # Components are taken from the top level of the gradio package: the
        # old gr.inputs namespace was deprecated in Gradio 3 and removed in
        # Gradio 4, where gr.inputs.Image raises AttributeError.
        gr.Image(type="pil"),  # Image input, handed to chatbot as a PIL image
        gr.Textbox(lines=2, placeholder="Ask something about the image..."),  # Text input
    ],
    outputs="text",  # Text output
    title="Image to Text Chatbot",
    description="Upload an image and ask a question to the chatbot. It will try to answer based on the image and your question.",
)

# Launch the Gradio app
iface.launch()