Spaces:

Pretam1
/

Hw9

Sleeping

App Files Files Community

Pretam1 committed on Nov 7, 2024

Commit

d3e0996

verified ·

1 Parent(s): b0ede06

made new file

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import streamlit as st
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+import torch
+from PIL import Image
+# Load pre-trained models and tokenizer
+model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+# Check device and move model to the appropriate device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+# Set generation parameters
+max_length = 16
+num_beams = 4
+gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
+# Define the prediction function
+def predict_caption(image):
+    if image.mode != "RGB":
+        image = image.convert(mode="RGB")
+    # Process image and move pixel values to device
+    pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values.to(device)
+    # Generate caption
+    output_ids = model.generate(pixel_values, **gen_kwargs)
+    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+    # Return the caption text
+    return preds[0].strip()
+# Main function for Streamlit app
+def main():
+    st.title("Image Caption Generator")
+    st.write("Upload an image, and the model will describe what it sees.")
+    # Upload image
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+    if uploaded_file is not None:
+        # Load and display the uploaded image
+        image = Image.open(uploaded_file)
+        st.image(image, caption='Uploaded Image', use_column_width=True)
+        # Generate and display caption
+        caption = predict_caption(image)
+        st.write("Caption:", caption)
+# Entry point: build the page only when this file is executed as the main
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    main()