# Import required packages
import os

import google.generativeai as genai
import gradio as gr
import PIL.Image
from gradio.data_classes import FileData
from gradio_multimodalchatbot import MultimodalChatbot

# For better security practices, retrieve sensitive information like API keys
# from environment variables.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# These lines are just to verify whether your API key is correct.
# Use them when you clone the repo and build locally.
#!curl \
#-H 'Content-Type: application/json' \
#-d '{ "prompt": { "text": "Write a very short story about a magic backpack"} }' \
#"https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText?key="

# Initialize genai models
model = genai.GenerativeModel('gemini-pro')
modelvis = genai.GenerativeModel('gemini-pro-vision')


def _history_to_messages(chatbot, prompt):
    """Convert prior (user, bot) chatbot turns plus the new prompt into the
    role/parts message list passed to ``model.generate_content``."""
    messages = []
    for user, bot in chatbot:
        messages.extend([
            {'role': 'user', 'parts': [user.text]},
            {'role': 'model', 'parts': [bot.text]},
        ])
    messages.append({'role': 'user', 'parts': [prompt]})
    return messages


def gemini(input, file, chatbot=None):
    """
    Function to handle gemini model and gemini vision model interactions.

    When ``file`` is given, only the current prompt and the opened image are
    sent to the vision model; earlier turns in ``chatbot`` are not included.
    Otherwise the previous turns plus the new prompt are sent to the text
    model.

    Parameters:
    input (str): The input text.
    file (File): An optional file object for image processing; its ``name``
        attribute is used as the image path.
    chatbot (list): A list to keep track of chatbot interactions. Each entry
        is a (user, bot) pair whose items expose a ``text`` attribute. The
        list is appended to in place. Defaults to a fresh empty list.

    Returns:
    tuple: Updated chatbot interaction list, an empty string, and None.

    Raises:
    gr.Error: If opening the image or generating a response fails.
    """
    # Avoid a mutable default argument: a shared list would leak turns
    # between unrelated calls that omit ``chatbot``.
    if chatbot is None:
        chatbot = []

    try:
        if file is not None:
            # Process image if file is provided
            with PIL.Image.open(file.name) as img:
                response = modelvis.generate_content(
                    [{'role': 'user', 'parts': [input, img]}]
                )
                reply = response.text
            attachments = [{"file": FileData(path=file.name)}]
        else:
            response = model.generate_content(
                _history_to_messages(chatbot, input)
            )
            reply = response.text
            attachments = []

        # Construct list of messages in the required format
        user_msg = {"text": input, "files": attachments}
        bot_msg = {"text": reply, "files": []}
        chatbot.append([user_msg, bot_msg])
    except Exception as e:
        # Handling exceptions and raising error to the modal
        print(f"An error occurred: {e}")
        raise gr.Error(str(e)) from e

    return chatbot, "", None


# Custom theme with flexible height for the embedded Gradio component
# NOTE(review): `add_class` does not look like part of the Gradio theme API
# (themes are normally customised with `.set(...)`, page CSS via
# `gr.Blocks(css=...)`) -- confirm this call works on the installed version.
custom_theme = gr.themes.Default().add_class(
    "gradio-app",
    "height: 100vh; /* or vh for viewport height */"
)

# Define the Gradio Blocks interface with flexible component heights
with gr.Blocks(theme=custom_theme) as demo:
    # Initialize the MultimodalChatbot component
    multi = MultimodalChatbot(value=[], height=250)  # Adjust height as needed

    with gr.Row():
        # Textbox for user input with increased scale for better visibility
        # NOTE(review): verify that Textbox / UploadButton accept `height`
        # in the installed Gradio version.
        tb = gr.Textbox(scale=4, placeholder='Message CortexChatV...', height=60)  # Adjust height

        # Upload button for image files
        up = gr.UploadButton("Attach File", file_types=["image"], scale=1, height=40)  # Adjust height

    # Define the behavior on text submission
    tb.submit(gemini, [tb, up, multi], [multi, tb, up])

    # Define the behavior on image upload
    # Using chained then() calls to update the upload button's state
    up.upload(lambda: gr.UploadButton("Uploading Image..."), [], up) \
        .then(lambda: gr.UploadButton("Image Uploaded"), [], up) \
        .then(lambda: gr.UploadButton("Upload Image"), [], up)

# Launch the demo with a queue to handle multiple users
demo.queue().launch()