"""
Manus: a Gradio Space that issues API keys and runs (simulated) vision inference.

Hugging Face Space metadata:

title: Manus
emoji: 🚀
colorFrom: red
colorTo: blue
sdk: gradio
sdk_version: 5.20.1
app_file: app.py
pinned: false
license: mit
"""

import secrets
from typing import Any, Optional

import gradio as gr

# In-memory store for valid API keys. Being a plain module-level set, its
# contents exist only for the lifetime of this Python process.
valid_api_keys = set()


def generate_api_key() -> str:
    """
    Generate a secure 32-character hexadecimal API key.

    The key is registered in ``valid_api_keys`` before being returned, so it
    is immediately accepted by ``llama_vision_inference``.

    Returns:
        The newly generated key.
    """
    # 16 random bytes -> 32 hexadecimal characters.
    key = secrets.token_hex(16)
    valid_api_keys.add(key)
    return key


def llama_vision_inference(api_key: Optional[str], image: Any) -> str:
    """
    Dummy inference function for the Llama Vision model.

    Replace the simulated processing below with actual model loading and
    inference. Ensure that your model is explicitly loaded on CPU.

    Args:
        api_key: Key previously issued by ``generate_api_key``. Surrounding
            whitespace is ignored so that copy-pasted keys still validate.
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        The (simulated) model output, or a human-readable string starting
        with ``"Error:"`` when the key or the image is missing/invalid.
    """
    # Normalize once and use the same value for both checks; otherwise a key
    # pasted with a trailing space/newline passes the emptiness check but
    # fails the membership check. ``or ""`` guards against a ``None`` key.
    normalized_key = (api_key or "").strip()
    if not normalized_key:
        return "Error: API key is required."
    if normalized_key not in valid_api_keys:
        return "Error: Invalid API key. Please generate a valid key first."

    # Validate the image only after the key, so unauthenticated callers
    # always get a key-related error first.
    if image is None:
        return "Error: Please upload an image before running inference."

    # Example: Force CPU usage when loading the model.
    # from llama_vision import LlamaVisionModel
    # model = LlamaVisionModel.from_pretrained("llama-vision-latest", device="cpu")
    # result = model.infer(image)

    # Simulated output for demonstration:
    result = (
        "Simulated Model Output:\n"
        "- Detected GUI elements: [button, menu, text field]\n"
        "- Recognized text: 'Sample Linux GUI Screenshot'\n"
        "- Layout Analysis: Structured layout with header and sidebar"
    )
    return result


# Build the UI at import time so that ``demo`` is available as a module-level
# name; it is launched only when this file is run as a script.
with gr.Blocks(title="Manus 🚀") as demo:
    gr.Markdown("# Manus 🚀")
    gr.Markdown(
        "This Gradio Space lets you generate an API key and perform vision "
        "inference using the Llama Vision model (running in CPU mode)."
    )

    with gr.Tabs():
        with gr.TabItem("API Key Generator"):
            gr.Markdown("Generate an API key to be used with the vision model API.")
            key_output = gr.Textbox(label="Your Generated API Key", interactive=False)
            generate_button = gr.Button("Generate API Key")
            generate_button.click(fn=generate_api_key, outputs=key_output)

        with gr.TabItem("Vision Model Inference"):
            gr.Markdown(
                "Enter your API key and upload an image (e.g., a Linux GUI "
                "screenshot) to run inference using the Llama Vision model."
            )
            api_key_input = gr.Textbox(
                label="API Key",
                placeholder="Enter your API key here",
            )
            image_input = gr.Image(type="pil", label="Upload Image")
            output_text = gr.Textbox(label="Model Output", interactive=False)
            run_button = gr.Button("Run Vision Model")
            run_button.click(
                fn=llama_vision_inference,
                inputs=[api_key_input, image_input],
                outputs=output_text,
            )

if __name__ == "__main__":
    demo.launch()