Deepan13 committed · Commit aff08a4 · 1 Parent(s): f23b1f5

with some changes

Files changed (3):
  1. README.md +45 -5
  2. app.py +157 -24
  3. requirements.txt +4 -1
README.md CHANGED
@@ -1,14 +1,54 @@
  ---
- title: ITR
- emoji: 🚀
+ title: CodeLlama Code Generator
+ emoji: 🦙
  colorFrom: blue
  colorTo: green
  sdk: gradio
- sdk_version: 5.29.0
+ sdk_version: 4.19.2
  app_file: app.py
  pinned: false
  license: bigscience-openrail-m
- short_description: Testing Codelama
+ short_description: Interactive CodeLlama code generation demo
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # CodeLlama Code Generator
+
+ This is an interactive demo of the CodeLlama-7b model for generating code completions. The application provides a simple interface where you can enter a code prompt and get AI-generated code completions.
+
+ ## Features
+
+ - Interactive code generation with CodeLlama-7b
+ - Adjustable parameters (temperature, max length, etc.)
+ - Example prompts to get started quickly
+ - Real-time generation with timing information
+
+ ## How to Use
+
+ 1. Enter a code prompt in the input box (e.g., a function signature or class definition)
+ 2. Adjust the generation parameters if needed:
+    - **Max Length**: Controls the maximum length of the generated text
+    - **Temperature**: Controls randomness (lower = more deterministic)
+    - **Top-p**: Controls diversity via nucleus sampling
+    - **Top-k**: Controls diversity via top-k sampling
+ 3. Click "Generate Code" to get your completion
+ 4. Try different prompts and parameters to see how they affect the output
+
+ ## Examples
+
+ The demo includes several example prompts to help you get started:
+
+ - Function to implement exponential backoff for network pings
+ - Fibonacci sequence implementation
+ - Binary search tree class
+ - Asynchronous data fetching function
+
+ ## Technical Details
+
+ This demo uses:
+ - CodeLlama-7b model from Meta
+ - Hugging Face Transformers library
+ - Gradio for the web interface
+
+ ## License
+
+ This demo is provided under the BigScience OpenRAIL-M license.
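
The four sliders described in the new README map one-to-one onto `transformers` text-generation keyword arguments. As a reference, here is a minimal sketch of the equivalent direct pipeline call; the prompt and values are illustrative, not part of this commit:

```python
import torch
import transformers

# Minimal sketch: the README's four knobs as pipeline() keyword arguments.
pipe = transformers.pipeline(
    "text-generation",
    model="meta-llama/CodeLlama-7b-hf",  # same checkpoint app.py loads
    torch_dtype=torch.float16,
    device_map="auto",
)
out = pipe(
    "def fibonacci(n: int) -> int:",  # example prompt from the demo
    do_sample=True,
    max_length=200,    # Max Length: cap on prompt + generated tokens
    temperature=0.1,   # Temperature: lower = more deterministic
    top_p=0.95,        # Top-p: nucleus sampling threshold
    top_k=10,          # Top-k: restrict sampling to the k most likely tokens
)
print(out[0]["generated_text"])
```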
app.py CHANGED
@@ -1,25 +1,158 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-
- # Use prequantized GPTQ model — no BitsAndBytesConfig needed
- model_id = "TheBloke/CodeLlama-7B-GPTQ"
-
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     device_map="auto"
- )
-
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
- def generate_response(prompt):
-     output = pipe(prompt, max_length=512, do_sample=True, temperature=0.3)[0]['generated_text']
-     return output
-
- gr.Interface(
-     fn=generate_response,
-     inputs=gr.Textbox(lines=5, label="Your prompt"),
-     outputs=gr.Textbox(label="Code Llama response"),
-     title="Code Llama Demo (GPTQ)",
-     description="Ask questions or request code snippets!"
- ).launch()
+ from transformers import AutoTokenizer
+ import transformers
+ import torch
+ import os
+ import time
+
+ # Model configuration
+ MODEL_NAME = "meta-llama/CodeLlama-7b-hf"
+
+ # Default example prompts
+ EXAMPLES = [
+     ["import socket\n\ndef ping_exponential_backoff(host: str):"],
+     ["def fibonacci(n: int) -> int:"],
+     ["class BinarySearchTree:\n    def __init__(self):"],
+     ["async def fetch_data(url: str):"]
+ ]
+
+ # Load model with error handling
+ def load_model():
+     try:
+         print("Loading model and tokenizer...")
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+         # Configure the pipeline based on available resources
+         # Hugging Face Spaces typically have GPU available
+         pipeline = transformers.pipeline(
+             "text-generation",
+             model=MODEL_NAME,
+             torch_dtype=torch.float16,
+             device_map="auto",
+         )
+
+         print("Model loaded successfully!")
+         return tokenizer, pipeline
+     except Exception as e:
+         print(f"Error loading model: {str(e)}")
+         # Return None to indicate failure
+         return None, None
+
+ # Generate code based on the prompt
+ def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
+     try:
+         # Check if model is loaded
+         if tokenizer is None or pipeline is None:
+             return "Error: Model failed to load. Please check the logs."
+
+         # Add a loading message
+         start_time = time.time()
+
+         # Generate the code
+         sequences = pipeline(
+             prompt,
+             do_sample=True,
+             top_k=top_k,
+             temperature=temperature,
+             top_p=top_p,
+             num_return_sequences=1,
+             eos_token_id=tokenizer.eos_token_id,
+             max_length=max_length,
+         )
+
+         # Calculate generation time
+         generation_time = time.time() - start_time
+
+         # Format the result
+         result = sequences[0]['generated_text']
+         return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
+
+     except Exception as e:
+         return f"Error generating code: {str(e)}"
+
+ # Load the model and tokenizer
+ print("Initializing CodeLlama...")
+ tokenizer, pipeline = load_model()
+
+ # Create the Gradio interface
+ with gr.Blocks(title="CodeLlama Code Generation") as demo:
+     gr.Markdown("# CodeLlama Code Generation")
+     gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Code Prompt",
+                 placeholder="Enter your code prompt here...",
+                 lines=5
+             )
+
+             with gr.Row():
+                 max_length = gr.Slider(
+                     minimum=50,
+                     maximum=500,
+                     value=200,
+                     step=10,
+                     label="Max Length"
+                 )
+                 temperature = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.1,
+                     step=0.1,
+                     label="Temperature"
+                 )
+
+             with gr.Row():
+                 top_p = gr.Slider(
+                     minimum=0.5,
+                     maximum=1.0,
+                     value=0.95,
+                     step=0.05,
+                     label="Top-p"
+                 )
+                 top_k = gr.Slider(
+                     minimum=1,
+                     maximum=50,
+                     value=10,
+                     step=1,
+                     label="Top-k"
+                 )
+
+             generate_btn = gr.Button("Generate Code")
+
+         with gr.Column():
+             output = gr.Textbox(
+                 label="Generated Code",
+                 lines=20
+             )
+
+     # Connect the button to the generate function
+     generate_btn.click(
+         fn=generate_code,
+         inputs=[prompt, max_length, temperature, top_p, top_k],
+         outputs=output
+     )
+
+     # Add examples
+     gr.Examples(
+         examples=EXAMPLES,
+         inputs=prompt
+     )
+
+     # Add information about the model
+     gr.Markdown("""
+     ## About
+
+     This demo uses the CodeLlama-7b model to generate code completions based on your prompts.
+
+     - **Max Length**: Controls the maximum length of the generated text
+     - **Temperature**: Controls randomness (lower = more deterministic)
+     - **Top-p**: Controls diversity via nucleus sampling
+     - **Top-k**: Controls diversity via top-k sampling
+
+     Created by DheepLearning
+     """)
+
+ # Launch the app
+ demo.launch()
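
One caveat on this change: the new app.py imports `os` but never uses it, and the commit swaps the prequantized `TheBloke/CodeLlama-7B-GPTQ` checkpoint for `meta-llama/CodeLlama-7b-hf`, which is a gated repository that requires accepting Meta's license and authenticating with the Hub. A minimal sketch of what that authentication could look like before `load_model()` runs, assuming the Space defines an `HF_TOKEN` secret (this is not part of the commit):

```python
import os
from huggingface_hub import login

# Hypothetical: read a Space secret and authenticate with the Hub so that
# from_pretrained() can download the gated CodeLlama weights.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
```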
requirements.txt CHANGED
@@ -1,3 +1,6 @@
  transformers==4.39.3
  accelerate
- gradio
+ gradio>=4.0.0
+ torch>=2.0.0
+ sentencepiece
+ protobuf
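
`sentencepiece` and `protobuf` are typically needed here because the Llama tokenizer is backed by a SentencePiece model; without them, loading the tokenizer can fail at startup. A quick smoke test (not part of this commit) to confirm the pinned set imports cleanly:

```python
# Verify the dependency set imports cleanly before launching the Space.
import gradio
import sentencepiece
import torch
import transformers

print("transformers", transformers.__version__)
print("torch", torch.__version__)
print("gradio", gradio.__version__)
```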