Deepan13 committed · Commit aff08a4 · 1 Parent(s): f23b1f5

with some changes

Files changed (3):
  1. README.md +45 -5
  2. app.py +157 -24
  3. requirements.txt +4 -1
README.md CHANGED
@@ -1,14 +1,54 @@
  ---
- title: ITR
- emoji: 🚀
+ title: CodeLlama Code Generator
+ emoji: 🦙
  colorFrom: blue
  colorTo: green
  sdk: gradio
- sdk_version: 5.29.0
+ sdk_version: 4.19.2
  app_file: app.py
  pinned: false
  license: bigscience-openrail-m
- short_description: Testing Codelama
+ short_description: Interactive CodeLlama code generation demo
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # CodeLlama Code Generator
+
+ This is an interactive demo of the CodeLlama-7b model for generating code completions. The application provides a simple interface where you can enter a code prompt and get AI-generated code completions.
+
+ ## Features
+
+ - Interactive code generation with CodeLlama-7b
+ - Adjustable parameters (temperature, max length, etc.)
+ - Example prompts to get started quickly
+ - Real-time generation with timing information
+
+ ## How to Use
+
+ 1. Enter a code prompt in the input box (e.g., a function signature or class definition)
+ 2. Adjust the generation parameters if needed:
+    - **Max Length**: Controls the maximum length of the generated text
+    - **Temperature**: Controls randomness (lower = more deterministic)
+    - **Top-p**: Controls diversity via nucleus sampling
+    - **Top-k**: Controls diversity via top-k sampling
+ 3. Click "Generate Code" to get your completion
+ 4. Try different prompts and parameters to see how they affect the output
+
+ ## Examples
+
+ The demo includes several example prompts to help you get started:
+
+ - Function to implement exponential backoff for network pings
+ - Fibonacci sequence implementation
+ - Binary search tree class
+ - Asynchronous data fetching function
+
+ ## Technical Details
+
+ This demo uses:
+ - CodeLlama-7b model from Meta
+ - Hugging Face Transformers library
+ - Gradio for the web interface
+
+ ## License
+
+ This demo is provided under the BigScience OpenRAIL-M license.
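
The four sliders described in the new README map one-to-one onto `transformers` text-generation keyword arguments. As a reference, here is a minimal sketch of the equivalent direct pipeline call; the prompt and values are illustrative, not part of this commit:

```python
import torch
import transformers

# Minimal sketch: the README's four knobs as pipeline() keyword arguments.
pipe = transformers.pipeline(
    "text-generation",
    model="meta-llama/CodeLlama-7b-hf",  # same checkpoint app.py loads
    torch_dtype=torch.float16,
    device_map="auto",
)
out = pipe(
    "def fibonacci(n: int) -> int:",  # example prompt from the demo
    do_sample=True,
    max_length=200,    # Max Length: cap on prompt + generated tokens
    temperature=0.1,   # Temperature: lower = more deterministic
    top_p=0.95,        # Top-p: nucleus sampling threshold
    top_k=10,          # Top-k: restrict sampling to the k most likely tokens
)
print(out[0]["generated_text"])
```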
app.py CHANGED
@@ -1,25 +1,158 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-
- # Use prequantized GPTQ model — no BitsAndBytesConfig needed
- model_id = "TheBloke/CodeLlama-7B-GPTQ"
-
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     device_map="auto"
- )
-
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
- def generate_response(prompt):
-     output = pipe(prompt, max_length=512, do_sample=True, temperature=0.3)[0]['generated_text']
-     return output
-
- gr.Interface(
-     fn=generate_response,
-     inputs=gr.Textbox(lines=5, label="Your prompt"),
-     outputs=gr.Textbox(label="Code Llama response"),
-     title="Code Llama Demo (GPTQ)",
-     description="Ask questions or request code snippets!"
- ).launch()
+ from transformers import AutoTokenizer
+ import transformers
+ import torch
+ import os
+ import time
+
+ # Model configuration
+ MODEL_NAME = "meta-llama/CodeLlama-7b-hf"
+
+ # Default example prompts
+ EXAMPLES = [
+     ["import socket\n\ndef ping_exponential_backoff(host: str):"],
+     ["def fibonacci(n: int) -> int:"],
+     ["class BinarySearchTree:\n    def __init__(self):"],
+     ["async def fetch_data(url: str):"]
+ ]
+
+ # Load model with error handling
+ def load_model():
+     try:
+         print("Loading model and tokenizer...")
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+         # Configure the pipeline based on available resources
+         # Hugging Face Spaces typically have GPU available
+         pipeline = transformers.pipeline(
+             "text-generation",
+             model=MODEL_NAME,
+             torch_dtype=torch.float16,
+             device_map="auto",
+         )
+
+         print("Model loaded successfully!")
+         return tokenizer, pipeline
+     except Exception as e:
+         print(f"Error loading model: {str(e)}")
+         # Return None to indicate failure
+         return None, None
+
+ # Generate code based on the prompt
+ def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
+     try:
+         # Check if model is loaded
+         if tokenizer is None or pipeline is None:
+             return "Error: Model failed to load. Please check the logs."
+
+         # Add a loading message
+         start_time = time.time()
+
+         # Generate the code
+         sequences = pipeline(
+             prompt,
+             do_sample=True,
+             top_k=top_k,
+             temperature=temperature,
+             top_p=top_p,
+             num_return_sequences=1,
+             eos_token_id=tokenizer.eos_token_id,
+             max_length=max_length,
+         )
+
+         # Calculate generation time
+         generation_time = time.time() - start_time
+
+         # Format the result
+         result = sequences[0]['generated_text']
+         return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
+
+     except Exception as e:
+         return f"Error generating code: {str(e)}"
+
+ # Load the model and tokenizer
+ print("Initializing CodeLlama...")
+ tokenizer, pipeline = load_model()
+
+ # Create the Gradio interface
+ with gr.Blocks(title="CodeLlama Code Generation") as demo:
+     gr.Markdown("# CodeLlama Code Generation")
+     gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")
+
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Code Prompt",
+                 placeholder="Enter your code prompt here...",
+                 lines=5
+             )
+
+             with gr.Row():
+                 max_length = gr.Slider(
+                     minimum=50,
+                     maximum=500,
+                     value=200,
+                     step=10,
+                     label="Max Length"
+                 )
+                 temperature = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.1,
+                     step=0.1,
+                     label="Temperature"
+                 )
+
+             with gr.Row():
+                 top_p = gr.Slider(
+                     minimum=0.5,
+                     maximum=1.0,
+                     value=0.95,
+                     step=0.05,
+                     label="Top-p"
+                 )
+                 top_k = gr.Slider(
+                     minimum=1,
+                     maximum=50,
+                     value=10,
+                     step=1,
+                     label="Top-k"
+                 )
+
+             generate_btn = gr.Button("Generate Code")
+
+         with gr.Column():
+             output = gr.Textbox(
+                 label="Generated Code",
+                 lines=20
+             )
+
+     # Connect the button to the generate function
+     generate_btn.click(
+         fn=generate_code,
+         inputs=[prompt, max_length, temperature, top_p, top_k],
+         outputs=output
+     )
+
+     # Add examples
+     gr.Examples(
+         examples=EXAMPLES,
+         inputs=prompt
+     )
+
+     # Add information about the model
+     gr.Markdown("""
+     ## About
+
+     This demo uses the CodeLlama-7b model to generate code completions based on your prompts.
+
+     - **Max Length**: Controls the maximum length of the generated text
+     - **Temperature**: Controls randomness (lower = more deterministic)
+     - **Top-p**: Controls diversity via nucleus sampling
+     - **Top-k**: Controls diversity via top-k sampling
+
+     Created by DheepLearning
+     """)
+
+ # Launch the app
+ demo.launch()
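
One caveat on this change: the new app.py imports `os` but never uses it, and the commit swaps the prequantized `TheBloke/CodeLlama-7B-GPTQ` checkpoint for `meta-llama/CodeLlama-7b-hf`, which is a gated repository that requires accepting Meta's license and authenticating with the Hub. A minimal sketch of what that authentication could look like before `load_model()` runs, assuming the Space defines an `HF_TOKEN` secret (this is not part of the commit):

```python
import os
from huggingface_hub import login

# Hypothetical: read a Space secret and authenticate with the Hub so that
# from_pretrained() can download the gated CodeLlama weights.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
```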
requirements.txt CHANGED
@@ -1,3 +1,6 @@
  transformers==4.39.3
  accelerate
- gradio
+ gradio>=4.0.0
+ torch>=2.0.0
+ sentencepiece
+ protobuf
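
`sentencepiece` and `protobuf` are typically needed here because the Llama tokenizer is backed by a SentencePiece model; without them, loading the tokenizer can fail at startup. A quick smoke test (not part of this commit) to confirm the pinned set imports cleanly:

```python
# Verify the dependency set imports cleanly before launching the Space.
import gradio
import sentencepiece
import torch
import transformers

print("transformers", transformers.__version__)
print("torch", torch.__version__)
print("gradio", gradio.__version__)
```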