topshik commited on
Commit
2a7d386
·
verified ·
1 Parent(s): 3c521ef

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -51
README.md CHANGED
@@ -165,62 +165,13 @@ Designed for integration into professional developer tooling (e.g., intelligent
165
  - Security: Code suggestions should not be assumed to be secure or free of vulnerabilities.
166
 
167
  # Sample Usage
168
- Here are examples of how to run and sample from the model.
169
 
170
- ## Generic generation
171
  ```python
172
  import json
173
  from transformers import AutoTokenizer, AutoModelForCausalLM
174
 
175
- example = """
176
- import sys
177
- import os
178
- import time
179
-
180
- sys.path.append(os.getcwd())
181
-
182
- from cluster.prepare_data import get_headers_pairs_list, write_dist_matrix
183
- from cluster.token_edit_distance import get_distance_matrix
184
-
185
- if len(sys.argv) < 3:
186
- print(
187
- "Too few arguments. You should provide: \n1. dataset_filename" +
188
- "\n2. output_data_filename"
189
- )
190
- sys.exit()
191
-
192
- start = time.perf_counter()
193
- dataset_filename_ = sys.argv[1]
194
- output_data_filename_ = sys.argv[2]
195
-
196
- headers_pairs = get_headers_pairs_list(dataset_filename_, verbose=True)
197
-
198
- dist_matrix, max_dist = get_distance_matrix(
199
- list(map(lambda x: x[1], headers_pairs)),
200
- verbose=True
201
- )
202
-
203
- write_dist_matrix(dist_matrix, max_dist, output_data_filename_, verbose=True)
204
-
205
- end = time.perf_counter()
206
- """
207
-
208
- tokenizer = AutoTokenizer.from_pretrained('JetBrains/Mellum-4b-sft-python')
209
- model = AutoModelForCausalLM.from_pretrained('JetBrains/Mellum-4b-sft-python')
210
- encoded_input = tokenizer(example, return_tensors='pt', return_token_type_ids=False)
211
- input_len = len(encoded_input["input_ids"][0])
212
- out = model.generate(
213
- **encoded_input,
214
- max_new_tokens=100,
215
- )
216
- print("### Context")
217
- print(tokenizer.decode(out[0][:input_len]))
218
- print("### Prediction")
219
- print(tokenizer.decode(out[0][input_len:]))
220
- ```
221
-
222
- ## Fill in the middle with additional files as context generation
223
- ```python
224
  example = """<filename>utils.py
225
  def multiply(x, y):
226
  return x * y
 
165
  - Security: Code suggestions should not be assumed to be secure or free of vulnerabilities.
166
 
167
  # Sample Usage
168
+ Here is an example of how to run and sample from the model with additional file context and fill-in-the-middle generation.
169
 
170
+ ## Fill in the middle with additional files as context generation
171
  ```python
172
  import json
173
  from transformers import AutoTokenizer, AutoModelForCausalLM
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  example = """<filename>utils.py
176
  def multiply(x, y):
177
  return x * y