topshik committed
Commit 4179e39 · verified · 1 Parent(s): bf021d8

Update README.md

Files changed (1): README.md +2 -3
README.md CHANGED
```diff
@@ -227,7 +227,7 @@ Keep in mind that base model is not fine-tuned for downstream tasks out-of-the-b
 
 # Training Data
 - Total Training Tokens: ~4.2 trillion tokens
-- Corpus: StackV1, Starcoderdata, StackV2, CommitPack, English wiki
+- Corpus: The Stack, StarCoder Training Dataset, The Stack v2, CommitPack, English Wikipedia
 
 # Training Details
 - Context Window: 8,192 tokens
@@ -346,8 +346,7 @@ if __name__ == "__main__":
     print(fibonacci(10))
 """
 
-encoded_input = tokenizer(f"<fim_suffix>suffix<fim_prefix>{prefix}<fim_middle>", return_tensors='pt', return_token_type_ids=False)
-input_len = len(encoded_input["input_ids"][0])
+encoded_input = tokenizer(f"<fim_suffix>{suffix}<fim_prefix>{prefix}<fim_middle>", return_tensors='pt', return_token_type_ids=False)
 out = model.generate(
     **encoded_input,
     max_new_tokens=100,
```
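
Note on the second hunk: the old prompt passed the literal word `suffix` into the fill-in-the-middle (FIM) template instead of interpolating the `suffix` variable, and the `input_len` line was dropped. Below is a minimal sketch of the corrected FIM call in context; the checkpoint name and the `prefix`/`suffix` strings are placeholders, not values from this repository, and only the `<fim_suffix>{suffix}<fim_prefix>{prefix}<fim_middle>` prompt layout comes from the diff.

```python
# Sketch of the corrected FIM generation call, assuming a generic causal-LM
# checkpoint on the Hugging Face Hub. Checkpoint name and prompt contents are
# placeholders for illustration only.
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "your-org/your-model"  # placeholder: substitute the model this README describes
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

prefix = "def fibonacci(n: int) -> int:\n    "
suffix = "\n\nif __name__ == \"__main__\":\n    print(fibonacci(10))\n"

# The fix: interpolate the suffix variable ({suffix}) rather than embedding the
# literal word "suffix" in the prompt.
encoded_input = tokenizer(
    f"<fim_suffix>{suffix}<fim_prefix>{prefix}<fim_middle>",
    return_tensors="pt",
    return_token_type_ids=False,
)
out = model.generate(**encoded_input, max_new_tokens=100)

# Decode only the newly generated middle, skipping the prompt tokens.
input_len = encoded_input["input_ids"].shape[1]
print(tokenizer.decode(out[0][input_len:], skip_special_tokens=True))
```

With the interpolated `{suffix}`, the model sees the real code that follows the cursor and can generate a middle that joins the two halves, which is the whole point of the FIM prompt format.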