metascroy committed (verified)
Commit c2a8bb5 · 1 Parent(s): 0b0f0da

Update README.md

Files changed (1):
  1. README.md +18 -16
README.md CHANGED
@@ -13,6 +13,8 @@ import logging
 
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
+from huggingface_hub import HfApi
+import io
 
 # Configure logging to see warnings and debug information
 logging.basicConfig(
@@ -47,41 +49,41 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 # Push to hub
 MODEL_NAME = model_id.split("/")[-1]
-save_to = f"torchao-testing/{MODEL_NAME}-IntxWeightOnlyConfig-v{version}-0.14.0.dev-safetensors"
+save_to = f"torchao-testing/{MODEL_NAME}-IntxWeightOnlyConfig-v{version}-0.14.0.dev"
 quantized_model.push_to_hub(save_to, safe_serialization=False)
 tokenizer.push_to_hub(save_to)
 
 
 # Manual Testing
-prompt = "What are we having for dinner?"
+prompt = "Hey, are you conscious? Can you talk to me?"
 print("Prompt:", prompt)
 inputs = tokenizer(
     prompt,
     return_tensors="pt",
 ).to("cuda")
 
-# Detting temperature to 0 to make sure result deterministic
+# setting temperature to 0 to make sure result deterministic
 generated_ids = quantized_model.generate(**inputs, max_new_tokens=128, temperature=0)
 
-correct_output_text = tokenizer.batch_decode(
-    generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+api = HfApi()
+buf = io.BytesIO()
+torch.save(prompt, buf)
+api.upload_file(
+    path_or_fileobj=buf,
+    path_in_repo="model_prompt.pt",
+    repo_id=save_to,
 )
-print("Response:", correct_output_text[0][len(prompt) :])
-
 
-# Load model from saved checkpoint
-reloaded_model = AutoModelForCausalLM.from_pretrained(
-    save_to,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
+buf = io.BytesIO()
+torch.save(generated_ids, buf)
+api.upload_file(
+    path_or_fileobj=buf,
+    path_in_repo="model_output.pt",
+    repo_id=save_to,
 )
 
-generated_ids = reloaded_model.generate(**inputs, max_new_tokens=128, temperature=0)
 output_text = tokenizer.batch_decode(
     generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
 )
 print("Response:", output_text[0][len(prompt) :])
-
-assert(correct_output_text == output_text)
-
 ```
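
One caveat on the upload pattern added here: `torch.save` leaves the `BytesIO` cursor at the end of the buffer, and `huggingface_hub`'s `upload_file` appears to read a passed file object from its current position, so each buffer likely needs to be rewound before upload. A minimal sketch of the first upload with the rewind added, assuming `prompt` and `save_to` as defined above:

```python
import io

import torch
from huggingface_hub import HfApi

api = HfApi()
buf = io.BytesIO()
torch.save(prompt, buf)  # prompt as defined in the script above
buf.seek(0)  # rewind: upload_file reads the file object from its current cursor position
api.upload_file(
    path_or_fileobj=buf,
    path_in_repo="model_prompt.pt",
    repo_id=save_to,  # repo id from the push above
)
```

The same `seek(0)` would apply before the `model_output.pt` upload.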
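
These uploaded artifacts take over the job of the deleted inline check (`assert(correct_output_text == output_text)`): they pin the prompt and the expected greedy generation so a later test can verify a reloaded checkpoint against them. For illustration, a minimal sketch of that consumer side; the `hf_hub_download` usage, the placeholder repo id, and the reload arguments are my assumptions, not part of this commit:

```python
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical placeholder: fill in with the save_to value produced by the push above.
repo_id = "torchao-testing/<MODEL_NAME>-IntxWeightOnlyConfig-v<version>-0.14.0.dev"

# Fetch the reference prompt and generation stored by the script in the diff.
prompt = torch.load(hf_hub_download(repo_id=repo_id, filename="model_prompt.pt"))
expected_ids = torch.load(
    hf_hub_download(repo_id=repo_id, filename="model_output.pt"), map_location="cpu"
)

# Reload the quantized checkpoint and regenerate with the same settings
# used in the diff (temperature=0, max_new_tokens=128).
model = AutoModelForCausalLM.from_pretrained(
    repo_id, device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=128, temperature=0)

# The reloaded model should reproduce the stored reference generation exactly.
assert torch.equal(generated_ids.cpu(), expected_ids)
```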