Push model using huggingface_hub.

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 license: apache-2.0
 tags:
 - trl
 - ppo
@@ -25,7 +26,7 @@ You can then generate text as follows:
 ```python
 from transformers import pipeline
-generator = pipeline("text-generation", model="yuansui//tmp/tmpp9w_oz9m/yuansui/llama-160m-PPO-tuned")
 outputs = generator("Hello, my llama is cute")
 ```
@@ -35,8 +36,8 @@ If you want to use the model for training or to obtain the outputs from the valu
 from transformers import AutoTokenizer
 from trl import AutoModelForCausalLMWithValueHead
-tokenizer = AutoTokenizer.from_pretrained("yuansui//tmp/tmpp9w_oz9m/yuansui/llama-160m-PPO-tuned")
-model = AutoModelForCausalLMWithValueHead.from_pretrained("yuansui//tmp/tmpp9w_oz9m/yuansui/llama-160m-PPO-tuned")
 inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
 outputs = model(**inputs, labels=inputs["input_ids"])

 ---
 license: apache-2.0
+library_name: transformers
 tags:
 - trl
 - ppo
 ```python
 from transformers import pipeline
+generator = pipeline("text-generation", model="yuansui/llama-160m-PPO-tuned")
 outputs = generator("Hello, my llama is cute")
 ```
 from transformers import AutoTokenizer
 from trl import AutoModelForCausalLMWithValueHead
+tokenizer = AutoTokenizer.from_pretrained("yuansui/llama-160m-PPO-tuned")
+model = AutoModelForCausalLMWithValueHead.from_pretrained("yuansui/llama-160m-PPO-tuned")
 inputs = tokenizer("Hello, my llama is cute", return_tensors="pt")
 outputs = model(**inputs, labels=inputs["input_ids"])

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
-    "up_proj",
-    "o_proj",
     "gate_proj",
     "k_proj",
-    "v_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
     "q_proj",
     "gate_proj",
+    "up_proj",
     "k_proj",
+    "down_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a87fb41313ba9e61ae1aa134ff1e62eb31a1e5b56f0dcfa5e2e26025e764be4
 size 6804944

 version https://git-lfs.github.com/spec/v1
+oid sha256:05f68b7f9aa6b17927f005e0eb2cba73a8586ae6a68cffe9fa194eeff35d3645
 size 6804944

config.json CHANGED Viewed

@@ -1,20 +1,21 @@
 {
   "accelerator_kwargs": {},
   "adap_kl_ctrl": true,
-  "backward_batch_size": 64,
-  "batch_size": 64,
   "cliprange": 0.2,
   "cliprange_value": 0.2,
   "compare_steps": 1,
   "early_stopping": true,
-  "exp_name": "ppo_v2",
   "forward_batch_size": null,
-  "gamma": 1,
-  "global_backward_batch_size": 256,
-  "global_batch_size": 256,
-  "gradient_accumulation_steps": 4,
   "gradient_checkpointing": false,
-  "horizon": 10000,
   "init_kl_coef": 0.2,
   "is_encoder_decoder": false,
   "is_peft_model": true,
@@ -23,21 +24,21 @@
   "learning_rate": 1.41e-05,
   "log_with": null,
   "max_grad_norm": null,
-  "mini_batch_size": 16,
   "model_name": "JackFram/llama-160m",
   "optimize_cuda_cache": true,
   "optimize_device_cache": false,
   "ppo_epochs": 4,
   "project_kwargs": {},
   "push_to_hub_if_best_kwargs": {},
-  "query_dataset": "imdb",
   "ratio_threshold": 10.0,
   "remove_unused_columns": true,
   "reward_model": "sentiment-analysis:lvwerra/distilbert-imdb",
   "score_clip": null,
   "seed": 0,
   "steps": 20000,
-  "target": 6,
   "target_kl": 0.1,
   "task_name": null,
   "tracker_kwargs": {},

 {
   "accelerator_kwargs": {},
   "adap_kl_ctrl": true,
+  "backward_batch_size": 4,
+  "batch_size": 4,
   "cliprange": 0.2,
   "cliprange_value": 0.2,
   "compare_steps": 1,
+  "dataset_num_proc": null,
   "early_stopping": true,
+  "exp_name": "ppo",
   "forward_batch_size": null,
+  "gamma": 1.0,
+  "global_backward_batch_size": 16,
+  "global_batch_size": 16,
+  "gradient_accumulation_steps": 1,
   "gradient_checkpointing": false,
+  "horizon": 10000.0,
   "init_kl_coef": 0.2,
   "is_encoder_decoder": false,
   "is_peft_model": true,
   "learning_rate": 1.41e-05,
   "log_with": null,
   "max_grad_norm": null,
+  "mini_batch_size": 4,
   "model_name": "JackFram/llama-160m",
   "optimize_cuda_cache": true,
   "optimize_device_cache": false,
   "ppo_epochs": 4,
   "project_kwargs": {},
   "push_to_hub_if_best_kwargs": {},
+  "query_dataset": "stanfordnlp/imdb",
   "ratio_threshold": 10.0,
   "remove_unused_columns": true,
   "reward_model": "sentiment-analysis:lvwerra/distilbert-imdb",
   "score_clip": null,
   "seed": 0,
   "steps": 20000,
+  "target": 6.0,
   "target_kl": 0.1,
   "task_name": null,
   "tracker_kwargs": {},

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:933b0bc7a6b26769d3eb5b8470bcc78f4a0ab989508da9aa2ff3609f6ce56db6
 size 6785850

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad8d1f3504263cebf551f92ba1b59b8d358cdbecb9b424ab9ce66e211d612b
 size 6785850