---
inference: false
library_name: transformers
language:
- en
- fr
- de
- es
- it
- pt
- ja
- ko
- zh
- ar
- el
- fa
- pl
- id
- cs
- he
- hi
- nl
- ro
- ru
- tr
- uk
- vi
license: cc-by-nc-4.0
base_model:
- CohereLabs/c4ai-command-a-03-2025
datasets:
- jukofyork/instruction-refusals-500MB
- jukofyork/instruction-responses-500MB
---

**NOTE**: Read the [Control Adapter documentation](https://github.com/jukofyork/qlora-pipe-lite/blob/main/docs/ControlAdapters.md) for implementation details.

---

Trained via [qlora-pipe-lite](https://github.com/jukofyork/qlora-pipe-lite):

```toml
# ==============================
# MODEL AND OUTPUT CONFIGURATION
# ==============================

model_dir = '/mnt/models/command-a-03-2025'
output_dir = '/mnt/finetunes/command-a-03-2025-uncut'

# ===========================
# TRAINING TYPE CONFIGURATION
# ===========================

use_control_adapters = true

load_in_4bit = true

# =============================
# CONTROL ADAPTER CONFIGURATION
# =============================

# ~4 tokens per trainable parameter (2e8/(64*64*(12288+1)))
lora_rank = 64

control_adapter_gamma = 0.1

# =======================
# OPTIMIZER CONFIGURATION
# =======================

lr = 1e-3

# ======================
# TRAINING CONFIGURATION
# ======================

sequence_len = 4096

pipeline_stages = 2

# 30 batch size (3*10) --> 120k tokens per step (4096*30)
gradient_accumulation_steps = 10

use_column_major_topology = true

# =====================
# DATASET CONFIGURATION
# =====================

sequence_prefix = 5 # "<BOS_TOKEN>"

document_prefix = [255000, 255007] # "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
document_suffix = 255001 # "<|END_OF_TURN_TOKEN|>"

mask_tokens = true # Mask all special tokens

drop_tails = true

mix_datasets = true

# -------------------
# POSITIVE CLASS DATA
# -------------------

[[datasets]]
dataset_path = '/mnt/datasets/instruction-responses-500MB/*.json'
control_class = 1

# -------------------
# NEGATIVE CLASS DATA
# -------------------

[[datasets]]
dataset_path = '/mnt/datasets/instruction-refusals-500MB/*.json'
control_class = -1
```
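As a sanity check, the hard-coded special-token IDs in the `DATASET CONFIGURATION` section can be verified against the Command A tokenizer. A minimal sketch using `transformers` (it assumes you have access to the gated base repo):

```python
from transformers import AutoTokenizer

# Note: the base repo is gated, so you may need `huggingface-cli login` first.
tok = AutoTokenizer.from_pretrained("CohereLabs/c4ai-command-a-03-2025")

# IDs used above for sequence_prefix, document_prefix and document_suffix.
for token in ["<BOS_TOKEN>", "<|START_OF_TURN_TOKEN|>",
              "<|CHATBOT_TOKEN|>", "<|END_OF_TURN_TOKEN|>"]:
    print(token, tok.convert_tokens_to_ids(token))
# Expected, per the config above: 5, 255000, 255007, 255001
```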
|
|
|
|
|
The adapter was trained on ~200M tokens (i.e. ~100M positive and ~100M negative) from:

- [jukofyork/instruction-responses-500MB](https://huggingface.co/datasets/jukofyork/instruction-responses-500MB)
- [jukofyork/instruction-refusals-500MB](https://huggingface.co/datasets/jukofyork/instruction-refusals-500MB)
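The `dataset_path` globs in the config expect local copies of these datasets; a minimal sketch of one way to fetch them with `huggingface_hub` (the `/mnt/datasets/...` target paths simply mirror the config above):

```python
from huggingface_hub import snapshot_download

# Download both datasets to the locations referenced by `dataset_path` above.
for repo_id in ("jukofyork/instruction-responses-500MB",
                "jukofyork/instruction-refusals-500MB"):
    snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=f"/mnt/datasets/{repo_id.split('/')[-1]}",
    )
```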
|
|
|
|
|
Training took just under 4 days on 6x `RTX A6000` spread across 3 machines:







(hence the effective batch size of 30: `(num_gpus / pipeline_stages) * gradient_accumulation_steps = (6 / 2) * 10 = 30`)
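The same arithmetic written out, using only values that appear in the config above:

```python
# Effective batch size and tokens per optimizer step for this run.
num_gpus = 6
pipeline_stages = 2
gradient_accumulation_steps = 10
sequence_len = 4096

data_parallel_ranks = num_gpus // pipeline_stages                # 3
batch_size = data_parallel_ranks * gradient_accumulation_steps   # 30
tokens_per_step = batch_size * sequence_len                      # 122880 (~120k)
total_steps = 200_000_000 // tokens_per_step                     # ~1627 steps for ~200M tokens

print(batch_size, tokens_per_step, total_steps)
```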
|
|
|
|
|
---

The control adapter was then converted to a LoRA using [control_adapter_to_lora.py](https://github.com/jukofyork/qlora-pipe-lite/blob/main/control_adapter_to_lora.py):

[jukofyork/command-a-03-2025-uncut-lora](https://huggingface.co/jukofyork/command-a-03-2025-uncut-lora)

and then merged using the [merge-lora](https://huggingface.co/spaces/jukofyork/merge-lora) space.
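For reference only (the actual merge was done with the space above), a roughly equivalent local merge can be sketched with `peft`, assuming enough memory to hold the model in bf16:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "CohereLabs/c4ai-command-a-03-2025"
lora_id = "jukofyork/command-a-03-2025-uncut-lora"

# Load the base model, apply the converted LoRA, then bake the weights in.
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(model, lora_id)
model = model.merge_and_unload()

# Save the merged model together with the base tokenizer.
model.save_pretrained("command-a-03-2025-uncut")
AutoTokenizer.from_pretrained(base_id).save_pretrained("command-a-03-2025-uncut")
```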
|
|
|
|
|
---

See [jukofyork/command-a-03-2025-uncut-lora-gguf](https://huggingface.co/jukofyork/command-a-03-2025-uncut-lora-gguf) for the LoRA in GGUF format, which can be used with `llama.cpp`'s `--lora` option on top of the base [CohereLabs/c4ai-command-a-03-2025](https://huggingface.co/CohereLabs/c4ai-command-a-03-2025) model to get the same effect.
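The same thing can be done from Python via `llama-cpp-python`; a sketch only, where both GGUF filenames are placeholders for whichever base quant and LoRA files you actually download:

```python
from llama_cpp import Llama

# Apply the GGUF LoRA on top of a GGUF quant of the base model at load time.
llm = Llama(
    model_path="command-a-03-2025-Q4_K_M.gguf",      # placeholder base quant
    lora_path="command-a-03-2025-uncut-lora.gguf",   # placeholder LoRA file
    n_ctx=4096,
)

out = llm("Hello!", max_tokens=32)
print(out["choices"][0]["text"])
```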