Built with Axolotl

See axolotl config

axolotl version: 0.4.1

base_model: collinear-ai/prometheus-7b-v2.0
tokenizer_config: collinear-ai/prometheus-7b-v2.0
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

is_mistral_derived_model: true

# Output configuration
hub_model_id: collinear-ai/prometheus2-7b-judge-for-iclr-gt-margin2-task-prompt-margin3-fullFT
dataset_prepared_path: /workspace/gen_judge/data/prometheus2-7b-judge-for-iclr-gt-margin2-task-prompt-margi-fullFT
output_dir: /workspace/gen_judge/prometheus2-7b-judge-for-iclr-gt-margin-margin2-task-prompt-margin3fullFT

# Do the Q in QLoRA (left commented out: this run is a full fine-tune)
# load_in_8bit: false
# load_in_4bit: true
strict: false

# Format the dataset into the right instruction format.
chat_template: inst # "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n"
datasets:
  - path: collinear-ai/dpo-iclr-feedbackcollection-experiments-models-V2-CORRECT
    split: dpo_for_gt_margin_2_prom7b
    type: chat_template.default
    field_messages: conversation
    field_chosen: chosen
    field_rejected: rejected
    message_field_role: role
    message_field_content: content
    roles:
      system:
        - system
      user:
        - user
      assistant:
        - assistant
val_set_size: 0.1

# RL
rl: dpo
beta: 0.1
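# DPO loss per pair: -log sigmoid(beta * (policy-vs-reference log-ratio of the chosen
# completion minus that of the rejected one)); smaller beta allows more drift from the reference.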

# QLoRA adapter (unused in this full fine-tune)
# adapter: qlora
# lora_model_dir:

# Data packing
sequence_len: 1024
eval_sample_packing: false
sample_packing: false
pad_to_sequence_len: true

# LoRA config
# lora_r: 16
# lora_alpha: 16
# lora_dropout: 0.05
# lora_target_linear: true
# lora_fan_in_fan_out:
# lora_target_modules:
#   - gate_proj
#   - down_proj
#   - up_proj
#   - q_proj
#   - v_proj
#   - k_proj
#   - o_proj
# lora_modules_to_save: 
#   - embed_tokens
#   - lm_head

# Logging config
wandb_project: iclr-dpo
wandb_entity: nazneen
wandb_name: prometheus2-7b-judge-for-iclr-margin2-task-prompt-margin3-fullFT

# Trainer config
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 20
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 0.0000005
adam_beta1: 0.95
adam_beta2: 0.999
adam_epsilon: 0.0000001

train_on_inputs: false
group_by_length: false
bf16: true
fp16:
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:

loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

flash_attention: true
warmup_steps: 100
eval_steps: 5 # set high to avoid evaluation, which otherwise hits an FSDP bug
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 2
debug:
deepspeed:
weight_decay: 0.02
fsdp:
special_tokens:
  pad_token: </s>
  bos_token: <s>
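
For reference, the datasets block above implies preference records shaped roughly as below. This is a hypothetical example (the field names come from the config; the contents are invented for illustration), showing how the inst chat template renders the conversation into Mistral-style [INST] tags:

# Hypothetical record matching the dataset config above
# (field names from the config; contents invented for illustration).
record = {
    "conversation": [
        {"role": "system", "content": "You are a rigorous reviewer."},
        {"role": "user", "content": "Evaluate the following response ..."},
    ],
    "chosen": {"role": "assistant", "content": "Feedback: ... [RESULT] 5"},
    "rejected": {"role": "assistant", "content": "Feedback: ... [RESULT] 2"},
}

# The inst template folds the system message into the first user turn,
# so the prompt side renders roughly as:
#   <s> [INST] You are a rigorous reviewer.\n\nEvaluate the following response ... [/INST]
# and the chosen/rejected completions are appended as " {content}</s>".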

prometheus2-7b-judge-for-iclr-gt-margin2-task-prompt-margin3-fullFT

This model is a full-parameter DPO fine-tune of collinear-ai/prometheus-7b-v2.0 on the collinear-ai/dpo-iclr-feedbackcollection-experiments-models-V2-CORRECT dataset (split dpo_for_gt_margin_2_prom7b; see the Axolotl config above).

Model description

More information needed

Intended uses & limitations

More information needed
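
Pending more detail from the authors, here is a minimal inference sketch (standard transformers usage; the prompt content is a placeholder, and it assumes the uploaded tokenizer carries a compatible chat template):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "collinear-ai/prometheus2-7b-judge-for-iclr-gt-margin2-task-prompt-margin3-fullFT"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

# Placeholder judge prompt; the real rubric/response format is up to the authors.
messages = [{"role": "user", "content": "Evaluate the following response: ..."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=128)  # eval_max_new_tokens in the config
print(tokenizer.decode(output[0, input_ids.shape[1]:], skip_special_tokens=True))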

Training and evaluation data

Per the Axolotl config above: the dpo_for_gt_margin_2_prom7b split of collinear-ai/dpo-iclr-feedbackcollection-experiments-models-V2-CORRECT, with val_set_size: 0.1 (10%) held out for evaluation.

Training procedure
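
Training used DPO (rl: dpo with beta: 0.1 in the config above). As a sketch of the standard DPO objective (Rafailov et al., 2023), not code taken from this run:

import torch.nn.functional as F

def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             ref_chosen_logps, ref_rejected_logps, beta=0.1):
    # Per-pair log-ratios of the policy vs. the frozen reference model.
    chosen_logratio = policy_chosen_logps - ref_chosen_logps
    rejected_logratio = policy_rejected_logps - ref_rejected_logps
    # beta=0.1 as configured; larger beta penalizes drift from the reference more.
    return -F.logsigmoid(beta * (chosen_logratio - rejected_logratio)).mean()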

Training hyperparameters

The following hyperparameters were used during training:

  • learning_rate: 5e-07
  • train_batch_size: 2
  • eval_batch_size: 8
  • seed: 42
  • distributed_type: multi-GPU
  • num_devices: 2
  • gradient_accumulation_steps: 4
  • total_train_batch_size: 16
  • total_eval_batch_size: 16
  • optimizer: paged AdamW (8-bit, per the config) with betas=(0.95,0.999) and epsilon=1e-07
  • lr_scheduler_type: cosine
  • lr_scheduler_warmup_steps: 100
  • training_steps: 1880
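
(For reference, total_train_batch_size follows from the config: micro_batch_size 2 × gradient_accumulation_steps 4 × 2 GPUs = 16.)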

Training results

Framework versions

  • Transformers 4.44.2
  • PyTorch 2.1.2+cu118
  • Datasets 2.19.1
  • Tokenizers 0.19.1