architecture:
    backbone_dtype: float16
    gradient_checkpointing: false
    intermediate_dropout: 0.0
    config_item_that_is_not_used: 0
augmentation:
    token_mask_probability: 0.0
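`backbone_dtype: float16` controls the precision the backbone weights are loaded in. As a minimal sketch of what that corresponds to in plain `transformers` (not the tool's internal loader):

```python
import torch
from transformers import AutoModelForCausalLM

# Load the backbone in float16, mirroring architecture.backbone_dtype above.
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/pythia-12b-deduped",
    torch_dtype=torch.float16,
)
# gradient_checkpointing is off in this config; enabling it trades compute for memory:
# model.gradient_checkpointing_enable()
```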
dataset:
    add_eos_token_to_answer: true
    add_eos_token_to_prompt: true
    answer_column: output
    data_sample: 0.1
    data_sample_choice:
    - Train
    - Validation
    mask_prompt_labels: false
    prompt_column:
    - instruction
    text_answer_separator: \n
    text_prompt_start: ''
    train_dataframe: data/user/train/train.csv
    validation_dataframe: None
    validation_size: 0.01
    validation_strategy: automatic
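A sketch of how these options could assemble one `train.csv` row into a training string; the function below is illustrative, not the tool's actual preprocessing API. With `mask_prompt_labels: false`, the loss would also cover the prompt tokens, not just the answer.

```python
def build_text(row: dict, eos_token: str = "<|endoftext|>") -> str:
    """Illustrative assembly of one sample from a train.csv row."""
    text = "" + str(row["instruction"])  # text_prompt_start: '' + prompt_column
    text += eos_token                    # add_eos_token_to_prompt: true
    text += "\n" + str(row["output"])    # text_answer_separator + answer_column
    text += eos_token                    # add_eos_token_to_answer: true
    return text

print(build_text({"instruction": "Say hi.", "output": "Hi!"}))
```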
environment:
    compile_model: false
    find_unused_parameters: false
    gpus:
    - '0'
    mixed_precision: true
    number_of_workers: 8
    seed: -1
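In plain PyTorch terms, `mixed_precision: true` corresponds to autocast plus a gradient scaler, and `seed: -1` is commonly treated as "draw a fresh seed each run". A generic sketch (requires a GPU), not the trainer's actual code:

```python
import random
import numpy as np
import torch

def set_seed(seed: int) -> int:
    # seed: -1 is interpreted here as "pick a random seed".
    if seed < 0:
        seed = random.randint(0, 2**31 - 1)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    return seed

set_seed(-1)

# One mixed-precision training step on a toy model.
model = torch.nn.Linear(8, 8).cuda()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()
x = torch.randn(4, 8, device="cuda")
with torch.autocast(device_type="cuda", dtype=torch.float16):
    loss = model(x).float().pow(2).mean()
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```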
experiment_name: test
llm_backbone: EleutherAI/pythia-12b-deduped
logging:
    logger: None
    neptune_project: ''
output_directory: output/user/test/
prediction:
    batch_size_inference: 0
    do_sample: false
    max_length_inference: 256
    max_time: 0.0
    metric: GPT3.5
    min_length_inference: 2
    num_beams: 2
    repetition_penalty: 1.2
    stop_tokens: ''
    temperature: 0.0
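These inference settings map naturally onto `transformers` generation arguments (`metric: GPT3.5` instead refers to scoring validation answers with an external LLM judge). A hedged sketch; mapping `min_length_inference`/`max_length_inference` to new-token counts is an assumption here, and any smaller causal-LM checkpoint can stand in for the 12B backbone when trying it:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "EleutherAI/pythia-12b-deduped"  # llm_backbone from this config
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

inputs = tokenizer("Summarize LoRA in one sentence.", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=False,          # greedy/beam decoding; temperature: 0.0 is then unused
    num_beams=2,
    repetition_penalty=1.2,
    min_new_tokens=2,         # assumed mapping for min_length_inference
    max_new_tokens=256,       # assumed mapping for max_length_inference
    # max_time: 0.0 is read here as "no time limit", so it is omitted
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```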
problem_type: text_causal_language_modeling
tokenizer:
    max_length: 144
    padding_quantile: 1.0
    tokenizer_kwargs: '{"use_fast": true, "add_prefix_space": false}'
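The `tokenizer_kwargs` value is a JSON string forwarded to the tokenizer constructor, and `max_length: 144` caps each tokenized sample (`padding_quantile: 1.0` would pad up to the longest sequence). Roughly, in plain `transformers`:

```python
import json
from transformers import AutoTokenizer

kwargs = json.loads('{"use_fast": true, "add_prefix_space": false}')
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-12b-deduped", **kwargs)

# Truncate each sample to the configured max_length of 144 tokens.
enc = tokenizer("some training text", truncation=True, max_length=144)
```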
training:
    batch_size: 3
    epochs: 0
    evaluate_before_training: true
    evaluation_epochs: 1.0
    grad_accumulation: 1
    gradient_clip: 0.0
    learning_rate: 0.0001
    lora: true
    use_dora: false
    lora_alpha: 16
    lora_dropout: 0.05
    lora_r: 4
    lora_target_modules: ''
    optimizer: AdamW
    save_checkpoint: last
    schedule: Cosine
    train_validation_data: false
    warmup_epochs: 0.0
    weight_decay: 0.0
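With `lora: true`, only low-rank adapter weights are trained; note that with `epochs: 0` this particular experiment appears to run only the pre-training evaluation pass. A minimal sketch of an equivalent `peft` + `transformers` setup using the hyperparameters above, where `lora_target_modules: ''` is taken to mean the architecture default (the `query_key_value` target and the step count below are assumptions):

```python
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, get_cosine_schedule_with_warmup

model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/pythia-12b-deduped", torch_dtype=torch.float16
)
lora_config = LoraConfig(
    r=4,                 # lora_r
    lora_alpha=16,
    lora_dropout=0.05,
    use_dora=False,
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],  # assumption: typical for GPT-NeoX/Pythia
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# AdamW + cosine schedule, matching optimizer/schedule/warmup_epochs/weight_decay.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.0)
num_training_steps = 1000  # placeholder: epochs * steps_per_epoch / grad_accumulation
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)
```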