# LoRA adapter hyperparameters
LORA_R = 32            # rank of the low-rank update matrices
LORA_ALPHA = 64        # LoRA scaling factor (alpha / r = 2.0)
LORA_DROPOUT = 0.1     # dropout applied inside the LoRA layers

# Matryoshka (MRL) dims: extra prefix dimensions to supervise (none
# configured here) plus the model's original hidden_state size of 2560
MRL_DIMS_CONFIG = [] + [2560]

# Training hyperparameters
BATCH_SIZE = 8
LEARNING_RATE = 2e-5
EPOCHS = 1
SAVE_MODEL_CYCLE = 1          # save a checkpoint every N epochs
GRAD_ACCUMULATION_STEPS = 4   # effective batch size = 8 * 4 = 32
SIMCSE_TEMPERATURE = 0.05     # temperature for the SimCSE InfoNCE loss
TRAIN_SET_SIZE_SCALE = 0.05   # fraction of the training split actually used
VAL_SET_SIZE_SCALE = 0.05     # fraction of the validation split actually used
GLOBAL_STEP_SAVE = 100        # also save a checkpoint every N optimizer steps
Dataset used: `princeton-nlp/datasets-for-simcse`
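For context, here is a minimal sketch of how the LoRA constants above would plug into a Hugging Face `peft` configuration. The base model name and `target_modules` are illustrative assumptions, not part of this config; any model whose hidden size is 2560 (e.g., `microsoft/phi-2`) matches the MRL setting above.

```python
# A minimal sketch, assuming the `peft` and `transformers` libraries;
# the base model and target_modules are assumptions, not from this repo.
from peft import LoraConfig, get_peft_model
from transformers import AutoModel

base_model = AutoModel.from_pretrained("microsoft/phi-2")  # assumed base; hidden size 2560

lora_config = LoraConfig(
    r=LORA_R,                             # rank 32
    lora_alpha=LORA_ALPHA,                # scaling 64 (alpha / r = 2.0)
    lora_dropout=LORA_DROPOUT,            # 0.1
    target_modules=["q_proj", "v_proj"],  # assumed attention projections
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()  # only the LoRA adapters train
```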
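Loading and subsampling could look like the sketch below, assuming the `datasets` library and the unsupervised `wiki1m_for_simcse.txt` file from that repo; the exact file choice is an assumption.

```python
from datasets import load_dataset

# Assumption: the unsupervised wiki1m file; the .txt extension lets
# `datasets` pick the plain-text builder automatically.
ds = load_dataset(
    "princeton-nlp/datasets-for-simcse",
    data_files="wiki1m_for_simcse.txt",
    split="train",
)
train_size = int(len(ds) * TRAIN_SET_SIZE_SCALE)  # keep 5% of the rows
train_ds = ds.select(range(train_size))
```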
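`SIMCSE_TEMPERATURE` and `MRL_DIMS_CONFIG` typically interact as follows: the SimCSE InfoNCE loss treats matching in-batch pairs as positives and all other rows as negatives, and the Matryoshka variant averages that loss over truncated embedding prefixes. A sketch under those assumptions; everything beyond the two constants is illustrative.

```python
import torch
import torch.nn.functional as F

def simcse_mrl_loss(emb_a, emb_b, dims=MRL_DIMS_CONFIG,
                    temperature=SIMCSE_TEMPERATURE):
    """SimCSE InfoNCE loss averaged over Matryoshka prefix dimensions.

    emb_a, emb_b: (batch, hidden) embeddings of two views; row i of
    emb_a is the positive of row i of emb_b, other rows are negatives.
    """
    losses = []
    for d in dims:
        a = F.normalize(emb_a[:, :d], dim=-1)  # truncate to first d dims
        b = F.normalize(emb_b[:, :d], dim=-1)
        logits = a @ b.T / temperature         # (batch, batch) cosine similarities
        labels = torch.arange(a.size(0), device=a.device)  # diagonal = positives
        losses.append(F.cross_entropy(logits, labels))
    return torch.stack(losses).mean()
```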
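Finally, a sketch of how `GRAD_ACCUMULATION_STEPS`, `GLOBAL_STEP_SAVE`, and `SAVE_MODEL_CYCLE` are commonly wired into the training loop. Only the accumulation and checkpoint logic is shown; `train_loader`, `encode_pairs`, and the checkpoint paths are hypothetical stand-ins for the real pipeline.

```python
import torch

optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
global_step = 0

for epoch in range(EPOCHS):
    for i, batch in enumerate(train_loader):
        emb_a, emb_b = encode_pairs(model, batch)  # hypothetical: pooled embeddings of both views
        # scale the loss so gradients average over the accumulation window
        loss = simcse_mrl_loss(emb_a, emb_b) / GRAD_ACCUMULATION_STEPS
        loss.backward()
        if (i + 1) % GRAD_ACCUMULATION_STEPS == 0:  # step once per effective batch of 32
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1
            if global_step % GLOBAL_STEP_SAVE == 0:  # step-based checkpoint
                model.save_pretrained(f"checkpoints/step-{global_step}")
    if (epoch + 1) % SAVE_MODEL_CYCLE == 0:          # epoch-based checkpoint
        model.save_pretrained(f"checkpoints/epoch-{epoch + 1}")
```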