|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.620745542949757, |
|
"eval_steps": 100, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.5733333333333334, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.88, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.6499700198395969, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.7025343915343916, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.5733333333333334, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.6933983700329899, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.5733333333333334, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.21599999999999997, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.44222222222222224, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.8077777777777778, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.66, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.94, |
|
"eval_NanoFEVER_cosine_map@100": 0.7456105053991163, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.7750000000000001, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.66, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.7909186074531693, |
|
"eval_NanoFEVER_cosine_precision@1": 0.66, |
|
"eval_NanoFEVER_cosine_precision@5": 0.19999999999999996, |
|
"eval_NanoFEVER_cosine_recall@1": 0.6166666666666667, |
|
"eval_NanoFEVER_cosine_recall@5": 0.9133333333333333, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.7, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.92, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.659131307951428, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.795, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.7, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.7005437199248264, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.7, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.292, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.35, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.73, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.36, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.78, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.5451682461682461, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.5376031746031745, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.36, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5887327827209738, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.36, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.156, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.36, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.78, |
|
"eval_mldr_loss": 98.74517822265625, |
|
"eval_mldr_runtime": 49.7129, |
|
"eval_mldr_samples_per_second": 10.058, |
|
"eval_mldr_steps_per_second": 0.644, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_squad_loss": 4.950023174285889, |
|
"eval_squad_runtime": 0.9354, |
|
"eval_squad_samples_per_second": 51.314, |
|
"eval_squad_steps_per_second": 3.207, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_narrative_qa_loss": 49.38987731933594, |
|
"eval_narrative_qa_runtime": 1.0306, |
|
"eval_narrative_qa_samples_per_second": 290.117, |
|
"eval_narrative_qa_steps_per_second": 18.436, |
|
"step": 0 |
|
}, |
|
{ |
|
"epoch": 0.01620745542949757, |
|
"grad_norm": 1336.0, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 86.6132, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03241491085899514, |
|
"grad_norm": 828.0, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 83.7595, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04862236628849271, |
|
"grad_norm": 388.0, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 65.5413, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06482982171799027, |
|
"grad_norm": 320.0, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 40.6319, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08103727714748785, |
|
"grad_norm": 296.0, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 26.2768, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09724473257698542, |
|
"grad_norm": 290.0, |
|
"learning_rate": 4.9997781212386585e-05, |
|
"loss": 14.4879, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11345218800648298, |
|
"grad_norm": 160.0, |
|
"learning_rate": 4.9980033274458193e-05, |
|
"loss": 9.4888, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12965964343598055, |
|
"grad_norm": 212.0, |
|
"learning_rate": 4.994454999929178e-05, |
|
"loss": 8.831, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1458670988654781, |
|
"grad_norm": 109.0, |
|
"learning_rate": 4.98913565793218e-05, |
|
"loss": 7.6776, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1620745542949757, |
|
"grad_norm": 71.5, |
|
"learning_rate": 4.982049078084071e-05, |
|
"loss": 6.4716, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1620745542949757, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.32666666666666666, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6333333333333333, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.39934768753390587, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.4495740740740741, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.32666666666666666, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.43241374431385005, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.32666666666666666, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.13466666666666668, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.25666666666666665, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.5466666666666667, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.24, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.66, |
|
"eval_NanoFEVER_cosine_map@100": 0.3935747452453245, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.39322222222222225, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.24, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.4387408055008589, |
|
"eval_NanoFEVER_cosine_precision@1": 0.24, |
|
"eval_NanoFEVER_cosine_precision@5": 0.132, |
|
"eval_NanoFEVER_cosine_recall@1": 0.24, |
|
"eval_NanoFEVER_cosine_recall@5": 0.63, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.42, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.58, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.3274226457297244, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.4911666666666667, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.42, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.35120107956309937, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.42, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.14, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.21, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.35, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.66, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.47704567162666855, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.4643333333333334, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5072993478775919, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.132, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.66, |
|
"eval_mldr_loss": 4.950920581817627, |
|
"eval_mldr_runtime": 14.3444, |
|
"eval_mldr_samples_per_second": 34.857, |
|
"eval_mldr_steps_per_second": 2.231, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1620745542949757, |
|
"eval_squad_loss": 2.0146381855010986, |
|
"eval_squad_runtime": 0.9385, |
|
"eval_squad_samples_per_second": 51.146, |
|
"eval_squad_steps_per_second": 3.197, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1620745542949757, |
|
"eval_narrative_qa_loss": 4.486134052276611, |
|
"eval_narrative_qa_runtime": 1.1506, |
|
"eval_narrative_qa_samples_per_second": 259.856, |
|
"eval_narrative_qa_steps_per_second": 16.513, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17828200972447325, |
|
"grad_norm": 107.5, |
|
"learning_rate": 4.973200291718561e-05, |
|
"loss": 6.1959, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19448946515397084, |
|
"grad_norm": 50.75, |
|
"learning_rate": 4.962595581301673e-05, |
|
"loss": 4.5491, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2106969205834684, |
|
"grad_norm": 50.25, |
|
"learning_rate": 4.95024247597132e-05, |
|
"loss": 4.0871, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22690437601296595, |
|
"grad_norm": 53.25, |
|
"learning_rate": 4.936149746191758e-05, |
|
"loss": 3.0861, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24311183144246354, |
|
"grad_norm": 25.5, |
|
"learning_rate": 4.920327397526731e-05, |
|
"loss": 2.7792, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2593192868719611, |
|
"grad_norm": 122.0, |
|
"learning_rate": 4.9027866635357136e-05, |
|
"loss": 2.2107, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2755267423014587, |
|
"grad_norm": 410.0, |
|
"learning_rate": 4.883539997798303e-05, |
|
"loss": 1.6997, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2917341977309562, |
|
"grad_norm": 66.0, |
|
"learning_rate": 4.8626010650724204e-05, |
|
"loss": 1.4911, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3079416531604538, |
|
"grad_norm": 1072.0, |
|
"learning_rate": 4.8399847315926e-05, |
|
"loss": 1.6304, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3241491085899514, |
|
"grad_norm": 16.5, |
|
"learning_rate": 4.815707054515248e-05, |
|
"loss": 1.5042, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3241491085899514, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.24666666666666667, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.4533333333333333, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.2917657022707883, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3391984126984127, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.24666666666666667, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.3047832475255773, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.24666666666666667, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.09466666666666669, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.18333333333333335, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.37333333333333335, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.08, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.26, |
|
"eval_NanoFEVER_cosine_map@100": 0.15831046306304816, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.1541904761904762, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.08, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.15530091408199076, |
|
"eval_NanoFEVER_cosine_precision@1": 0.08, |
|
"eval_NanoFEVER_cosine_precision@5": 0.052000000000000005, |
|
"eval_NanoFEVER_cosine_recall@1": 0.07, |
|
"eval_NanoFEVER_cosine_recall@5": 0.24, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.36, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.5, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.2657203181494037, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.4257142857142857, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.36, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.2891308855457201, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.36, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000003, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.18, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.28, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.45126632559991314, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.4376904761904762, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.46991794294902106, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.6, |
|
"eval_mldr_loss": 1.3893917798995972, |
|
"eval_mldr_runtime": 14.4056, |
|
"eval_mldr_samples_per_second": 34.709, |
|
"eval_mldr_steps_per_second": 2.221, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3241491085899514, |
|
"eval_squad_loss": 0.7646868824958801, |
|
"eval_squad_runtime": 1.072, |
|
"eval_squad_samples_per_second": 44.776, |
|
"eval_squad_steps_per_second": 2.799, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3241491085899514, |
|
"eval_narrative_qa_loss": 2.59745192527771, |
|
"eval_narrative_qa_runtime": 1.1589, |
|
"eval_narrative_qa_samples_per_second": 257.994, |
|
"eval_narrative_qa_steps_per_second": 16.394, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34035656401944897, |
|
"grad_norm": 35.0, |
|
"learning_rate": 4.7897852705183785e-05, |
|
"loss": 1.6009, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3565640194489465, |
|
"grad_norm": 1352.0, |
|
"learning_rate": 4.7622377835639064e-05, |
|
"loss": 1.5156, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3727714748784441, |
|
"grad_norm": 704.0, |
|
"learning_rate": 4.73308415183119e-05, |
|
"loss": 1.5305, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3889789303079417, |
|
"grad_norm": 824.0, |
|
"learning_rate": 4.702345073831109e-05, |
|
"loss": 1.6842, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4051863857374392, |
|
"grad_norm": 21760.0, |
|
"learning_rate": 4.6700423737105236e-05, |
|
"loss": 1.2207, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4213938411669368, |
|
"grad_norm": 3040.0, |
|
"learning_rate": 4.63619898575755e-05, |
|
"loss": 1.2634, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4376012965964344, |
|
"grad_norm": 46.5, |
|
"learning_rate": 4.600838938118672e-05, |
|
"loss": 1.3175, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4538087520259319, |
|
"grad_norm": 6112.0, |
|
"learning_rate": 4.563987335739216e-05, |
|
"loss": 1.3107, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4700162074554295, |
|
"grad_norm": 2256.0, |
|
"learning_rate": 4.525670342539332e-05, |
|
"loss": 1.3163, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4862236628849271, |
|
"grad_norm": 860.0, |
|
"learning_rate": 4.485915162838122e-05, |
|
"loss": 1.3703, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4862236628849271, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.18666666666666668, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.37333333333333335, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.24055436231400043, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.26788624338624334, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.18666666666666668, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.24938490224868604, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.18666666666666668, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.07733333333333335, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.15, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.31666666666666665, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.06, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.18, |
|
"eval_NanoFEVER_cosine_map@100": 0.11320690658806287, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.11538888888888887, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.06, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.11246425246579403, |
|
"eval_NanoFEVER_cosine_precision@1": 0.06, |
|
"eval_NanoFEVER_cosine_precision@5": 0.036000000000000004, |
|
"eval_NanoFEVER_cosine_recall@1": 0.05, |
|
"eval_NanoFEVER_cosine_recall@5": 0.17, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.36, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.17264044237203358, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.26374603174603173, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.18589124040083965, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.08, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.1, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.2, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.4358157379819049, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.42452380952380947, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.44979921387942445, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.58, |
|
"eval_mldr_loss": 1.218342900276184, |
|
"eval_mldr_runtime": 14.4679, |
|
"eval_mldr_samples_per_second": 34.559, |
|
"eval_mldr_steps_per_second": 2.212, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4862236628849271, |
|
"eval_squad_loss": 1.0243560075759888, |
|
"eval_squad_runtime": 0.9647, |
|
"eval_squad_samples_per_second": 49.755, |
|
"eval_squad_steps_per_second": 3.11, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4862236628849271, |
|
"eval_narrative_qa_loss": 2.635084629058838, |
|
"eval_narrative_qa_runtime": 1.1616, |
|
"eval_narrative_qa_samples_per_second": 257.409, |
|
"eval_narrative_qa_steps_per_second": 16.357, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5024311183144247, |
|
"grad_norm": 39.25, |
|
"learning_rate": 4.444750022039099e-05, |
|
"loss": 1.2901, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5186385737439222, |
|
"grad_norm": 260.0, |
|
"learning_rate": 4.4022041465907036e-05, |
|
"loss": 1.245, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5348460291734197, |
|
"grad_norm": 2192.0, |
|
"learning_rate": 4.358307743236092e-05, |
|
"loss": 1.1749, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5510534846029174, |
|
"grad_norm": 1064.0, |
|
"learning_rate": 4.3130919775669374e-05, |
|
"loss": 1.0522, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5672609400324149, |
|
"grad_norm": 1120.0, |
|
"learning_rate": 4.2665889518964684e-05, |
|
"loss": 1.1319, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5834683954619124, |
|
"grad_norm": 1552.0, |
|
"learning_rate": 4.2188316824674504e-05, |
|
"loss": 1.1986, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5996758508914101, |
|
"grad_norm": 133.0, |
|
"learning_rate": 4.169854076011292e-05, |
|
"loss": 1.3508, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6158833063209076, |
|
"grad_norm": 5408.0, |
|
"learning_rate": 4.119690905674937e-05, |
|
"loss": 1.3465, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6320907617504052, |
|
"grad_norm": 1624.0, |
|
"learning_rate": 4.068377786332593e-05, |
|
"loss": 1.3062, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6482982171799028, |
|
"grad_norm": 1192.0, |
|
"learning_rate": 4.0159511492998746e-05, |
|
"loss": 0.9974, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6482982171799028, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.20000000000000004, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.39333333333333337, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.25566257669608067, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2828835978835979, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.20000000000000004, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.2711361604281084, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.20000000000000004, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.08266666666666668, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.16333333333333333, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.3433333333333333, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.08, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.2, |
|
"eval_NanoFEVER_cosine_map@100": 0.12628338770667222, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.1275, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.08, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.1285946755597121, |
|
"eval_NanoFEVER_cosine_precision@1": 0.08, |
|
"eval_NanoFEVER_cosine_precision@5": 0.04, |
|
"eval_NanoFEVER_cosine_recall@1": 0.07, |
|
"eval_NanoFEVER_cosine_recall@5": 0.18, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.17161850356661693, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.26146031746031745, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.18168469434151963, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.1, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.19, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.66, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.4690858388149529, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.45969047619047615, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5031291113830936, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.132, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.66, |
|
"eval_mldr_loss": 1.7250500917434692, |
|
"eval_mldr_runtime": 14.4371, |
|
"eval_mldr_samples_per_second": 34.633, |
|
"eval_mldr_steps_per_second": 2.217, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6482982171799028, |
|
"eval_squad_loss": 0.8224272131919861, |
|
"eval_squad_runtime": 0.9831, |
|
"eval_squad_samples_per_second": 48.823, |
|
"eval_squad_steps_per_second": 3.051, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6482982171799028, |
|
"eval_narrative_qa_loss": 2.191936492919922, |
|
"eval_narrative_qa_runtime": 1.1776, |
|
"eval_narrative_qa_samples_per_second": 253.914, |
|
"eval_narrative_qa_steps_per_second": 16.135, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6645056726094003, |
|
"grad_norm": 796.0, |
|
"learning_rate": 3.962448216468275e-05, |
|
"loss": 3.2988, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6807131280388979, |
|
"grad_norm": 9216.0, |
|
"learning_rate": 3.9079069738783484e-05, |
|
"loss": 1.3305, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6969205834683955, |
|
"grad_norm": 227.0, |
|
"learning_rate": 3.852366144750358e-05, |
|
"loss": 1.2771, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.713128038897893, |
|
"grad_norm": 1456.0, |
|
"learning_rate": 3.7958651619915495e-05, |
|
"loss": 1.0724, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7293354943273906, |
|
"grad_norm": 704.0, |
|
"learning_rate": 3.738444140199549e-05, |
|
"loss": 1.2473, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7455429497568882, |
|
"grad_norm": 4608.0, |
|
"learning_rate": 3.680143847181783e-05, |
|
"loss": 1.4045, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7617504051863857, |
|
"grad_norm": 8384.0, |
|
"learning_rate": 3.621005675011127e-05, |
|
"loss": 1.6167, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7779578606158833, |
|
"grad_norm": 10752.0, |
|
"learning_rate": 3.5610716106383426e-05, |
|
"loss": 1.427, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7941653160453809, |
|
"grad_norm": 32512.0, |
|
"learning_rate": 3.500384206082155e-05, |
|
"loss": 1.256, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8103727714748784, |
|
"grad_norm": 2080.0, |
|
"learning_rate": 3.438986548218155e-05, |
|
"loss": 1.7336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8103727714748784, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.22666666666666668, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.35333333333333333, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.25285734426656203, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.28679100529100526, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.22666666666666668, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.26096692405722594, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.22666666666666668, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.17666666666666667, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.31, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.08, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.18, |
|
"eval_NanoFEVER_cosine_map@100": 0.11514089868666741, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.11641269841269843, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.08, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.11458961164975079, |
|
"eval_NanoFEVER_cosine_precision@1": 0.08, |
|
"eval_NanoFEVER_cosine_precision@5": 0.036000000000000004, |
|
"eval_NanoFEVER_cosine_recall@1": 0.07, |
|
"eval_NanoFEVER_cosine_recall@5": 0.16, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.28, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.3, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.1921273663105645, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.3077380952380952, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.28, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.202243938877232, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.28, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.14, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.19, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.4513037678024542, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.4362222222222223, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4660672216446949, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.58, |
|
"eval_mldr_loss": 1.2118109464645386, |
|
"eval_mldr_runtime": 14.4727, |
|
"eval_mldr_samples_per_second": 34.548, |
|
"eval_mldr_steps_per_second": 2.211, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8103727714748784, |
|
"eval_squad_loss": 0.669793426990509, |
|
"eval_squad_runtime": 0.9528, |
|
"eval_squad_samples_per_second": 50.377, |
|
"eval_squad_steps_per_second": 3.149, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8103727714748784, |
|
"eval_narrative_qa_loss": 2.0540804862976074, |
|
"eval_narrative_qa_runtime": 1.1722, |
|
"eval_narrative_qa_samples_per_second": 255.077, |
|
"eval_narrative_qa_steps_per_second": 16.209, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.826580226904376, |
|
"grad_norm": 5792.0, |
|
"learning_rate": 3.3769222281879495e-05, |
|
"loss": 0.9241, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8427876823338736, |
|
"grad_norm": 1080.0, |
|
"learning_rate": 3.314235310450306e-05, |
|
"loss": 1.2186, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8589951377633711, |
|
"grad_norm": 1264.0, |
|
"learning_rate": 3.250970301496237e-05, |
|
"loss": 1.3873, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8752025931928687, |
|
"grad_norm": 308.0, |
|
"learning_rate": 3.187172118250266e-05, |
|
"loss": 1.299, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8914100486223663, |
|
"grad_norm": 5568.0, |
|
"learning_rate": 3.122886056180284e-05, |
|
"loss": 1.4101, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9076175040518638, |
|
"grad_norm": 1480.0, |
|
"learning_rate": 3.0581577571386426e-05, |
|
"loss": 1.047, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9238249594813615, |
|
"grad_norm": 34.0, |
|
"learning_rate": 2.9930331769573365e-05, |
|
"loss": 1.1558, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.940032414910859, |
|
"grad_norm": 4640.0, |
|
"learning_rate": 2.9275585528202516e-05, |
|
"loss": 1.1769, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9562398703403565, |
|
"grad_norm": 191.0, |
|
"learning_rate": 2.8617803704356672e-05, |
|
"loss": 1.1054, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9724473257698542, |
|
"grad_norm": 167.0, |
|
"learning_rate": 2.7957453310323073e-05, |
|
"loss": 3.6745, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9724473257698542, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.20666666666666667, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.36000000000000004, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.24725005259175806, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2764206349206349, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.20666666666666667, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.2557813556128824, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.20666666666666667, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.076, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.16666666666666666, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.31, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.1, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.12, |
|
"eval_NanoFEVER_cosine_map@100": 0.12500495211409104, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.12576984126984128, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.1, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.10488153892673832, |
|
"eval_NanoFEVER_cosine_precision@1": 0.1, |
|
"eval_NanoFEVER_cosine_precision@5": 0.024000000000000004, |
|
"eval_NanoFEVER_cosine_recall@1": 0.09, |
|
"eval_NanoFEVER_cosine_recall@5": 0.11, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.22, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.34, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.17072760263191902, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.2686904761904762, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.22, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.19019906156669042, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.22, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.08, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.11, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.2, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.62, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.44601760302926413, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.4348015873015873, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4722634663452185, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.124, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.3, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.62, |
|
"eval_mldr_loss": 1.1025381088256836, |
|
"eval_mldr_runtime": 14.5605, |
|
"eval_mldr_samples_per_second": 34.339, |
|
"eval_mldr_steps_per_second": 2.198, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9724473257698542, |
|
"eval_squad_loss": 0.7168652415275574, |
|
"eval_squad_runtime": 0.8907, |
|
"eval_squad_samples_per_second": 53.891, |
|
"eval_squad_steps_per_second": 3.368, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9724473257698542, |
|
"eval_narrative_qa_loss": 1.757150411605835, |
|
"eval_narrative_qa_runtime": 1.1289, |
|
"eval_narrative_qa_samples_per_second": 264.853, |
|
"eval_narrative_qa_steps_per_second": 16.83, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9886547811993517, |
|
"grad_norm": 414.0, |
|
"learning_rate": 2.7295003182023793e-05, |
|
"loss": 1.2095, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0048622366288493, |
|
"grad_norm": 760.0, |
|
"learning_rate": 2.663092364615134e-05, |
|
"loss": 1.0888, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0210696920583469, |
|
"grad_norm": 884.0, |
|
"learning_rate": 2.596568618624588e-05, |
|
"loss": 1.1728, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0372771474878444, |
|
"grad_norm": 1568.0, |
|
"learning_rate": 2.529976310795108e-05, |
|
"loss": 1.2033, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.053484602917342, |
|
"grad_norm": 716.0, |
|
"learning_rate": 2.4633627203686322e-05, |
|
"loss": 1.0108, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0696920583468394, |
|
"grad_norm": 50.5, |
|
"learning_rate": 2.3967751416973304e-05, |
|
"loss": 0.9941, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0858995137763372, |
|
"grad_norm": 39.5, |
|
"learning_rate": 2.3302608506655286e-05, |
|
"loss": 1.1392, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1021069692058347, |
|
"grad_norm": 1000.0, |
|
"learning_rate": 2.2638670711247605e-05, |
|
"loss": 1.083, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1183144246353323, |
|
"grad_norm": 7456.0, |
|
"learning_rate": 2.197640941365743e-05, |
|
"loss": 1.0115, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1345218800648298, |
|
"grad_norm": 2944.0, |
|
"learning_rate": 2.131629480651116e-05, |
|
"loss": 1.2825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1345218800648298, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.17333333333333334, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.34, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.2227183429409336, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.25063756613756616, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.17333333333333334, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.23117905501265376, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.17333333333333334, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.07066666666666667, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.14, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.29000000000000004, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.06, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.14, |
|
"eval_NanoFEVER_cosine_map@100": 0.10226378102099443, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.107, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.06, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.09611366515963533, |
|
"eval_NanoFEVER_cosine_precision@1": 0.06, |
|
"eval_NanoFEVER_cosine_precision@5": 0.027999999999999997, |
|
"eval_NanoFEVER_cosine_recall@1": 0.05, |
|
"eval_NanoFEVER_cosine_recall@5": 0.13, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.18, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.1431808584557489, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.2346666666666667, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.18, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.16151062083926612, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.18, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.07200000000000001, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.09, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.18, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.28, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.56, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.4227103893460575, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.4102460317460318, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.28, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4359128790390598, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.28, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.28, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.56, |
|
"eval_mldr_loss": 1.04865562915802, |
|
"eval_mldr_runtime": 14.5246, |
|
"eval_mldr_samples_per_second": 34.424, |
|
"eval_mldr_steps_per_second": 2.203, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1345218800648298, |
|
"eval_squad_loss": 0.7077043652534485, |
|
"eval_squad_runtime": 0.907, |
|
"eval_squad_samples_per_second": 52.924, |
|
"eval_squad_steps_per_second": 3.308, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1345218800648298, |
|
"eval_narrative_qa_loss": 1.7767502069473267, |
|
"eval_narrative_qa_runtime": 1.1658, |
|
"eval_narrative_qa_samples_per_second": 256.487, |
|
"eval_narrative_qa_steps_per_second": 16.298, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1507293354943273, |
|
"grad_norm": 1976.0, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 1.1286, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1669367909238249, |
|
"grad_norm": 1640.0, |
|
"learning_rate": 2.000437848076822e-05, |
|
"loss": 1.174, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1831442463533226, |
|
"grad_norm": 3776.0, |
|
"learning_rate": 1.935350819721849e-05, |
|
"loss": 1.124, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.1993517017828201, |
|
"grad_norm": 5568.0, |
|
"learning_rate": 1.870664681290575e-05, |
|
"loss": 0.9694, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2155591572123177, |
|
"grad_norm": 532.0, |
|
"learning_rate": 1.8064253586817816e-05, |
|
"loss": 1.165, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2317666126418152, |
|
"grad_norm": 928.0, |
|
"learning_rate": 1.7426784605637153e-05, |
|
"loss": 1.1989, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2479740680713127, |
|
"grad_norm": 560.0, |
|
"learning_rate": 1.6794692459928223e-05, |
|
"loss": 0.9966, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2641815235008105, |
|
"grad_norm": 123.5, |
|
"learning_rate": 1.6168425922807008e-05, |
|
"loss": 0.9768, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.280388978930308, |
|
"grad_norm": 884.0, |
|
"learning_rate": 1.5548429631320806e-05, |
|
"loss": 1.1039, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2965964343598055, |
|
"grad_norm": 15.75, |
|
"learning_rate": 1.4935143770764601e-05, |
|
"loss": 2.7281, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2965964343598055, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.17333333333333334, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.35333333333333333, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.2232392549386716, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.24966402116402117, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.17333333333333334, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.23771617121243702, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.17333333333333334, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.07333333333333333, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.1366666666666667, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.31, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.06, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.16, |
|
"eval_NanoFEVER_cosine_map@100": 0.1100930297556344, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.11335714285714285, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.06, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.10785578521428746, |
|
"eval_NanoFEVER_cosine_precision@1": 0.06, |
|
"eval_NanoFEVER_cosine_precision@5": 0.032, |
|
"eval_NanoFEVER_cosine_recall@1": 0.05, |
|
"eval_NanoFEVER_cosine_recall@5": 0.15, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.28, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.14784815274029572, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.2350555555555556, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.16017374485985264, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.064, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.1, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.16, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.26, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.62, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.4117765823200847, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.40057936507936504, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.26, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.445118983563171, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.26, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.124, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.26, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.62, |
|
"eval_mldr_loss": 1.0107322931289673, |
|
"eval_mldr_runtime": 14.5338, |
|
"eval_mldr_samples_per_second": 34.403, |
|
"eval_mldr_steps_per_second": 2.202, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2965964343598055, |
|
"eval_squad_loss": 0.6025272607803345, |
|
"eval_squad_runtime": 0.8024, |
|
"eval_squad_samples_per_second": 59.822, |
|
"eval_squad_steps_per_second": 3.739, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2965964343598055, |
|
"eval_narrative_qa_loss": 1.7681714296340942, |
|
"eval_narrative_qa_runtime": 1.1303, |
|
"eval_narrative_qa_samples_per_second": 264.533, |
|
"eval_narrative_qa_steps_per_second": 16.81, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.312803889789303, |
|
"grad_norm": 4288.0, |
|
"learning_rate": 1.4329003762158106e-05, |
|
"loss": 1.2271, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3290113452188006, |
|
"grad_norm": 11840.0, |
|
"learning_rate": 1.3730439953105243e-05, |
|
"loss": 1.0453, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3452188006482984, |
|
"grad_norm": 1440.0, |
|
"learning_rate": 1.3139877312255799e-05, |
|
"loss": 1.106, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3614262560777957, |
|
"grad_norm": 440.0, |
|
"learning_rate": 1.2557735127585917e-05, |
|
"loss": 1.0065, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3776337115072934, |
|
"grad_norm": 616.0, |
|
"learning_rate": 1.1984426708711804e-05, |
|
"loss": 1.124, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.393841166936791, |
|
"grad_norm": 12.5, |
|
"learning_rate": 1.1420359093447966e-05, |
|
"loss": 1.3403, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4100486223662885, |
|
"grad_norm": 1280.0, |
|
"learning_rate": 1.0865932758818258e-05, |
|
"loss": 1.1274, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.426256077795786, |
|
"grad_norm": 186.0, |
|
"learning_rate": 1.0321541336725027e-05, |
|
"loss": 1.1335, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4424635332252835, |
|
"grad_norm": 6656.0, |
|
"learning_rate": 9.787571334478101e-06, |
|
"loss": 1.0805, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4586709886547813, |
|
"grad_norm": 73.5, |
|
"learning_rate": 9.26440186038211e-06, |
|
"loss": 1.1117, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4586709886547813, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.2066666666666667, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.3466666666666667, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.24109707671460848, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.27005291005291004, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.2066666666666667, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.24663303266901904, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.2066666666666667, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.07200000000000001, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.17, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.29666666666666663, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.08, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.14, |
|
"eval_NanoFEVER_cosine_map@100": 0.10717352446028915, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.1099126984126984, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.08, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.10123212623289701, |
|
"eval_NanoFEVER_cosine_precision@1": 0.08, |
|
"eval_NanoFEVER_cosine_precision@5": 0.028000000000000004, |
|
"eval_NanoFEVER_cosine_recall@1": 0.07, |
|
"eval_NanoFEVER_cosine_recall@5": 0.13, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.15637152907438528, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.2523888888888889, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.17287282180469699, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.2, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.07200000000000001, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.1, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.18, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.34, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.45974617660915107, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.44785714285714284, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.34, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4657941499694631, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.34, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.34, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.58, |
|
"eval_mldr_loss": 1.006673812866211, |
|
"eval_mldr_runtime": 14.487, |
|
"eval_mldr_samples_per_second": 34.514, |
|
"eval_mldr_steps_per_second": 2.209, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4586709886547813, |
|
"eval_squad_loss": 0.7755841612815857, |
|
"eval_squad_runtime": 0.8044, |
|
"eval_squad_samples_per_second": 59.675, |
|
"eval_squad_steps_per_second": 3.73, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4586709886547813, |
|
"eval_narrative_qa_loss": 1.7425211668014526, |
|
"eval_narrative_qa_runtime": 1.1372, |
|
"eval_narrative_qa_samples_per_second": 262.924, |
|
"eval_narrative_qa_steps_per_second": 16.708, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4748784440842788, |
|
"grad_norm": 712.0, |
|
"learning_rate": 8.752404354577052e-06, |
|
"loss": 1.0897, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4910858995137763, |
|
"grad_norm": 1200.0, |
|
"learning_rate": 8.251942325322903e-06, |
|
"loss": 0.9964, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5072933549432739, |
|
"grad_norm": 2016.0, |
|
"learning_rate": 7.763371090915932e-06, |
|
"loss": 0.954, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5235008103727714, |
|
"grad_norm": 4512.0, |
|
"learning_rate": 7.287037527419604e-06, |
|
"loss": 1.3415, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5397082658022692, |
|
"grad_norm": 438.0, |
|
"learning_rate": 6.823279822389278e-06, |
|
"loss": 0.9926, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5559157212317665, |
|
"grad_norm": 158.0, |
|
"learning_rate": 6.372427234765688e-06, |
|
"loss": 1.1362, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5721231766612642, |
|
"grad_norm": 163.0, |
|
"learning_rate": 5.934799861107507e-06, |
|
"loss": 1.1178, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5883306320907618, |
|
"grad_norm": 1216.0, |
|
"learning_rate": 5.5107084083289576e-06, |
|
"loss": 0.8676, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.6045380875202593, |
|
"grad_norm": 37632.0, |
|
"learning_rate": 5.1004539731040245e-06, |
|
"loss": 1.1794, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.620745542949757, |
|
"grad_norm": 268.0, |
|
"learning_rate": 4.704327828093641e-06, |
|
"loss": 1.0575, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.620745542949757, |
|
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.18666666666666668, |
|
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.3333333333333333, |
|
"eval_NanoBEIR_mean_cosine_map@100": 0.23251229334833623, |
|
"eval_NanoBEIR_mean_cosine_mrr@10": 0.250457671957672, |
|
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.18666666666666668, |
|
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.23607659211604262, |
|
"eval_NanoBEIR_mean_cosine_precision@1": 0.18666666666666668, |
|
"eval_NanoBEIR_mean_cosine_precision@5": 0.06933333333333334, |
|
"eval_NanoBEIR_mean_cosine_recall@1": 0.15666666666666668, |
|
"eval_NanoBEIR_mean_cosine_recall@5": 0.29, |
|
"eval_NanoFEVER_cosine_accuracy@1": 0.08, |
|
"eval_NanoFEVER_cosine_accuracy@5": 0.14, |
|
"eval_NanoFEVER_cosine_map@100": 0.10995542705065009, |
|
"eval_NanoFEVER_cosine_mrr@10": 0.11060317460317462, |
|
"eval_NanoFEVER_cosine_ndcg@1": 0.08, |
|
"eval_NanoFEVER_cosine_ndcg@5": 0.10261859507142916, |
|
"eval_NanoFEVER_cosine_precision@1": 0.08, |
|
"eval_NanoFEVER_cosine_precision@5": 0.027999999999999997, |
|
"eval_NanoFEVER_cosine_recall@1": 0.07, |
|
"eval_NanoFEVER_cosine_recall@5": 0.13, |
|
"eval_NanoHotpotQA_cosine_accuracy@1": 0.16, |
|
"eval_NanoHotpotQA_cosine_accuracy@5": 0.28, |
|
"eval_NanoHotpotQA_cosine_map@100": 0.14864943925499563, |
|
"eval_NanoHotpotQA_cosine_mrr@10": 0.21274603174603177, |
|
"eval_NanoHotpotQA_cosine_ndcg@1": 0.16, |
|
"eval_NanoHotpotQA_cosine_ndcg@5": 0.15294562020041982, |
|
"eval_NanoHotpotQA_cosine_precision@1": 0.16, |
|
"eval_NanoHotpotQA_cosine_precision@5": 0.064, |
|
"eval_NanoHotpotQA_cosine_recall@1": 0.08, |
|
"eval_NanoHotpotQA_cosine_recall@5": 0.16, |
|
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58, |
|
"eval_NanoMSMARCO_cosine_map@100": 0.4389320137393629, |
|
"eval_NanoMSMARCO_cosine_mrr@10": 0.42802380952380953, |
|
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4526655610762789, |
|
"eval_NanoMSMARCO_cosine_precision@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, |
|
"eval_NanoMSMARCO_cosine_recall@1": 0.32, |
|
"eval_NanoMSMARCO_cosine_recall@5": 0.58, |
|
"eval_mldr_loss": 1.0088629722595215, |
|
"eval_mldr_runtime": 14.5077, |
|
"eval_mldr_samples_per_second": 34.464, |
|
"eval_mldr_steps_per_second": 2.206, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.620745542949757, |
|
"eval_squad_loss": 0.6442683339118958, |
|
"eval_squad_runtime": 0.8625, |
|
"eval_squad_samples_per_second": 55.652, |
|
"eval_squad_steps_per_second": 3.478, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.620745542949757, |
|
"eval_narrative_qa_loss": 1.7451977729797363, |
|
"eval_narrative_qa_runtime": 1.1443, |
|
"eval_narrative_qa_samples_per_second": 261.298, |
|
"eval_narrative_qa_steps_per_second": 16.604, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1234, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|