mlconti's picture
Upload folder using huggingface_hub
d2f8fa7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.620745542949757,
"eval_steps": 100,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.5733333333333334,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.88,
"eval_NanoBEIR_mean_cosine_map@100": 0.6499700198395969,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.7025343915343916,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.5733333333333334,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.6933983700329899,
"eval_NanoBEIR_mean_cosine_precision@1": 0.5733333333333334,
"eval_NanoBEIR_mean_cosine_precision@5": 0.21599999999999997,
"eval_NanoBEIR_mean_cosine_recall@1": 0.44222222222222224,
"eval_NanoBEIR_mean_cosine_recall@5": 0.8077777777777778,
"eval_NanoFEVER_cosine_accuracy@1": 0.66,
"eval_NanoFEVER_cosine_accuracy@5": 0.94,
"eval_NanoFEVER_cosine_map@100": 0.7456105053991163,
"eval_NanoFEVER_cosine_mrr@10": 0.7750000000000001,
"eval_NanoFEVER_cosine_ndcg@1": 0.66,
"eval_NanoFEVER_cosine_ndcg@5": 0.7909186074531693,
"eval_NanoFEVER_cosine_precision@1": 0.66,
"eval_NanoFEVER_cosine_precision@5": 0.19999999999999996,
"eval_NanoFEVER_cosine_recall@1": 0.6166666666666667,
"eval_NanoFEVER_cosine_recall@5": 0.9133333333333333,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.7,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.92,
"eval_NanoHotpotQA_cosine_map@100": 0.659131307951428,
"eval_NanoHotpotQA_cosine_mrr@10": 0.795,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.7,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.7005437199248264,
"eval_NanoHotpotQA_cosine_precision@1": 0.7,
"eval_NanoHotpotQA_cosine_precision@5": 0.292,
"eval_NanoHotpotQA_cosine_recall@1": 0.35,
"eval_NanoHotpotQA_cosine_recall@5": 0.73,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.36,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.78,
"eval_NanoMSMARCO_cosine_map@100": 0.5451682461682461,
"eval_NanoMSMARCO_cosine_mrr@10": 0.5376031746031745,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.36,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5887327827209738,
"eval_NanoMSMARCO_cosine_precision@1": 0.36,
"eval_NanoMSMARCO_cosine_precision@5": 0.156,
"eval_NanoMSMARCO_cosine_recall@1": 0.36,
"eval_NanoMSMARCO_cosine_recall@5": 0.78,
"eval_mldr_loss": 98.74517822265625,
"eval_mldr_runtime": 49.7129,
"eval_mldr_samples_per_second": 10.058,
"eval_mldr_steps_per_second": 0.644,
"step": 0
},
{
"epoch": 0,
"eval_squad_loss": 4.950023174285889,
"eval_squad_runtime": 0.9354,
"eval_squad_samples_per_second": 51.314,
"eval_squad_steps_per_second": 3.207,
"step": 0
},
{
"epoch": 0,
"eval_narrative_qa_loss": 49.38987731933594,
"eval_narrative_qa_runtime": 1.0306,
"eval_narrative_qa_samples_per_second": 290.117,
"eval_narrative_qa_steps_per_second": 18.436,
"step": 0
},
{
"epoch": 0.01620745542949757,
"grad_norm": 1336.0,
"learning_rate": 9.090909090909091e-06,
"loss": 86.6132,
"step": 10
},
{
"epoch": 0.03241491085899514,
"grad_norm": 828.0,
"learning_rate": 1.8181818181818182e-05,
"loss": 83.7595,
"step": 20
},
{
"epoch": 0.04862236628849271,
"grad_norm": 388.0,
"learning_rate": 2.7272727272727273e-05,
"loss": 65.5413,
"step": 30
},
{
"epoch": 0.06482982171799027,
"grad_norm": 320.0,
"learning_rate": 3.6363636363636364e-05,
"loss": 40.6319,
"step": 40
},
{
"epoch": 0.08103727714748785,
"grad_norm": 296.0,
"learning_rate": 4.545454545454546e-05,
"loss": 26.2768,
"step": 50
},
{
"epoch": 0.09724473257698542,
"grad_norm": 290.0,
"learning_rate": 4.9997781212386585e-05,
"loss": 14.4879,
"step": 60
},
{
"epoch": 0.11345218800648298,
"grad_norm": 160.0,
"learning_rate": 4.9980033274458193e-05,
"loss": 9.4888,
"step": 70
},
{
"epoch": 0.12965964343598055,
"grad_norm": 212.0,
"learning_rate": 4.994454999929178e-05,
"loss": 8.831,
"step": 80
},
{
"epoch": 0.1458670988654781,
"grad_norm": 109.0,
"learning_rate": 4.98913565793218e-05,
"loss": 7.6776,
"step": 90
},
{
"epoch": 0.1620745542949757,
"grad_norm": 71.5,
"learning_rate": 4.982049078084071e-05,
"loss": 6.4716,
"step": 100
},
{
"epoch": 0.1620745542949757,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.32666666666666666,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6333333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.39934768753390587,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.4495740740740741,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.32666666666666666,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.43241374431385005,
"eval_NanoBEIR_mean_cosine_precision@1": 0.32666666666666666,
"eval_NanoBEIR_mean_cosine_precision@5": 0.13466666666666668,
"eval_NanoBEIR_mean_cosine_recall@1": 0.25666666666666665,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5466666666666667,
"eval_NanoFEVER_cosine_accuracy@1": 0.24,
"eval_NanoFEVER_cosine_accuracy@5": 0.66,
"eval_NanoFEVER_cosine_map@100": 0.3935747452453245,
"eval_NanoFEVER_cosine_mrr@10": 0.39322222222222225,
"eval_NanoFEVER_cosine_ndcg@1": 0.24,
"eval_NanoFEVER_cosine_ndcg@5": 0.4387408055008589,
"eval_NanoFEVER_cosine_precision@1": 0.24,
"eval_NanoFEVER_cosine_precision@5": 0.132,
"eval_NanoFEVER_cosine_recall@1": 0.24,
"eval_NanoFEVER_cosine_recall@5": 0.63,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.42,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.58,
"eval_NanoHotpotQA_cosine_map@100": 0.3274226457297244,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4911666666666667,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.42,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.35120107956309937,
"eval_NanoHotpotQA_cosine_precision@1": 0.42,
"eval_NanoHotpotQA_cosine_precision@5": 0.14,
"eval_NanoHotpotQA_cosine_recall@1": 0.21,
"eval_NanoHotpotQA_cosine_recall@5": 0.35,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.66,
"eval_NanoMSMARCO_cosine_map@100": 0.47704567162666855,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4643333333333334,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5072993478775919,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.132,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.66,
"eval_mldr_loss": 4.950920581817627,
"eval_mldr_runtime": 14.3444,
"eval_mldr_samples_per_second": 34.857,
"eval_mldr_steps_per_second": 2.231,
"step": 100
},
{
"epoch": 0.1620745542949757,
"eval_squad_loss": 2.0146381855010986,
"eval_squad_runtime": 0.9385,
"eval_squad_samples_per_second": 51.146,
"eval_squad_steps_per_second": 3.197,
"step": 100
},
{
"epoch": 0.1620745542949757,
"eval_narrative_qa_loss": 4.486134052276611,
"eval_narrative_qa_runtime": 1.1506,
"eval_narrative_qa_samples_per_second": 259.856,
"eval_narrative_qa_steps_per_second": 16.513,
"step": 100
},
{
"epoch": 0.17828200972447325,
"grad_norm": 107.5,
"learning_rate": 4.973200291718561e-05,
"loss": 6.1959,
"step": 110
},
{
"epoch": 0.19448946515397084,
"grad_norm": 50.75,
"learning_rate": 4.962595581301673e-05,
"loss": 4.5491,
"step": 120
},
{
"epoch": 0.2106969205834684,
"grad_norm": 50.25,
"learning_rate": 4.95024247597132e-05,
"loss": 4.0871,
"step": 130
},
{
"epoch": 0.22690437601296595,
"grad_norm": 53.25,
"learning_rate": 4.936149746191758e-05,
"loss": 3.0861,
"step": 140
},
{
"epoch": 0.24311183144246354,
"grad_norm": 25.5,
"learning_rate": 4.920327397526731e-05,
"loss": 2.7792,
"step": 150
},
{
"epoch": 0.2593192868719611,
"grad_norm": 122.0,
"learning_rate": 4.9027866635357136e-05,
"loss": 2.2107,
"step": 160
},
{
"epoch": 0.2755267423014587,
"grad_norm": 410.0,
"learning_rate": 4.883539997798303e-05,
"loss": 1.6997,
"step": 170
},
{
"epoch": 0.2917341977309562,
"grad_norm": 66.0,
"learning_rate": 4.8626010650724204e-05,
"loss": 1.4911,
"step": 180
},
{
"epoch": 0.3079416531604538,
"grad_norm": 1072.0,
"learning_rate": 4.8399847315926e-05,
"loss": 1.6304,
"step": 190
},
{
"epoch": 0.3241491085899514,
"grad_norm": 16.5,
"learning_rate": 4.815707054515248e-05,
"loss": 1.5042,
"step": 200
},
{
"epoch": 0.3241491085899514,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.24666666666666667,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.4533333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.2917657022707883,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3391984126984127,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.24666666666666667,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.3047832475255773,
"eval_NanoBEIR_mean_cosine_precision@1": 0.24666666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.09466666666666669,
"eval_NanoBEIR_mean_cosine_recall@1": 0.18333333333333335,
"eval_NanoBEIR_mean_cosine_recall@5": 0.37333333333333335,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.26,
"eval_NanoFEVER_cosine_map@100": 0.15831046306304816,
"eval_NanoFEVER_cosine_mrr@10": 0.1541904761904762,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.15530091408199076,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.052000000000000005,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.24,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.36,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.5,
"eval_NanoHotpotQA_cosine_map@100": 0.2657203181494037,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4257142857142857,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.36,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.2891308855457201,
"eval_NanoHotpotQA_cosine_precision@1": 0.36,
"eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000003,
"eval_NanoHotpotQA_cosine_recall@1": 0.18,
"eval_NanoHotpotQA_cosine_recall@5": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.45126632559991314,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4376904761904762,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.46991794294902106,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mldr_loss": 1.3893917798995972,
"eval_mldr_runtime": 14.4056,
"eval_mldr_samples_per_second": 34.709,
"eval_mldr_steps_per_second": 2.221,
"step": 200
},
{
"epoch": 0.3241491085899514,
"eval_squad_loss": 0.7646868824958801,
"eval_squad_runtime": 1.072,
"eval_squad_samples_per_second": 44.776,
"eval_squad_steps_per_second": 2.799,
"step": 200
},
{
"epoch": 0.3241491085899514,
"eval_narrative_qa_loss": 2.59745192527771,
"eval_narrative_qa_runtime": 1.1589,
"eval_narrative_qa_samples_per_second": 257.994,
"eval_narrative_qa_steps_per_second": 16.394,
"step": 200
},
{
"epoch": 0.34035656401944897,
"grad_norm": 35.0,
"learning_rate": 4.7897852705183785e-05,
"loss": 1.6009,
"step": 210
},
{
"epoch": 0.3565640194489465,
"grad_norm": 1352.0,
"learning_rate": 4.7622377835639064e-05,
"loss": 1.5156,
"step": 220
},
{
"epoch": 0.3727714748784441,
"grad_norm": 704.0,
"learning_rate": 4.73308415183119e-05,
"loss": 1.5305,
"step": 230
},
{
"epoch": 0.3889789303079417,
"grad_norm": 824.0,
"learning_rate": 4.702345073831109e-05,
"loss": 1.6842,
"step": 240
},
{
"epoch": 0.4051863857374392,
"grad_norm": 21760.0,
"learning_rate": 4.6700423737105236e-05,
"loss": 1.2207,
"step": 250
},
{
"epoch": 0.4213938411669368,
"grad_norm": 3040.0,
"learning_rate": 4.63619898575755e-05,
"loss": 1.2634,
"step": 260
},
{
"epoch": 0.4376012965964344,
"grad_norm": 46.5,
"learning_rate": 4.600838938118672e-05,
"loss": 1.3175,
"step": 270
},
{
"epoch": 0.4538087520259319,
"grad_norm": 6112.0,
"learning_rate": 4.563987335739216e-05,
"loss": 1.3107,
"step": 280
},
{
"epoch": 0.4700162074554295,
"grad_norm": 2256.0,
"learning_rate": 4.525670342539332e-05,
"loss": 1.3163,
"step": 290
},
{
"epoch": 0.4862236628849271,
"grad_norm": 860.0,
"learning_rate": 4.485915162838122e-05,
"loss": 1.3703,
"step": 300
},
{
"epoch": 0.4862236628849271,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.37333333333333335,
"eval_NanoBEIR_mean_cosine_map@100": 0.24055436231400043,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.26788624338624334,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.24938490224868604,
"eval_NanoBEIR_mean_cosine_precision@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07733333333333335,
"eval_NanoBEIR_mean_cosine_recall@1": 0.15,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31666666666666665,
"eval_NanoFEVER_cosine_accuracy@1": 0.06,
"eval_NanoFEVER_cosine_accuracy@5": 0.18,
"eval_NanoFEVER_cosine_map@100": 0.11320690658806287,
"eval_NanoFEVER_cosine_mrr@10": 0.11538888888888887,
"eval_NanoFEVER_cosine_ndcg@1": 0.06,
"eval_NanoFEVER_cosine_ndcg@5": 0.11246425246579403,
"eval_NanoFEVER_cosine_precision@1": 0.06,
"eval_NanoFEVER_cosine_precision@5": 0.036000000000000004,
"eval_NanoFEVER_cosine_recall@1": 0.05,
"eval_NanoFEVER_cosine_recall@5": 0.17,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.36,
"eval_NanoHotpotQA_cosine_map@100": 0.17264044237203358,
"eval_NanoHotpotQA_cosine_mrr@10": 0.26374603174603173,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.18589124040083965,
"eval_NanoHotpotQA_cosine_precision@1": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.08,
"eval_NanoHotpotQA_cosine_recall@1": 0.1,
"eval_NanoHotpotQA_cosine_recall@5": 0.2,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4358157379819049,
"eval_NanoMSMARCO_cosine_mrr@10": 0.42452380952380947,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.44979921387942445,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mldr_loss": 1.218342900276184,
"eval_mldr_runtime": 14.4679,
"eval_mldr_samples_per_second": 34.559,
"eval_mldr_steps_per_second": 2.212,
"step": 300
},
{
"epoch": 0.4862236628849271,
"eval_squad_loss": 1.0243560075759888,
"eval_squad_runtime": 0.9647,
"eval_squad_samples_per_second": 49.755,
"eval_squad_steps_per_second": 3.11,
"step": 300
},
{
"epoch": 0.4862236628849271,
"eval_narrative_qa_loss": 2.635084629058838,
"eval_narrative_qa_runtime": 1.1616,
"eval_narrative_qa_samples_per_second": 257.409,
"eval_narrative_qa_steps_per_second": 16.357,
"step": 300
},
{
"epoch": 0.5024311183144247,
"grad_norm": 39.25,
"learning_rate": 4.444750022039099e-05,
"loss": 1.2901,
"step": 310
},
{
"epoch": 0.5186385737439222,
"grad_norm": 260.0,
"learning_rate": 4.4022041465907036e-05,
"loss": 1.245,
"step": 320
},
{
"epoch": 0.5348460291734197,
"grad_norm": 2192.0,
"learning_rate": 4.358307743236092e-05,
"loss": 1.1749,
"step": 330
},
{
"epoch": 0.5510534846029174,
"grad_norm": 1064.0,
"learning_rate": 4.3130919775669374e-05,
"loss": 1.0522,
"step": 340
},
{
"epoch": 0.5672609400324149,
"grad_norm": 1120.0,
"learning_rate": 4.2665889518964684e-05,
"loss": 1.1319,
"step": 350
},
{
"epoch": 0.5834683954619124,
"grad_norm": 1552.0,
"learning_rate": 4.2188316824674504e-05,
"loss": 1.1986,
"step": 360
},
{
"epoch": 0.5996758508914101,
"grad_norm": 133.0,
"learning_rate": 4.169854076011292e-05,
"loss": 1.3508,
"step": 370
},
{
"epoch": 0.6158833063209076,
"grad_norm": 5408.0,
"learning_rate": 4.119690905674937e-05,
"loss": 1.3465,
"step": 380
},
{
"epoch": 0.6320907617504052,
"grad_norm": 1624.0,
"learning_rate": 4.068377786332593e-05,
"loss": 1.3062,
"step": 390
},
{
"epoch": 0.6482982171799028,
"grad_norm": 1192.0,
"learning_rate": 4.0159511492998746e-05,
"loss": 0.9974,
"step": 400
},
{
"epoch": 0.6482982171799028,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.20000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.39333333333333337,
"eval_NanoBEIR_mean_cosine_map@100": 0.25566257669608067,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2828835978835979,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.20000000000000004,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.2711361604281084,
"eval_NanoBEIR_mean_cosine_precision@1": 0.20000000000000004,
"eval_NanoBEIR_mean_cosine_precision@5": 0.08266666666666668,
"eval_NanoBEIR_mean_cosine_recall@1": 0.16333333333333333,
"eval_NanoBEIR_mean_cosine_recall@5": 0.3433333333333333,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.2,
"eval_NanoFEVER_cosine_map@100": 0.12628338770667222,
"eval_NanoFEVER_cosine_mrr@10": 0.1275,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.1285946755597121,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.04,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.18,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32,
"eval_NanoHotpotQA_cosine_map@100": 0.17161850356661693,
"eval_NanoHotpotQA_cosine_mrr@10": 0.26146031746031745,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.18168469434151963,
"eval_NanoHotpotQA_cosine_precision@1": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.1,
"eval_NanoHotpotQA_cosine_recall@5": 0.19,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.66,
"eval_NanoMSMARCO_cosine_map@100": 0.4690858388149529,
"eval_NanoMSMARCO_cosine_mrr@10": 0.45969047619047615,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.5031291113830936,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.132,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.66,
"eval_mldr_loss": 1.7250500917434692,
"eval_mldr_runtime": 14.4371,
"eval_mldr_samples_per_second": 34.633,
"eval_mldr_steps_per_second": 2.217,
"step": 400
},
{
"epoch": 0.6482982171799028,
"eval_squad_loss": 0.8224272131919861,
"eval_squad_runtime": 0.9831,
"eval_squad_samples_per_second": 48.823,
"eval_squad_steps_per_second": 3.051,
"step": 400
},
{
"epoch": 0.6482982171799028,
"eval_narrative_qa_loss": 2.191936492919922,
"eval_narrative_qa_runtime": 1.1776,
"eval_narrative_qa_samples_per_second": 253.914,
"eval_narrative_qa_steps_per_second": 16.135,
"step": 400
},
{
"epoch": 0.6645056726094003,
"grad_norm": 796.0,
"learning_rate": 3.962448216468275e-05,
"loss": 3.2988,
"step": 410
},
{
"epoch": 0.6807131280388979,
"grad_norm": 9216.0,
"learning_rate": 3.9079069738783484e-05,
"loss": 1.3305,
"step": 420
},
{
"epoch": 0.6969205834683955,
"grad_norm": 227.0,
"learning_rate": 3.852366144750358e-05,
"loss": 1.2771,
"step": 430
},
{
"epoch": 0.713128038897893,
"grad_norm": 1456.0,
"learning_rate": 3.7958651619915495e-05,
"loss": 1.0724,
"step": 440
},
{
"epoch": 0.7293354943273906,
"grad_norm": 704.0,
"learning_rate": 3.738444140199549e-05,
"loss": 1.2473,
"step": 450
},
{
"epoch": 0.7455429497568882,
"grad_norm": 4608.0,
"learning_rate": 3.680143847181783e-05,
"loss": 1.4045,
"step": 460
},
{
"epoch": 0.7617504051863857,
"grad_norm": 8384.0,
"learning_rate": 3.621005675011127e-05,
"loss": 1.6167,
"step": 470
},
{
"epoch": 0.7779578606158833,
"grad_norm": 10752.0,
"learning_rate": 3.5610716106383426e-05,
"loss": 1.427,
"step": 480
},
{
"epoch": 0.7941653160453809,
"grad_norm": 32512.0,
"learning_rate": 3.500384206082155e-05,
"loss": 1.256,
"step": 490
},
{
"epoch": 0.8103727714748784,
"grad_norm": 2080.0,
"learning_rate": 3.438986548218155e-05,
"loss": 1.7336,
"step": 500
},
{
"epoch": 0.8103727714748784,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.35333333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.25285734426656203,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.28679100529100526,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.26096692405722594,
"eval_NanoBEIR_mean_cosine_precision@1": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.17666666666666667,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.18,
"eval_NanoFEVER_cosine_map@100": 0.11514089868666741,
"eval_NanoFEVER_cosine_mrr@10": 0.11641269841269843,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.11458961164975079,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.036000000000000004,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.16,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.28,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.3,
"eval_NanoHotpotQA_cosine_map@100": 0.1921273663105645,
"eval_NanoHotpotQA_cosine_mrr@10": 0.3077380952380952,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.28,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.202243938877232,
"eval_NanoHotpotQA_cosine_precision@1": 0.28,
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.14,
"eval_NanoHotpotQA_cosine_recall@5": 0.19,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4513037678024542,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4362222222222223,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4660672216446949,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mldr_loss": 1.2118109464645386,
"eval_mldr_runtime": 14.4727,
"eval_mldr_samples_per_second": 34.548,
"eval_mldr_steps_per_second": 2.211,
"step": 500
},
{
"epoch": 0.8103727714748784,
"eval_squad_loss": 0.669793426990509,
"eval_squad_runtime": 0.9528,
"eval_squad_samples_per_second": 50.377,
"eval_squad_steps_per_second": 3.149,
"step": 500
},
{
"epoch": 0.8103727714748784,
"eval_narrative_qa_loss": 2.0540804862976074,
"eval_narrative_qa_runtime": 1.1722,
"eval_narrative_qa_samples_per_second": 255.077,
"eval_narrative_qa_steps_per_second": 16.209,
"step": 500
},
{
"epoch": 0.826580226904376,
"grad_norm": 5792.0,
"learning_rate": 3.3769222281879495e-05,
"loss": 0.9241,
"step": 510
},
{
"epoch": 0.8427876823338736,
"grad_norm": 1080.0,
"learning_rate": 3.314235310450306e-05,
"loss": 1.2186,
"step": 520
},
{
"epoch": 0.8589951377633711,
"grad_norm": 1264.0,
"learning_rate": 3.250970301496237e-05,
"loss": 1.3873,
"step": 530
},
{
"epoch": 0.8752025931928687,
"grad_norm": 308.0,
"learning_rate": 3.187172118250266e-05,
"loss": 1.299,
"step": 540
},
{
"epoch": 0.8914100486223663,
"grad_norm": 5568.0,
"learning_rate": 3.122886056180284e-05,
"loss": 1.4101,
"step": 550
},
{
"epoch": 0.9076175040518638,
"grad_norm": 1480.0,
"learning_rate": 3.0581577571386426e-05,
"loss": 1.047,
"step": 560
},
{
"epoch": 0.9238249594813615,
"grad_norm": 34.0,
"learning_rate": 2.9930331769573365e-05,
"loss": 1.1558,
"step": 570
},
{
"epoch": 0.940032414910859,
"grad_norm": 4640.0,
"learning_rate": 2.9275585528202516e-05,
"loss": 1.1769,
"step": 580
},
{
"epoch": 0.9562398703403565,
"grad_norm": 191.0,
"learning_rate": 2.8617803704356672e-05,
"loss": 1.1054,
"step": 590
},
{
"epoch": 0.9724473257698542,
"grad_norm": 167.0,
"learning_rate": 2.7957453310323073e-05,
"loss": 3.6745,
"step": 600
},
{
"epoch": 0.9724473257698542,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.20666666666666667,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.36000000000000004,
"eval_NanoBEIR_mean_cosine_map@100": 0.24725005259175806,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2764206349206349,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.20666666666666667,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.2557813556128824,
"eval_NanoBEIR_mean_cosine_precision@1": 0.20666666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.076,
"eval_NanoBEIR_mean_cosine_recall@1": 0.16666666666666666,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31,
"eval_NanoFEVER_cosine_accuracy@1": 0.1,
"eval_NanoFEVER_cosine_accuracy@5": 0.12,
"eval_NanoFEVER_cosine_map@100": 0.12500495211409104,
"eval_NanoFEVER_cosine_mrr@10": 0.12576984126984128,
"eval_NanoFEVER_cosine_ndcg@1": 0.1,
"eval_NanoFEVER_cosine_ndcg@5": 0.10488153892673832,
"eval_NanoFEVER_cosine_precision@1": 0.1,
"eval_NanoFEVER_cosine_precision@5": 0.024000000000000004,
"eval_NanoFEVER_cosine_recall@1": 0.09,
"eval_NanoFEVER_cosine_recall@5": 0.11,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.22,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.34,
"eval_NanoHotpotQA_cosine_map@100": 0.17072760263191902,
"eval_NanoHotpotQA_cosine_mrr@10": 0.2686904761904762,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.22,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.19019906156669042,
"eval_NanoHotpotQA_cosine_precision@1": 0.22,
"eval_NanoHotpotQA_cosine_precision@5": 0.08,
"eval_NanoHotpotQA_cosine_recall@1": 0.11,
"eval_NanoHotpotQA_cosine_recall@5": 0.2,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.62,
"eval_NanoMSMARCO_cosine_map@100": 0.44601760302926413,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4348015873015873,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.3,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4722634663452185,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@5": 0.124,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@5": 0.62,
"eval_mldr_loss": 1.1025381088256836,
"eval_mldr_runtime": 14.5605,
"eval_mldr_samples_per_second": 34.339,
"eval_mldr_steps_per_second": 2.198,
"step": 600
},
{
"epoch": 0.9724473257698542,
"eval_squad_loss": 0.7168652415275574,
"eval_squad_runtime": 0.8907,
"eval_squad_samples_per_second": 53.891,
"eval_squad_steps_per_second": 3.368,
"step": 600
},
{
"epoch": 0.9724473257698542,
"eval_narrative_qa_loss": 1.757150411605835,
"eval_narrative_qa_runtime": 1.1289,
"eval_narrative_qa_samples_per_second": 264.853,
"eval_narrative_qa_steps_per_second": 16.83,
"step": 600
},
{
"epoch": 0.9886547811993517,
"grad_norm": 414.0,
"learning_rate": 2.7295003182023793e-05,
"loss": 1.2095,
"step": 610
},
{
"epoch": 1.0048622366288493,
"grad_norm": 760.0,
"learning_rate": 2.663092364615134e-05,
"loss": 1.0888,
"step": 620
},
{
"epoch": 1.0210696920583469,
"grad_norm": 884.0,
"learning_rate": 2.596568618624588e-05,
"loss": 1.1728,
"step": 630
},
{
"epoch": 1.0372771474878444,
"grad_norm": 1568.0,
"learning_rate": 2.529976310795108e-05,
"loss": 1.2033,
"step": 640
},
{
"epoch": 1.053484602917342,
"grad_norm": 716.0,
"learning_rate": 2.4633627203686322e-05,
"loss": 1.0108,
"step": 650
},
{
"epoch": 1.0696920583468394,
"grad_norm": 50.5,
"learning_rate": 2.3967751416973304e-05,
"loss": 0.9941,
"step": 660
},
{
"epoch": 1.0858995137763372,
"grad_norm": 39.5,
"learning_rate": 2.3302608506655286e-05,
"loss": 1.1392,
"step": 670
},
{
"epoch": 1.1021069692058347,
"grad_norm": 1000.0,
"learning_rate": 2.2638670711247605e-05,
"loss": 1.083,
"step": 680
},
{
"epoch": 1.1183144246353323,
"grad_norm": 7456.0,
"learning_rate": 2.197640941365743e-05,
"loss": 1.0115,
"step": 690
},
{
"epoch": 1.1345218800648298,
"grad_norm": 2944.0,
"learning_rate": 2.131629480651116e-05,
"loss": 1.2825,
"step": 700
},
{
"epoch": 1.1345218800648298,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.17333333333333334,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.34,
"eval_NanoBEIR_mean_cosine_map@100": 0.2227183429409336,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.25063756613756616,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.17333333333333334,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.23117905501265376,
"eval_NanoBEIR_mean_cosine_precision@1": 0.17333333333333334,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07066666666666667,
"eval_NanoBEIR_mean_cosine_recall@1": 0.14,
"eval_NanoBEIR_mean_cosine_recall@5": 0.29000000000000004,
"eval_NanoFEVER_cosine_accuracy@1": 0.06,
"eval_NanoFEVER_cosine_accuracy@5": 0.14,
"eval_NanoFEVER_cosine_map@100": 0.10226378102099443,
"eval_NanoFEVER_cosine_mrr@10": 0.107,
"eval_NanoFEVER_cosine_ndcg@1": 0.06,
"eval_NanoFEVER_cosine_ndcg@5": 0.09611366515963533,
"eval_NanoFEVER_cosine_precision@1": 0.06,
"eval_NanoFEVER_cosine_precision@5": 0.027999999999999997,
"eval_NanoFEVER_cosine_recall@1": 0.05,
"eval_NanoFEVER_cosine_recall@5": 0.13,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.18,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32,
"eval_NanoHotpotQA_cosine_map@100": 0.1431808584557489,
"eval_NanoHotpotQA_cosine_mrr@10": 0.2346666666666667,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.18,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.16151062083926612,
"eval_NanoHotpotQA_cosine_precision@1": 0.18,
"eval_NanoHotpotQA_cosine_precision@5": 0.07200000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.09,
"eval_NanoHotpotQA_cosine_recall@5": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.56,
"eval_NanoMSMARCO_cosine_map@100": 0.4227103893460575,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4102460317460318,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.28,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4359128790390598,
"eval_NanoMSMARCO_cosine_precision@1": 0.28,
"eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.28,
"eval_NanoMSMARCO_cosine_recall@5": 0.56,
"eval_mldr_loss": 1.04865562915802,
"eval_mldr_runtime": 14.5246,
"eval_mldr_samples_per_second": 34.424,
"eval_mldr_steps_per_second": 2.203,
"step": 700
},
{
"epoch": 1.1345218800648298,
"eval_squad_loss": 0.7077043652534485,
"eval_squad_runtime": 0.907,
"eval_squad_samples_per_second": 52.924,
"eval_squad_steps_per_second": 3.308,
"step": 700
},
{
"epoch": 1.1345218800648298,
"eval_narrative_qa_loss": 1.7767502069473267,
"eval_narrative_qa_runtime": 1.1658,
"eval_narrative_qa_samples_per_second": 256.487,
"eval_narrative_qa_steps_per_second": 16.298,
"step": 700
},
{
"epoch": 1.1507293354943273,
"grad_norm": 1976.0,
"learning_rate": 2.0658795558326743e-05,
"loss": 1.1286,
"step": 710
},
{
"epoch": 1.1669367909238249,
"grad_norm": 1640.0,
"learning_rate": 2.000437848076822e-05,
"loss": 1.174,
"step": 720
},
{
"epoch": 1.1831442463533226,
"grad_norm": 3776.0,
"learning_rate": 1.935350819721849e-05,
"loss": 1.124,
"step": 730
},
{
"epoch": 1.1993517017828201,
"grad_norm": 5568.0,
"learning_rate": 1.870664681290575e-05,
"loss": 0.9694,
"step": 740
},
{
"epoch": 1.2155591572123177,
"grad_norm": 532.0,
"learning_rate": 1.8064253586817816e-05,
"loss": 1.165,
"step": 750
},
{
"epoch": 1.2317666126418152,
"grad_norm": 928.0,
"learning_rate": 1.7426784605637153e-05,
"loss": 1.1989,
"step": 760
},
{
"epoch": 1.2479740680713127,
"grad_norm": 560.0,
"learning_rate": 1.6794692459928223e-05,
"loss": 0.9966,
"step": 770
},
{
"epoch": 1.2641815235008105,
"grad_norm": 123.5,
"learning_rate": 1.6168425922807008e-05,
"loss": 0.9768,
"step": 780
},
{
"epoch": 1.280388978930308,
"grad_norm": 884.0,
"learning_rate": 1.5548429631320806e-05,
"loss": 1.1039,
"step": 790
},
{
"epoch": 1.2965964343598055,
"grad_norm": 15.75,
"learning_rate": 1.4935143770764601e-05,
"loss": 2.7281,
"step": 800
},
{
"epoch": 1.2965964343598055,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.17333333333333334,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.35333333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.2232392549386716,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.24966402116402117,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.17333333333333334,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.23771617121243702,
"eval_NanoBEIR_mean_cosine_precision@1": 0.17333333333333334,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07333333333333333,
"eval_NanoBEIR_mean_cosine_recall@1": 0.1366666666666667,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31,
"eval_NanoFEVER_cosine_accuracy@1": 0.06,
"eval_NanoFEVER_cosine_accuracy@5": 0.16,
"eval_NanoFEVER_cosine_map@100": 0.1100930297556344,
"eval_NanoFEVER_cosine_mrr@10": 0.11335714285714285,
"eval_NanoFEVER_cosine_ndcg@1": 0.06,
"eval_NanoFEVER_cosine_ndcg@5": 0.10785578521428746,
"eval_NanoFEVER_cosine_precision@1": 0.06,
"eval_NanoFEVER_cosine_precision@5": 0.032,
"eval_NanoFEVER_cosine_recall@1": 0.05,
"eval_NanoFEVER_cosine_recall@5": 0.15,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.28,
"eval_NanoHotpotQA_cosine_map@100": 0.14784815274029572,
"eval_NanoHotpotQA_cosine_mrr@10": 0.2350555555555556,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.16017374485985264,
"eval_NanoHotpotQA_cosine_precision@1": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.064,
"eval_NanoHotpotQA_cosine_recall@1": 0.1,
"eval_NanoHotpotQA_cosine_recall@5": 0.16,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.26,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.62,
"eval_NanoMSMARCO_cosine_map@100": 0.4117765823200847,
"eval_NanoMSMARCO_cosine_mrr@10": 0.40057936507936504,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.26,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.445118983563171,
"eval_NanoMSMARCO_cosine_precision@1": 0.26,
"eval_NanoMSMARCO_cosine_precision@5": 0.124,
"eval_NanoMSMARCO_cosine_recall@1": 0.26,
"eval_NanoMSMARCO_cosine_recall@5": 0.62,
"eval_mldr_loss": 1.0107322931289673,
"eval_mldr_runtime": 14.5338,
"eval_mldr_samples_per_second": 34.403,
"eval_mldr_steps_per_second": 2.202,
"step": 800
},
{
"epoch": 1.2965964343598055,
"eval_squad_loss": 0.6025272607803345,
"eval_squad_runtime": 0.8024,
"eval_squad_samples_per_second": 59.822,
"eval_squad_steps_per_second": 3.739,
"step": 800
},
{
"epoch": 1.2965964343598055,
"eval_narrative_qa_loss": 1.7681714296340942,
"eval_narrative_qa_runtime": 1.1303,
"eval_narrative_qa_samples_per_second": 264.533,
"eval_narrative_qa_steps_per_second": 16.81,
"step": 800
},
{
"epoch": 1.312803889789303,
"grad_norm": 4288.0,
"learning_rate": 1.4329003762158106e-05,
"loss": 1.2271,
"step": 810
},
{
"epoch": 1.3290113452188006,
"grad_norm": 11840.0,
"learning_rate": 1.3730439953105243e-05,
"loss": 1.0453,
"step": 820
},
{
"epoch": 1.3452188006482984,
"grad_norm": 1440.0,
"learning_rate": 1.3139877312255799e-05,
"loss": 1.106,
"step": 830
},
{
"epoch": 1.3614262560777957,
"grad_norm": 440.0,
"learning_rate": 1.2557735127585917e-05,
"loss": 1.0065,
"step": 840
},
{
"epoch": 1.3776337115072934,
"grad_norm": 616.0,
"learning_rate": 1.1984426708711804e-05,
"loss": 1.124,
"step": 850
},
{
"epoch": 1.393841166936791,
"grad_norm": 12.5,
"learning_rate": 1.1420359093447966e-05,
"loss": 1.3403,
"step": 860
},
{
"epoch": 1.4100486223662885,
"grad_norm": 1280.0,
"learning_rate": 1.0865932758818258e-05,
"loss": 1.1274,
"step": 870
},
{
"epoch": 1.426256077795786,
"grad_norm": 186.0,
"learning_rate": 1.0321541336725027e-05,
"loss": 1.1335,
"step": 880
},
{
"epoch": 1.4424635332252835,
"grad_norm": 6656.0,
"learning_rate": 9.787571334478101e-06,
"loss": 1.0805,
"step": 890
},
{
"epoch": 1.4586709886547813,
"grad_norm": 73.5,
"learning_rate": 9.26440186038211e-06,
"loss": 1.1117,
"step": 900
},
{
"epoch": 1.4586709886547813,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.2066666666666667,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.3466666666666667,
"eval_NanoBEIR_mean_cosine_map@100": 0.24109707671460848,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.27005291005291004,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.2066666666666667,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.24663303266901904,
"eval_NanoBEIR_mean_cosine_precision@1": 0.2066666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07200000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.17,
"eval_NanoBEIR_mean_cosine_recall@5": 0.29666666666666663,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.14,
"eval_NanoFEVER_cosine_map@100": 0.10717352446028915,
"eval_NanoFEVER_cosine_mrr@10": 0.1099126984126984,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.10123212623289701,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.028000000000000004,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.13,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.2,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32,
"eval_NanoHotpotQA_cosine_map@100": 0.15637152907438528,
"eval_NanoHotpotQA_cosine_mrr@10": 0.2523888888888889,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.2,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.17287282180469699,
"eval_NanoHotpotQA_cosine_precision@1": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.07200000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.1,
"eval_NanoHotpotQA_cosine_recall@5": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.34,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.45974617660915107,
"eval_NanoMSMARCO_cosine_mrr@10": 0.44785714285714284,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.34,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4657941499694631,
"eval_NanoMSMARCO_cosine_precision@1": 0.34,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.34,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mldr_loss": 1.006673812866211,
"eval_mldr_runtime": 14.487,
"eval_mldr_samples_per_second": 34.514,
"eval_mldr_steps_per_second": 2.209,
"step": 900
},
{
"epoch": 1.4586709886547813,
"eval_squad_loss": 0.7755841612815857,
"eval_squad_runtime": 0.8044,
"eval_squad_samples_per_second": 59.675,
"eval_squad_steps_per_second": 3.73,
"step": 900
},
{
"epoch": 1.4586709886547813,
"eval_narrative_qa_loss": 1.7425211668014526,
"eval_narrative_qa_runtime": 1.1372,
"eval_narrative_qa_samples_per_second": 262.924,
"eval_narrative_qa_steps_per_second": 16.708,
"step": 900
},
{
"epoch": 1.4748784440842788,
"grad_norm": 712.0,
"learning_rate": 8.752404354577052e-06,
"loss": 1.0897,
"step": 910
},
{
"epoch": 1.4910858995137763,
"grad_norm": 1200.0,
"learning_rate": 8.251942325322903e-06,
"loss": 0.9964,
"step": 920
},
{
"epoch": 1.5072933549432739,
"grad_norm": 2016.0,
"learning_rate": 7.763371090915932e-06,
"loss": 0.954,
"step": 930
},
{
"epoch": 1.5235008103727714,
"grad_norm": 4512.0,
"learning_rate": 7.287037527419604e-06,
"loss": 1.3415,
"step": 940
},
{
"epoch": 1.5397082658022692,
"grad_norm": 438.0,
"learning_rate": 6.823279822389278e-06,
"loss": 0.9926,
"step": 950
},
{
"epoch": 1.5559157212317665,
"grad_norm": 158.0,
"learning_rate": 6.372427234765688e-06,
"loss": 1.1362,
"step": 960
},
{
"epoch": 1.5721231766612642,
"grad_norm": 163.0,
"learning_rate": 5.934799861107507e-06,
"loss": 1.1178,
"step": 970
},
{
"epoch": 1.5883306320907618,
"grad_norm": 1216.0,
"learning_rate": 5.5107084083289576e-06,
"loss": 0.8676,
"step": 980
},
{
"epoch": 1.6045380875202593,
"grad_norm": 37632.0,
"learning_rate": 5.1004539731040245e-06,
"loss": 1.1794,
"step": 990
},
{
"epoch": 1.620745542949757,
"grad_norm": 268.0,
"learning_rate": 4.704327828093641e-06,
"loss": 1.0575,
"step": 1000
},
{
"epoch": 1.620745542949757,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.3333333333333333,
"eval_NanoBEIR_mean_cosine_map@100": 0.23251229334833623,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.250457671957672,
"eval_NanoBEIR_mean_cosine_ndcg@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_ndcg@5": 0.23607659211604262,
"eval_NanoBEIR_mean_cosine_precision@1": 0.18666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.06933333333333334,
"eval_NanoBEIR_mean_cosine_recall@1": 0.15666666666666668,
"eval_NanoBEIR_mean_cosine_recall@5": 0.29,
"eval_NanoFEVER_cosine_accuracy@1": 0.08,
"eval_NanoFEVER_cosine_accuracy@5": 0.14,
"eval_NanoFEVER_cosine_map@100": 0.10995542705065009,
"eval_NanoFEVER_cosine_mrr@10": 0.11060317460317462,
"eval_NanoFEVER_cosine_ndcg@1": 0.08,
"eval_NanoFEVER_cosine_ndcg@5": 0.10261859507142916,
"eval_NanoFEVER_cosine_precision@1": 0.08,
"eval_NanoFEVER_cosine_precision@5": 0.027999999999999997,
"eval_NanoFEVER_cosine_recall@1": 0.07,
"eval_NanoFEVER_cosine_recall@5": 0.13,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.16,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.28,
"eval_NanoHotpotQA_cosine_map@100": 0.14864943925499563,
"eval_NanoHotpotQA_cosine_mrr@10": 0.21274603174603177,
"eval_NanoHotpotQA_cosine_ndcg@1": 0.16,
"eval_NanoHotpotQA_cosine_ndcg@5": 0.15294562020041982,
"eval_NanoHotpotQA_cosine_precision@1": 0.16,
"eval_NanoHotpotQA_cosine_precision@5": 0.064,
"eval_NanoHotpotQA_cosine_recall@1": 0.08,
"eval_NanoHotpotQA_cosine_recall@5": 0.16,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4389320137393629,
"eval_NanoMSMARCO_cosine_mrr@10": 0.42802380952380953,
"eval_NanoMSMARCO_cosine_ndcg@1": 0.32,
"eval_NanoMSMARCO_cosine_ndcg@5": 0.4526655610762789,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mldr_loss": 1.0088629722595215,
"eval_mldr_runtime": 14.5077,
"eval_mldr_samples_per_second": 34.464,
"eval_mldr_steps_per_second": 2.206,
"step": 1000
},
{
"epoch": 1.620745542949757,
"eval_squad_loss": 0.6442683339118958,
"eval_squad_runtime": 0.8625,
"eval_squad_samples_per_second": 55.652,
"eval_squad_steps_per_second": 3.478,
"step": 1000
},
{
"epoch": 1.620745542949757,
"eval_narrative_qa_loss": 1.7451977729797363,
"eval_narrative_qa_runtime": 1.1443,
"eval_narrative_qa_samples_per_second": 261.298,
"eval_narrative_qa_steps_per_second": 16.604,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 1234,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}