{ "metadata": { "timestamp": "2025-04-07T13:42:49.983941", "vidore_benchmark_version": "5.0.1.dev6+g9e0da63" }, "metrics": { "./data_dir/eval_vidore/syntheticDocQA_artificial_intelligence_test": { "ndcg_at_1": 1.0, "ndcg_at_3": 1.0, "ndcg_at_5": 1.0, "ndcg_at_10": 1.0, "ndcg_at_20": 1.0, "ndcg_at_50": 1.0, "ndcg_at_100": 1.0, "map_at_1": 1.0, "map_at_3": 1.0, "map_at_5": 1.0, "map_at_10": 1.0, "map_at_20": 1.0, "map_at_50": 1.0, "map_at_100": 1.0, "recall_at_1": 1.0, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 1.0, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 1.0, "mrr_at_3": 1.0, "mrr_at_5": 1.0, "mrr_at_10": 1.0, "mrr_at_20": 1.0, "mrr_at_50": 1.0, "mrr_at_100": 1.0, "naucs_at_1_max": null, "naucs_at_1_std": null, "naucs_at_1_diff1": null, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "./data_dir/eval_vidore/syntheticDocQA_energy_test": { "ndcg_at_1": 0.96, "ndcg_at_3": 0.96631, "ndcg_at_5": 0.97062, "ndcg_at_10": 0.97062, "ndcg_at_20": 0.97602, "ndcg_at_50": 0.97602, "ndcg_at_100": 0.97602, "map_at_1": 0.96, "map_at_3": 0.965, "map_at_5": 0.9675, "map_at_10": 0.9675, "map_at_20": 0.96917, "map_at_50": 0.96917, "map_at_100": 0.96917, "recall_at_1": 0.96, "recall_at_3": 0.97, "recall_at_5": 0.98, "recall_at_10": 0.98, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.96, "precision_at_3": 0.32333, "precision_at_5": 0.196, "precision_at_10": 0.098, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.96, "mrr_at_3": 0.965, "mrr_at_5": 0.9675, "mrr_at_10": 0.9675, "mrr_at_20": 0.9693181818181819, "mrr_at_50": 0.9693181818181819, "mrr_at_100": 0.9693181818181819, "naucs_at_1_max": 0.7700746965452845, "naucs_at_1_std": -0.8232959850606876, "naucs_at_1_diff1": 0.96732026143791, "naucs_at_3_max": 0.7860255213196378, "naucs_at_3_std": -1.21708683473389, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.6790382819794609, "naucs_at_5_std": -1.7399626517273414, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.6790382819794609, "naucs_at_10_std": -1.7399626517273414, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "./data_dir/eval_vidore/tatdqa_test": { "ndcg_at_1": 0.70352, "ndcg_at_3": 0.79986, "ndcg_at_5": 0.81641, "ndcg_at_10": 0.83016, "ndcg_at_20": 0.83437, "ndcg_at_50": 0.8377, "ndcg_at_100": 0.83916, "map_at_1": 0.70352, "map_at_3": 0.77643, "map_at_5": 0.78578, "map_at_10": 0.79157, "map_at_20": 0.79277, "map_at_50": 0.79328, "map_at_100": 0.7934, "recall_at_1": 0.70352, "recall_at_3": 0.86756, "recall_at_5": 0.90705, "recall_at_10": 0.94897, "recall_at_20": 0.96537, "recall_at_50": 0.98238, "recall_at_100": 0.99149, "precision_at_1": 0.70352, "precision_at_3": 0.28919, "precision_at_5": 0.18141, "precision_at_10": 0.0949, "precision_at_20": 0.04827, "precision_at_50": 0.01965, "precision_at_100": 0.00991, "mrr_at_1": 0.7004860267314702, "mrr_at_3": 0.7765289590927501, "mrr_at_5": 0.7844268934791414, "mrr_at_10": 0.790128787440452, "mrr_at_20": 0.7913423674323303, "mrr_at_50": 0.7919207273733465, "mrr_at_100": 0.792037412007671, "naucs_at_1_max": 0.30712069371146206, "naucs_at_1_std": -0.14877076174901377, "naucs_at_1_diff1": 0.8349485938382216, "naucs_at_3_max": 0.3678958030568473, "naucs_at_3_std": -0.02507864846676702, "naucs_at_3_diff1": 0.7517064585275754, "naucs_at_5_max": 0.44253146077029726, "naucs_at_5_std": 0.11026281660700264, "naucs_at_5_diff1": 0.7300454594518606, "naucs_at_10_max": 0.5290095815470773, "naucs_at_10_std": 0.33727621290447424, "naucs_at_10_diff1": 0.665454536221219, "naucs_at_20_max": 0.5525939398718661, "naucs_at_20_std": 0.44184740259085264, "naucs_at_20_diff1": 0.6237746503620547, "naucs_at_50_max": 0.6332654079181073, "naucs_at_50_std": 0.5148901808157141, "naucs_at_50_diff1": 0.7220173231390383, "naucs_at_100_max": 0.7592562052200388, "naucs_at_100_std": 0.6908846813166498, "naucs_at_100_diff1": 0.7670992011544225 }, "./data_dir/eval_vidore/shiftproject_test": { "ndcg_at_1": 0.77, "ndcg_at_3": 0.87702, "ndcg_at_5": 0.8895, "ndcg_at_10": 0.8895, "ndcg_at_20": 0.8895, "ndcg_at_50": 0.89367, "ndcg_at_100": 0.89367, "map_at_1": 0.77, "map_at_3": 0.85167, "map_at_5": 0.85867, "map_at_10": 0.85867, "map_at_20": 0.85867, "map_at_50": 0.85942, "map_at_100": 0.85942, "recall_at_1": 0.77, "recall_at_3": 0.95, "recall_at_5": 0.98, "recall_at_10": 0.98, "recall_at_20": 0.98, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.77, "precision_at_3": 0.31667, "precision_at_5": 0.196, "precision_at_10": 0.098, "precision_at_20": 0.049, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.77, "mrr_at_3": 0.8516666666666667, "mrr_at_5": 0.8586666666666666, "mrr_at_10": 0.8586666666666666, "mrr_at_20": 0.8586666666666666, "mrr_at_50": 0.8594585921325052, "mrr_at_100": 0.8594585921325052, "naucs_at_1_max": -0.027170385622524204, "naucs_at_1_std": -0.41825384391575815, "naucs_at_1_diff1": 0.7566527159195185, "naucs_at_3_max": -0.22203548085900757, "naucs_at_3_std": -0.38478057889822304, "naucs_at_3_diff1": 0.7399626517273589, "naucs_at_5_max": -0.43534080298785716, "naucs_at_5_std": -0.5088702147525547, "naucs_at_5_diff1": 0.9346405228758136, "naucs_at_10_max": -0.43534080298785716, "naucs_at_10_std": -0.5088702147525547, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": -0.43534080298785716, "naucs_at_20_std": -0.5088702147525547, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "./data_dir/eval_vidore/syntheticDocQA_healthcare_industry_test": { "ndcg_at_1": 0.97, "ndcg_at_3": 0.98893, "ndcg_at_5": 0.98893, "ndcg_at_10": 0.98893, "ndcg_at_20": 0.98893, "ndcg_at_50": 0.98893, "ndcg_at_100": 0.98893, "map_at_1": 0.97, "map_at_3": 0.985, "map_at_5": 0.985, "map_at_10": 0.985, "map_at_20": 0.985, "map_at_50": 0.985, "map_at_100": 0.985, "recall_at_1": 0.97, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.97, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.98, "mrr_at_3": 0.99, "mrr_at_5": 0.99, "mrr_at_10": 0.99, "mrr_at_20": 0.99, "mrr_at_50": 0.99, "mrr_at_100": 0.99, "naucs_at_1_max": 0.8078120136943662, "naucs_at_1_std": -0.10597572362278272, "naucs_at_1_diff1": 0.9564270152505465, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "./data_dir/eval_vidore/syntheticDocQA_government_reports_test": { "ndcg_at_1": 0.94, "ndcg_at_3": 0.96524, "ndcg_at_5": 0.97385, "ndcg_at_10": 0.97385, "ndcg_at_20": 0.97385, "ndcg_at_50": 0.97385, "ndcg_at_100": 0.97385, "map_at_1": 0.94, "map_at_3": 0.96, "map_at_5": 0.965, "map_at_10": 0.965, "map_at_20": 0.965, "map_at_50": 0.965, "map_at_100": 0.965, "recall_at_1": 0.94, "recall_at_3": 0.98, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.32667, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.965, "mrr_at_5": 0.97, "mrr_at_10": 0.97, "mrr_at_20": 0.97, "mrr_at_50": 0.97, "mrr_at_100": 0.97, "naucs_at_1_max": 0.7857142857142844, "naucs_at_1_std": 0.5056022408963584, "naucs_at_1_diff1": 0.9319172113289763, "naucs_at_3_max": 0.8611111111111119, "naucs_at_3_std": 0.5401493930905577, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "./data_dir/eval_vidore/docvqa_test_subsampled": { "ndcg_at_1": 0.54767, "ndcg_at_3": 0.60458, "ndcg_at_5": 0.62758, "ndcg_at_10": 0.64623, "ndcg_at_20": 0.65727, "ndcg_at_50": 0.66313, "ndcg_at_100": 0.66931, "map_at_1": 0.54767, "map_at_3": 0.59054, "map_at_5": 0.6034, "map_at_10": 0.6111, "map_at_20": 0.61404, "map_at_50": 0.61504, "map_at_100": 0.6156, "recall_at_1": 0.54767, "recall_at_3": 0.64523, "recall_at_5": 0.70067, "recall_at_10": 0.75831, "recall_at_20": 0.80266, "recall_at_50": 0.83149, "recall_at_100": 0.86918, "precision_at_1": 0.54767, "precision_at_3": 0.21508, "precision_at_5": 0.14013, "precision_at_10": 0.07583, "precision_at_20": 0.04013, "precision_at_50": 0.01663, "precision_at_100": 0.00869, "mrr_at_1": 0.549889135254989, "mrr_at_3": 0.5916481892091647, "mrr_at_5": 0.6031781226903179, "mrr_at_10": 0.6112298947664802, "mrr_at_20": 0.6144150798334311, "mrr_at_50": 0.615365808840627, "mrr_at_100": 0.6158082058718903, "naucs_at_1_max": 0.26994818984174423, "naucs_at_1_std": 0.5906240429505522, "naucs_at_1_diff1": 0.9167196184748201, "naucs_at_3_max": 0.23789664157644613, "naucs_at_3_std": 0.7236619387134813, "naucs_at_3_diff1": 0.8666053534752101, "naucs_at_5_max": 0.11514001916616068, "naucs_at_5_std": 0.7631042600804109, "naucs_at_5_diff1": 0.8479060689502268, "naucs_at_10_max": -0.043203523595493495, "naucs_at_10_std": 0.862093103464622, "naucs_at_10_diff1": 0.8564068404442808, "naucs_at_20_max": -0.18212291788839394, "naucs_at_20_std": 0.9249806242337382, "naucs_at_20_diff1": 0.8845536894418387, "naucs_at_50_max": -0.24037092450687947, "naucs_at_50_std": 0.9368754047403369, "naucs_at_50_diff1": 0.8812951566425166, "naucs_at_100_max": -0.40683538353746845, "naucs_at_100_std": 0.9294103270531832, "naucs_at_100_diff1": 0.8702662400596464 }, "./data_dir/eval_vidore/infovqa_test_subsampled": { "ndcg_at_1": 0.89069, "ndcg_at_3": 0.91847, "ndcg_at_5": 0.92536, "ndcg_at_10": 0.92807, "ndcg_at_20": 0.93014, "ndcg_at_50": 0.93334, "ndcg_at_100": 0.93435, "map_at_1": 0.89069, "map_at_3": 0.91194, "map_at_5": 0.91589, "map_at_10": 0.91706, "map_at_20": 0.91765, "map_at_50": 0.91815, "map_at_100": 0.91825, "recall_at_1": 0.89069, "recall_at_3": 0.93725, "recall_at_5": 0.95344, "recall_at_10": 0.96154, "recall_at_20": 0.96964, "recall_at_50": 0.98583, "recall_at_100": 0.9919, "precision_at_1": 0.89069, "precision_at_3": 0.31242, "precision_at_5": 0.19069, "precision_at_10": 0.09615, "precision_at_20": 0.04848, "precision_at_50": 0.01972, "precision_at_100": 0.00992, "mrr_at_1": 0.8866396761133604, "mrr_at_3": 0.9089068825910931, "mrr_at_5": 0.9132591093117408, "mrr_at_10": 0.9144640447272027, "mrr_at_20": 0.9150498220615949, "mrr_at_50": 0.9156121077361352, "mrr_at_100": 0.9156733347422276, "naucs_at_1_max": 0.6888031159871755, "naucs_at_1_std": -0.008505670520150402, "naucs_at_1_diff1": 0.9449011245519812, "naucs_at_3_max": 0.7723774804158382, "naucs_at_3_std": 0.004652604931807598, "naucs_at_3_diff1": 0.923100327986361, "naucs_at_5_max": 0.905123471497624, "naucs_at_5_std": 0.3134960067202525, "naucs_at_5_diff1": 0.9368196598112606, "naucs_at_10_max": 0.9232252314824217, "naucs_at_10_std": 0.3879086911094102, "naucs_at_10_diff1": 0.9372660570415042, "naucs_at_20_max": 0.9455551744236316, "naucs_at_20_std": 0.44590016086873197, "naucs_at_20_diff1": 0.9564661819784096, "naucs_at_50_max": 0.9813426494193118, "naucs_at_50_std": 0.7183015996994482, "naucs_at_50_diff1": 0.9626852988386237, "naucs_at_100_max": 1.0, "naucs_at_100_std": 0.8979159520443043, "naucs_at_100_diff1": 0.9673496364838197 }, "./data_dir/eval_vidore/arxivqa_test_subsampled": { "ndcg_at_1": 0.822, "ndcg_at_3": 0.86707, "ndcg_at_5": 0.87533, "ndcg_at_10": 0.88497, "ndcg_at_20": 0.89022, "ndcg_at_50": 0.8951, "ndcg_at_100": 0.8951, "map_at_1": 0.822, "map_at_3": 0.85633, "map_at_5": 0.86093, "map_at_10": 0.86488, "map_at_20": 0.86642, "map_at_50": 0.86725, "map_at_100": 0.86725, "recall_at_1": 0.822, "recall_at_3": 0.898, "recall_at_5": 0.918, "recall_at_10": 0.948, "recall_at_20": 0.968, "recall_at_50": 0.992, "recall_at_100": 0.992, "precision_at_1": 0.822, "precision_at_3": 0.29933, "precision_at_5": 0.1836, "precision_at_10": 0.0948, "precision_at_20": 0.0484, "precision_at_50": 0.01984, "precision_at_100": 0.00992, "mrr_at_1": 0.824, "mrr_at_3": 0.858, "mrr_at_5": 0.8622000000000001, "mrr_at_10": 0.8664793650793651, "mrr_at_20": 0.8681266741428506, "mrr_at_50": 0.8688783518847116, "mrr_at_100": 0.8688783518847116, "naucs_at_1_max": 0.7534804527092446, "naucs_at_1_std": -0.09711881948385728, "naucs_at_1_diff1": 0.950227578762283, "naucs_at_3_max": 0.8118793707898164, "naucs_at_3_std": 0.022246287696627846, "naucs_at_3_diff1": 0.907169471925993, "naucs_at_5_max": 0.7977955409806211, "naucs_at_5_std": -0.04402086037667595, "naucs_at_5_diff1": 0.921887454168659, "naucs_at_10_max": 0.9135423400129307, "naucs_at_10_std": 0.09272426919485754, "naucs_at_10_diff1": 0.9340120663650082, "naucs_at_20_max": 0.9210725957049468, "naucs_at_20_std": 0.19062208216619833, "naucs_at_20_diff1": 0.9264705882352913, "naucs_at_50_max": 0.9673202614378978, "naucs_at_50_std": 0.20354808590102869, "naucs_at_50_diff1": 0.9346405228758211, "naucs_at_100_max": 0.9673202614378978, "naucs_at_100_std": 0.20354808590102869, "naucs_at_100_diff1": 0.9346405228758211 }, "./data_dir/eval_vidore/tabfquad_test_subsampled": { "ndcg_at_1": 0.84286, "ndcg_at_3": 0.89141, "ndcg_at_5": 0.89847, "ndcg_at_10": 0.90417, "ndcg_at_20": 0.91057, "ndcg_at_50": 0.9134, "ndcg_at_100": 0.9134, "map_at_1": 0.84286, "map_at_3": 0.87976, "map_at_5": 0.88351, "map_at_10": 0.88582, "map_at_20": 0.88761, "map_at_50": 0.88806, "map_at_100": 0.88806, "recall_at_1": 0.84286, "recall_at_3": 0.925, "recall_at_5": 0.94286, "recall_at_10": 0.96071, "recall_at_20": 0.98571, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.84286, "precision_at_3": 0.30833, "precision_at_5": 0.18857, "precision_at_10": 0.09607, "precision_at_20": 0.04929, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.8428571428571429, "mrr_at_3": 0.8797619047619047, "mrr_at_5": 0.8827976190476191, "mrr_at_10": 0.8856746031746031, "mrr_at_20": 0.887474271759986, "mrr_at_50": 0.8879308740437567, "mrr_at_100": 0.8879308740437567, "naucs_at_1_max": 0.5389336957490425, "naucs_at_1_std": 0.2014303538009097, "naucs_at_1_diff1": 0.8885014704571788, "naucs_at_3_max": 0.6380329909741672, "naucs_at_3_std": 0.4563603219065409, "naucs_at_3_diff1": 0.8615223867324734, "naucs_at_5_max": 0.5759803921568608, "naucs_at_5_std": 0.40143557422968984, "naucs_at_5_diff1": 0.8345880018674129, "naucs_at_10_max": 0.823444529326883, "naucs_at_10_std": 0.626602156013919, "naucs_at_10_diff1": 0.8355827179356624, "naucs_at_20_max": 0.967320261437913, "naucs_at_20_std": 0.8284313725490264, "naucs_at_20_diff1": 0.9346405228758147, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 } } }