xuandin committed on
Commit
aec55f7
·
verified ·
1 Parent(s): 39a91f2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +18 -147
README.md CHANGED
@@ -90,7 +90,6 @@ print(evidence)
90
  <tr>
91
  <th colspan="2">Method</th>
92
  <th colspan="4">ViWikiFC</th>
93
- <th colspan="4">ISE-DSC01</th>
94
  </tr>
95
  <tr>
96
  <th>ER</th>
@@ -99,10 +98,6 @@ print(evidence)
99
  <th>VC Acc</th>
100
  <th>ER Acc</th>
101
  <th>Time (s)</th>
102
- <th>Strict Acc</th>
103
- <th>VC Acc</th>
104
- <th>ER Acc</th>
105
- <th>Time (s)</th>
106
  </tr>
107
  </thead>
108
  <tbody>
@@ -113,10 +108,6 @@ print(evidence)
113
  <td>82.21</td>
114
  <td>90.15</td>
115
  <td>131</td>
116
- <td>73.59</td>
117
- <td>78.08</td>
118
- <td>76.61</td>
119
- <td>378</td>
120
  </tr>
121
  <tr>
122
  <td>XLM-R<sub>large</sub></td>
@@ -124,10 +115,6 @@ print(evidence)
124
  <td>82.78</td>
125
  <td>90.15</td>
126
  <td>134</td>
127
- <td>75.61</td>
128
- <td>80.50</td>
129
- <td>78.58</td>
130
- <td>366</td>
131
  </tr>
132
  <tr>
133
  <td>Ernie-M<sub>large</sub></td>
@@ -135,10 +122,6 @@ print(evidence)
135
  <td>81.83</td>
136
  <td>90.15</td>
137
  <td>144</td>
138
- <td>78.19</td>
139
- <td>81.69</td>
140
- <td>80.65</td>
141
- <td>403</td>
142
  </tr>
143
  <tr>
144
  <td rowspan="3">BM25</td>
@@ -147,10 +130,6 @@ print(evidence)
147
  <td>79.01</td>
148
  <td>83.50</td>
149
  <td>130</td>
150
- <td>72.09</td>
151
- <td>77.37</td>
152
- <td>75.04</td>
153
- <td>320</td>
154
  </tr>
155
  <tr>
156
  <td>XLM-R<sub>large</sub></td>
@@ -158,10 +137,6 @@ print(evidence)
158
  <td>78.91</td>
159
  <td>83.50</td>
160
  <td>132</td>
161
- <td>73.94</td>
162
- <td>79.37</td>
163
- <td>76.95</td>
164
- <td>333</td>
165
  </tr>
166
  <tr>
167
  <td>Ernie-M<sub>large</sub></td>
@@ -169,10 +144,6 @@ print(evidence)
169
  <td>78.29</td>
170
  <td>83.50</td>
171
  <td>141</td>
172
- <td>76.58</td>
173
- <td>80.76</td>
174
- <td>79.02</td>
175
- <td>381</td>
176
  </tr>
177
  <tr>
178
  <td rowspan="3">SBert</td>
@@ -181,10 +152,6 @@ print(evidence)
181
  <td>81.59</td>
182
  <td>89.72</td>
183
  <td>195</td>
184
- <td>71.20</td>
185
- <td>76.59</td>
186
- <td>74.15</td>
187
- <td>915</td>
188
  </tr>
189
  <tr>
190
  <td>XLM-R<sub>large</sub></td>
@@ -192,10 +159,6 @@ print(evidence)
192
  <td>82.35</td>
193
  <td>89.72</td>
194
  <td>194</td>
195
- <td>72.85</td>
196
- <td>78.78</td>
197
- <td>75.89</td>
198
- <td>835</td>
199
  </tr>
200
  <tr>
201
  <td>Ernie-M<sub>large</sub></td>
@@ -203,15 +166,11 @@ print(evidence)
203
  <td>81.44</td>
204
  <td>89.72</td>
205
  <td>203</td>
206
- <td>75.46</td>
207
- <td>79.89</td>
208
- <td>77.91</td>
209
- <td>920</td>
210
  </tr>
211
  <tr>
212
  <th colspan="1">QA-based approaches</th>
213
  <th colspan="1">VC</th>
214
- <th colspan="8"></th>
215
  </tr>
216
  <tr>
217
  <td rowspan="3">ViMRC<sub>large</sub></td>
@@ -220,10 +179,6 @@ print(evidence)
220
  <td>81.97</td>
221
  <td>92.49</td>
222
  <td>3778</td>
223
- <td>54.36</td>
224
- <td>64.14</td>
225
- <td>56.84</td>
226
- <td>9798</td>
227
  </tr>
228
  <tr>
229
  <td>XLM-R<sub>large</sub></td>
@@ -231,10 +186,6 @@ print(evidence)
231
  <td>82.83</td>
232
  <td>92.49</td>
233
  <td>3824</td>
234
- <td>53.98</td>
235
- <td>66.70</td>
236
- <td>57.77</td>
237
- <td>9809</td>
238
  </tr>
239
  <tr>
240
  <td>Ernie-M<sub>large</sub></td>
@@ -242,10 +193,6 @@ print(evidence)
242
  <td>81.92</td>
243
  <td>92.49</td>
244
  <td>3785</td>
245
- <td>56.62</td>
246
- <td>62.19</td>
247
- <td>58.91</td>
248
- <td>9833</td>
249
  </tr>
250
  <tr>
251
  <td rowspan="3">InfoXLM<sub>large</sub></td>
@@ -254,10 +201,6 @@ print(evidence)
254
  <td>82.07</td>
255
  <td>93.45</td>
256
  <td>4092</td>
257
- <td>53.50</td>
258
- <td>63.83</td>
259
- <td>56.17</td>
260
- <td>10057</td>
261
  </tr>
262
  <tr>
263
  <td>XLM-R<sub>large</sub></td>
@@ -265,10 +208,6 @@ print(evidence)
265
  <td>83.07</td>
266
  <td>93.45</td>
267
  <td>4096</td>
268
- <td>53.32</td>
269
- <td>66.70</td>
270
- <td>57.25</td>
271
- <td>10066</td>
272
  </tr>
273
  <tr>
274
  <td>Ernie-M<sub>large</sub></td>
@@ -276,14 +215,10 @@ print(evidence)
276
  <td>82.21</td>
277
  <td>93.45</td>
278
  <td>4102</td>
279
- <td>56.34</td>
280
- <td>62.36</td>
281
- <td>58.69</td>
282
- <td>10078</td>
283
  </tr>
284
  <tr>
285
  <th colspan="2">LLM</th>
286
- <th colspan="8"></th>
287
  </tr>
288
  <tr>
289
  <td colspan="2">Qwen2.5-1.5B-Instruct</td>
@@ -291,10 +226,6 @@ print(evidence)
291
  <td>65.18</td>
292
  <td>78.96</td>
293
  <td>7665</td>
294
- <td>59.23</td>
295
- <td>66.68</td>
296
- <td>65.51</td>
297
- <td>19780</td>
298
  </tr>
299
  <tr>
300
  <td colspan="2">Qwen2.5-3B-Instruct</td>
@@ -302,15 +233,11 @@ print(evidence)
302
  <td>62.31</td>
303
  <td>71.35</td>
304
  <td>12123</td>
305
- <td>60.87</td>
306
- <td>66.92</td>
307
- <td>66.10</td>
308
- <td>31284</td>
309
  </tr>
310
  <tr>
311
  <th colspan="1">LLM</th>
312
  <th colspan="1">VC</th>
313
- <th colspan="8"></th>
314
  </tr>
315
  <tr>
316
  <td rowspan="3">Qwen2.5-1.5B-Instruct</td>
@@ -319,10 +246,6 @@ print(evidence)
319
  <td>76.47</td>
320
  <td>78.96</td>
321
  <td>7788</td>
322
- <td>64.40</td>
323
- <td>68.37</td>
324
- <td>66.49</td>
325
- <td>19970</td>
326
  </tr>
327
  <tr>
328
  <td>XLM-R<sub>large</sub></td>
@@ -330,10 +253,6 @@ print(evidence)
330
  <td>78.10</td>
331
  <td>78.96</td>
332
  <td>7789</td>
333
- <td>64.66</td>
334
- <td>69.63</td>
335
- <td>66.72</td>
336
- <td>19976</td>
337
  </tr>
338
  <tr>
339
  <td>Ernie-M<sub>large</sub></td>
@@ -341,10 +260,6 @@ print(evidence)
341
  <td>76.52</td>
342
  <td>78.96</td>
343
  <td>7794</td>
344
- <td>65.70</td>
345
- <td>68.37</td>
346
- <td>67.33</td>
347
- <td>20003</td>
348
  </tr>
349
  <tr>
350
  <td rowspan="3">Qwen2.5-3B-Instruct</td>
@@ -353,10 +268,6 @@ print(evidence)
353
  <td>72.50</td>
354
  <td>71.35</td>
355
  <td>12246</td>
356
- <td>65.72</td>
357
- <td>69.66</td>
358
- <td>67.51</td>
359
- <td>31477</td>
360
  </tr>
361
  <tr>
362
  <td>XLM-R<sub>large</sub></td>
@@ -364,10 +275,6 @@ print(evidence)
364
  <td>73.08</td>
365
  <td>71.35</td>
366
  <td>12246</td>
367
- <td>66.12</td>
368
- <td>70.44</td>
369
- <td>67.83</td>
370
- <td>31483</td>
371
  </tr>
372
  <tr>
373
  <td>Ernie-M<sub>large</sub></td>
@@ -375,44 +282,32 @@ print(evidence)
375
  <td>72.21</td>
376
  <td>71.35</td>
377
  <td>12251</td>
378
- <td>67.48</td>
379
- <td>70.77</td>
380
- <td>68.75</td>
381
- <td>31512</td>
382
  </tr>
383
  <tr>
384
  <th colspan="1">SER Faster (ours)</th>
385
  <th colspan="1">TVC (ours)</th>
386
- <th colspan="8"></th>
387
  </tr>
388
  <tr>
389
  <td>TF-IDF + ViMRC<sub>large</sub></td>
390
  <td>Ernie-M<sub>large</sub></td>
391
- <td>79.44</td>
392
- <td>82.93</td>
393
- <td>94.60</td>
394
- <td>410</td>
395
- <td>78.32</td>
396
- <td>81.91</td>
397
- <td>80.26</td>
398
- <td>995</td>
399
  </tr>
400
  <tr>
401
  <td>TF-IDF + InfoXLM<sub>large</sub></td>
402
  <td>Ernie-M<sub>large</sub></td>
403
- <td>79.77</td>
404
- <td>83.07</td>
405
- <td>95.03</td>
406
- <td>487</td>
407
- <td>78.37</td>
408
- <td>81.91</td>
409
- <td>80.32</td>
410
- <td>925</td>
411
  </tr>
412
  <tr>
413
  <th colspan="1">SER (ours)</th>
414
  <th colspan="1">TVC (ours)</th>
415
- <th colspan="8"></th>
416
  </tr>
417
  <tr>
418
  <td rowspan="3">TF-IDF + ViMRC<sub>large</sub></td>
@@ -421,10 +316,6 @@ print(evidence)
421
  <td>83.84</td>
422
  <td>94.69</td>
423
  <td>2731</td>
424
- <td>75.13</td>
425
- <td>79.54</td>
426
- <td>76.87</td>
427
- <td>5191</td>
428
  </tr>
429
  <tr>
430
  <td>XLM-R<sub>large</sub></td>
@@ -432,10 +323,6 @@ print(evidence)
432
  <td>83.64</td>
433
  <td>94.69</td>
434
  <td>2733</td>
435
- <td>76.71</td>
436
- <td>81.65</td>
437
- <td>78.91</td>
438
- <td>5219</td>
439
  </tr>
440
  <tr>
441
  <td>Ernie-M<sub>large</sub></td>
@@ -443,44 +330,28 @@ print(evidence)
443
  <td>82.97</td>
444
  <td>94.69</td>
445
  <td>2733</td>
446
- <td>78.97</td>
447
- <td>82.54</td>
448
- <td>80.91</td>
449
- <td>5225</td>
450
  </tr>
451
  <tr>
452
  <td rowspan="3">TF-IDF + InfoXLM<sub>large</sub></td>
453
  <td>InfoXLM<sub>large</sub></td>
454
  <td>80.68</td>
455
- <td>83.98</td>
456
- <td>95.31</td>
457
  <td>3860</td>
458
- <td>75.13</td>
459
- <td>79.60</td>
460
- <td>76.87</td>
461
- <td>5175</td>
462
  </tr>
463
  <tr>
464
  <td>XLM-R<sub>large</sub></td>
465
- <td>80.82</td>
466
  <td>83.88</td>
467
- <td>95.31</td>
468
  <td>3843</td>
469
- <td>76.74</td>
470
- <td>81.71</td>
471
- <td>78.95</td>
472
- <td>5200</td>
473
  </tr>
474
  <tr>
475
  <td>Ernie-M<sub>large</sub></td>
476
  <td>80.06</td>
477
  <td>83.17</td>
478
- <td>95.31</td>
479
  <td>3891</td>
480
- <td>78.97</td>
481
- <td>82.49</td>
482
- <td>80.91</td>
483
- <td>5297</td>
484
  </tr>
485
  </tbody>
486
  </table>
 
90
  <tr>
91
  <th colspan="2">Method</th>
92
  <th colspan="4">ViWikiFC</th>
 
93
  </tr>
94
  <tr>
95
  <th>ER</th>
 
98
  <th>VC Acc</th>
99
  <th>ER Acc</th>
100
  <th>Time (s)</th>
 
 
 
 
101
  </tr>
102
  </thead>
103
  <tbody>
 
108
  <td>82.21</td>
109
  <td>90.15</td>
110
  <td>131</td>
 
 
 
 
111
  </tr>
112
  <tr>
113
  <td>XLM-R<sub>large</sub></td>
 
115
  <td>82.78</td>
116
  <td>90.15</td>
117
  <td>134</td>
 
 
 
 
118
  </tr>
119
  <tr>
120
  <td>Ernie-M<sub>large</sub></td>
 
122
  <td>81.83</td>
123
  <td>90.15</td>
124
  <td>144</td>
 
 
 
 
125
  </tr>
126
  <tr>
127
  <td rowspan="3">BM25</td>
 
130
  <td>79.01</td>
131
  <td>83.50</td>
132
  <td>130</td>
 
 
 
 
133
  </tr>
134
  <tr>
135
  <td>XLM-R<sub>large</sub></td>
 
137
  <td>78.91</td>
138
  <td>83.50</td>
139
  <td>132</td>
 
 
 
 
140
  </tr>
141
  <tr>
142
  <td>Ernie-M<sub>large</sub></td>
 
144
  <td>78.29</td>
145
  <td>83.50</td>
146
  <td>141</td>
 
 
 
 
147
  </tr>
148
  <tr>
149
  <td rowspan="3">SBert</td>
 
152
  <td>81.59</td>
153
  <td>89.72</td>
154
  <td>195</td>
 
 
 
 
155
  </tr>
156
  <tr>
157
  <td>XLM-R<sub>large</sub></td>
 
159
  <td>82.35</td>
160
  <td>89.72</td>
161
  <td>194</td>
 
 
 
 
162
  </tr>
163
  <tr>
164
  <td>Ernie-M<sub>large</sub></td>
 
166
  <td>81.44</td>
167
  <td>89.72</td>
168
  <td>203</td>
 
 
 
 
169
  </tr>
170
  <tr>
171
  <th colspan="1">QA-based approaches</th>
172
  <th colspan="1">VC</th>
173
+ <th colspan="4"></th>
174
  </tr>
175
  <tr>
176
  <td rowspan="3">ViMRC<sub>large</sub></td>
 
179
  <td>81.97</td>
180
  <td>92.49</td>
181
  <td>3778</td>
 
 
 
 
182
  </tr>
183
  <tr>
184
  <td>XLM-R<sub>large</sub></td>
 
186
  <td>82.83</td>
187
  <td>92.49</td>
188
  <td>3824</td>
 
 
 
 
189
  </tr>
190
  <tr>
191
  <td>Ernie-M<sub>large</sub></td>
 
193
  <td>81.92</td>
194
  <td>92.49</td>
195
  <td>3785</td>
 
 
 
 
196
  </tr>
197
  <tr>
198
  <td rowspan="3">InfoXLM<sub>large</sub></td>
 
201
  <td>82.07</td>
202
  <td>93.45</td>
203
  <td>4092</td>
 
 
 
 
204
  </tr>
205
  <tr>
206
  <td>XLM-R<sub>large</sub></td>
 
208
  <td>83.07</td>
209
  <td>93.45</td>
210
  <td>4096</td>
 
 
 
 
211
  </tr>
212
  <tr>
213
  <td>Ernie-M<sub>large</sub></td>
 
215
  <td>82.21</td>
216
  <td>93.45</td>
217
  <td>4102</td>
 
 
 
 
218
  </tr>
219
  <tr>
220
  <th colspan="2">LLM</th>
221
+ <th colspan="4"></th>
222
  </tr>
223
  <tr>
224
  <td colspan="2">Qwen2.5-1.5B-Instruct</td>
 
226
  <td>65.18</td>
227
  <td>78.96</td>
228
  <td>7665</td>
 
 
 
 
229
  </tr>
230
  <tr>
231
  <td colspan="2">Qwen2.5-3B-Instruct</td>
 
233
  <td>62.31</td>
234
  <td>71.35</td>
235
  <td>12123</td>
 
 
 
 
236
  </tr>
237
  <tr>
238
  <th colspan="1">LLM</th>
239
  <th colspan="1">VC</th>
240
+ <th colspan="4"></th>
241
  </tr>
242
  <tr>
243
  <td rowspan="3">Qwen2.5-1.5B-Instruct</td>
 
246
  <td>76.47</td>
247
  <td>78.96</td>
248
  <td>7788</td>
 
 
 
 
249
  </tr>
250
  <tr>
251
  <td>XLM-R<sub>large</sub></td>
 
253
  <td>78.10</td>
254
  <td>78.96</td>
255
  <td>7789</td>
 
 
 
 
256
  </tr>
257
  <tr>
258
  <td>Ernie-M<sub>large</sub></td>
 
260
  <td>76.52</td>
261
  <td>78.96</td>
262
  <td>7794</td>
 
 
 
 
263
  </tr>
264
  <tr>
265
  <td rowspan="3">Qwen2.5-3B-Instruct</td>
 
268
  <td>72.50</td>
269
  <td>71.35</td>
270
  <td>12246</td>
 
 
 
 
271
  </tr>
272
  <tr>
273
  <td>XLM-R<sub>large</sub></td>
 
275
  <td>73.08</td>
276
  <td>71.35</td>
277
  <td>12246</td>
 
 
 
 
278
  </tr>
279
  <tr>
280
  <td>Ernie-M<sub>large</sub></td>
 
282
  <td>72.21</td>
283
  <td>71.35</td>
284
  <td>12251</td>
 
 
 
 
285
  </tr>
286
  <tr>
287
  <th colspan="1">SER Faster (ours)</th>
288
  <th colspan="1">TVC (ours)</th>
289
+ <th colspan="4"></th>
290
  </tr>
291
  <tr>
292
  <td>TF-IDF + ViMRC<sub>large</sub></td>
293
  <td>Ernie-M<sub>large</sub></td>
294
+ <td style="color:blue">79.44</td>
295
+ <td style="color:blue">82.93</td>
296
+ <td style="color:blue">94.60</td>
297
+ <td style="color:blue">410</td>
 
 
 
 
298
  </tr>
299
  <tr>
300
  <td>TF-IDF + InfoXLM<sub>large</sub></td>
301
  <td>Ernie-M<sub>large</sub></td>
302
+ <td style="color:blue">79.77</td>
303
+ <td style="color:blue">83.07</td>
304
+ <td style="color:blue">95.03</td>
305
+ <td style="color:blue">487</td>
 
 
 
 
306
  </tr>
307
  <tr>
308
  <th colspan="1">SER (ours)</th>
309
  <th colspan="1">TVC (ours)</th>
310
+ <th colspan="4"></th>
311
  </tr>
312
  <tr>
313
  <td rowspan="3">TF-IDF + ViMRC<sub>large</sub></td>
 
316
  <td>83.84</td>
317
  <td>94.69</td>
318
  <td>2731</td>
 
 
 
 
319
  </tr>
320
  <tr>
321
  <td>XLM-R<sub>large</sub></td>
 
323
  <td>83.64</td>
324
  <td>94.69</td>
325
  <td>2733</td>
 
 
 
 
326
  </tr>
327
  <tr>
328
  <td>Ernie-M<sub>large</sub></td>
 
330
  <td>82.97</td>
331
  <td>94.69</td>
332
  <td>2733</td>
 
 
 
 
333
  </tr>
334
  <tr>
335
  <td rowspan="3">TF-IDF + InfoXLM<sub>large</sub></td>
336
  <td>InfoXLM<sub>large</sub></td>
337
  <td>80.68</td>
338
+ <td><strong>83.98</strong></td>
339
+ <td><strong>95.31</strong></td>
340
  <td>3860</td>
 
 
 
 
341
  </tr>
342
  <tr>
343
  <td>XLM-R<sub>large</sub></td>
344
+ <td><strong>80.82</strong></td>
345
  <td>83.88</td>
346
+ <td><strong>95.31</strong></td>
347
  <td>3843</td>
 
 
 
 
348
  </tr>
349
  <tr>
350
  <td>Ernie-M<sub>large</sub></td>
351
  <td>80.06</td>
352
  <td>83.17</td>
353
+ <td><strong>95.31</strong></td>
354
  <td>3891</td>
 
 
 
 
355
  </tr>
356
  </tbody>
357
  </table>