callanwu committed on
Commit
3cc52f3
·
1 Parent(s): 0b51b5a
Files changed (1) hide show
  1. deepsearch_result.jsonl +9 -7
deepsearch_result.jsonl CHANGED
@@ -1,11 +1,11 @@
1
  {"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-Base", "model": "qwq-32B", "overall": 0.419}
2
  {"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-RL", "model": "qwq-32B", "overall": 0.465}
3
- {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebDancer", "model": "qwen2.5-7b-instruct", "overall": 0.36}
4
- {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebDancer", "model": "qwen2.5-32b-instruct", "overall": 0.384}
5
- {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebDancer", "model": "qwq-32b", "overall": 0.479}
6
- {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwen2.5-32b-instruct", "overall": 0.514}
7
- {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwq-32b", "overall": 0.497}
8
- {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/WebAgent","method": "WebShaper", "model": "qwen2.5-72b-instruct", "overall": 0.522}
9
  {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3.1", "overall": 0.7147}
10
  {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3-0324", "overall": 0.6071}
11
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-8b", "overall": 0.413}
@@ -14,4 +14,6 @@
14
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-DPO-v0.1", "model": "qwen3-32b", "overall": 0.493}
15
  {"org": "OPPO","link": "https://github.com/OPPO-PersonalAI/Agent_Foundation_Models","method": "AFM-SFT", "model": "qwen2.5-32b-instruct", "overall": 0.615}
16
  {"org": "OPPO","link": "https://github.com/OPPO-PersonalAI/Agent_Foundation_Models","method": "AFM-RL", "model": "qwen2.5-32b-instruct", "overall": 0.630}
17
- {"org": "OPPO","link": "https://github.com/OPPO-PersonalAI/Agent_Foundation_Models","method": "AFM-RL", "model": "qwen2.5-7b-instruct", "overall": 0.556}
 
 
 
1
  {"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-Base", "model": "qwq-32B", "overall": 0.419}
2
  {"org": "RUC","link": "https://github.com/RUC-NLPIR/WebThinker","method": "WebThinker-RL", "model": "qwq-32B", "overall": 0.465}
3
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch","method": "WebDancer", "model": "qwen2.5-7b-instruct", "overall": 0.36}
4
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch","method": "WebDancer", "model": "qwen2.5-32b-instruct", "overall": 0.384}
5
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch","method": "WebDancer", "model": "qwq-32b", "overall": 0.479}
6
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch","method": "WebShaper", "model": "qwen2.5-32b-instruct", "overall": 0.514}
7
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch","method": "WebShaper", "model": "qwq-32b", "overall": 0.497}
8
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch","method": "WebShaper", "model": "qwen2.5-72b-instruct", "overall": 0.522}
9
  {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3.1", "overall": 0.7147}
10
  {"org": "Tencent","link": "https://github.com/TencentCloudADP/youtu-agent","method": "Youtu-agent", "model": "deepseek-v3-0324", "overall": 0.6071}
11
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-SFT-v0.1", "model": "qwen3-8b", "overall": 0.413}
 
14
  {"org": "Miromind","link": "https://github.com/MiroMindAI/MiroThinker","method": "MiroThinker-DPO-v0.1", "model": "qwen3-32b", "overall": 0.493}
15
  {"org": "OPPO","link": "https://github.com/OPPO-PersonalAI/Agent_Foundation_Models","method": "AFM-SFT", "model": "qwen2.5-32b-instruct", "overall": 0.615}
16
  {"org": "OPPO","link": "https://github.com/OPPO-PersonalAI/Agent_Foundation_Models","method": "AFM-RL", "model": "qwen2.5-32b-instruct", "overall": 0.630}
17
+ {"org": "OPPO","link": "https://github.com/OPPO-PersonalAI/Agent_Foundation_Models","method": "AFM-RL", "model": "qwen2.5-7b-instruct", "overall": 0.556}
18
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch", "method": "AgentFounder", "model": "qwen3-30b-a3b", "overall": 0.719}
19
+ {"org": "Alibaba","link": "https://github.com/Alibaba-NLP/DeepResearch", "method": "Tongyi DeepResearch", "model": "qwen3-30b-a3b", "overall": 0.722}