Update Readme
Browse files
README.md
CHANGED
@@ -21,6 +21,13 @@ A GRPO-fine-tuned version of Qwen2.5-3B trained on the MATH dataset.
|
|
21 |
## Citation
|
22 |
|
23 |
```bibtex
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
@article{sha2024deepseekmath,
|
25 |
title = {DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models},
|
26 |
author = {Shao, Zhihong and Wang, Peiyi and Zhu, Qihao and Xu, Runxin and Song, Junxiao and Bi, Xiao and Zhang, Haowei and Zhang, Mingchuan and Li, Y. K. and Wu, Y. and Guo, Daya},
|
|
|
21 |
## Citation
|
22 |
|
23 |
```bibtex
|
24 |
+
@article{zhao2025learning,
|
25 |
+
title={Learning to Reason without External Rewards},
|
26 |
+
author={Zhao, Xuandong and Kang, Zhewei and Feng, Aosong and Levine, Sergey and Song, Dawn},
|
27 |
+
journal={arXiv preprint arXiv:2505.19590},
|
28 |
+
year={2025}
|
29 |
+
}
|
30 |
+
|
31 |
@article{sha2024deepseekmath,
|
32 |
title = {DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models},
|
33 |
author = {Shao, Zhihong and Wang, Peiyi and Zhu, Qihao and Xu, Runxin and Song, Junxiao and Bi, Xiao and Zhang, Haowei and Zhang, Mingchuan and Li, Y. K. and Wu, Y. and Guo, Daya},
|