| { | |
| "algorithm": { | |
| "command": null, | |
| "id": 4, | |
| "name": "Gensim Continuous Bag-of-Words", | |
| "tool": "Gensim", | |
| "url": "https://github.com/RaRe-Technologies/gensim", | |
| "version": "3.8" | |
| }, | |
| "contents": [ | |
| { | |
| "filename": "model.txt", | |
| "format": "text" | |
| }, | |
| { | |
| "filename": "model.bin", | |
| "format": "data" | |
| }, | |
| { | |
| "filename": "meta.json", | |
| "format": "json" | |
| } | |
| ], | |
| "corpus": [ | |
| { | |
| "NER": true, | |
| "case preserved": false, | |
| "description": "Russian National Corpus", | |
| "id": 88, | |
| "language": "rus", | |
| "lemmatized": true, | |
| "public": false, | |
| "stop words removal": null, | |
| "tagger": "UDPipe 1.2", | |
| "tagset": "UPoS", | |
| "tokens": 270000000, | |
| "url": "http://ruscorpora.ru/" | |
| }, | |
| { | |
| "NER": true, | |
| "case preserved": false, | |
| "description": "Russian Wikipedia dump of December 2018", | |
| "id": 91, | |
| "language": "rus", | |
| "lemmatized": true, | |
| "public": true, | |
| "stop words removal": null, | |
| "tagger": "UDPipe 1.2", | |
| "tagset": "UPoS", | |
| "tokens": 518531000, | |
| "tool": "https://github.com/RaRe-Technologies/gensim/blob/master/gensim/scripts/segment_wiki.py", | |
| "url": "https://dumps.wikimedia.org/" | |
| }, | |
| { | |
| "NER": true, | |
| "case preserved": false, | |
| "description": "Russian News from Dialogue Evaluation 2020", | |
| "id": 114, | |
| "language": "rus", | |
| "lemmatized": true, | |
| "public": true, | |
| "stop words removal": null, | |
| "tagger": "UDPipe 1.2", | |
| "tagset": "UPoS", | |
| "tokens": 1321489104, | |
| "url": "https://competitions.codalab.org/competitions/22168" | |
| }, | |
| { | |
| "NER": true, | |
| "case preserved": false, | |
| "description": "Araneum Russicum Maximum", | |
| "id": 115, | |
| "language": "rus", | |
| "lemmatized": true, | |
| "public": true, | |
| "stop words removal": "functional PoS", | |
| "tagger": "MyStem", | |
| "tagset": "UPoS", | |
| "tokens": 10000000000, | |
| "url": "https://rusvectores.org/en/models/" | |
| } | |
| ], | |
| "creators": [ | |
| { | |
| "email": "[email protected]", | |
| "name": "Andrey Kutuzov" | |
| }, | |
| { | |
| "email": "[email protected]", | |
| "name": "Maria Kunilovskaya" | |
| } | |
| ], | |
| "dimensions": 300, | |
| "documentation": [ | |
| "https://github.com/kunilovskaya/hypohyper/" | |
| ], | |
| "external_id": "ruscorporawikiaraneumnews_mwe_upos_cbow_300_2_2020", | |
| "handle": "http://vectors.nlpl.eu/repository/20/204.zip", | |
| "id": 204, | |
| "iterations": 3, | |
| "vocabulary size": 998459, | |
| "window": 2 | |
| } |