pczarnik
/

herbert-base-ner

@@ -54,7 +54,7 @@ You can use this model with Transformers *pipeline* for NER.
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 from transformers import pipeline
-model_checkpoint = "pietruszkowiec/herbert-base-ner"
 tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
 model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
@@ -65,6 +65,28 @@ example = "Nazywam się Grzegorz Brzęszczyszczykiewicz, pochodzę "\
 ner_results = nlp(example)
 print(ner_results)
 ```
 ### BibTeX entry and citation info
@@ -101,6 +123,5 @@ print(ner_results)
     url = "https://www.aclweb.org/anthology/P17-1178",
     doi = "10.18653/v1/P17-1178",
     pages = "1946--1958",
-    abstract = "The ambitious goal of this work is to develop a cross-lingual name tagging and linking framework for 282 languages that exist in Wikipedia. Given a document in any of these languages, our framework is able to identify name mentions, assign a coarse-grained or fine-grained type to each mention, and link it to an English Knowledge Base (KB) if it is linkable. We achieve this goal by performing a series of new KB mining methods: generating {``}silver-standard{''} annotations by transferring annotations from English to other languages through cross-lingual links and KB properties, refining annotations through self-training and topic selection, deriving language-specific morphology features from anchor links, and mining word translation pairs from cross-lingual links. Both name tagging and linking results for 282 languages are promising on Wikipedia data and non-Wikipedia data.",
 }
 ```

 from transformers import AutoTokenizer, AutoModelForTokenClassification
 from transformers import pipeline
+model_checkpoint = "pczarnik/herbert-base-ner"
 tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
 model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
 ner_results = nlp(example)
 print(ner_results)
 ```
+```python
+[{'entity': 'B-PER', 'score': 0.99451494, 'index': 4, 'word': 'Grzegorz</w>', 'start': 12, 'end': 20},
+ {'entity': 'I-PER', 'score': 0.99758506, 'index': 5, 'word': 'B', 'start': 21, 'end': 22},
+ {'entity': 'I-PER', 'score': 0.99749386, 'index': 6, 'word': 'rzę', 'start': 22, 'end': 25},
+ {'entity': 'I-PER', 'score': 0.9973041, 'index': 7, 'word': 'szczy', 'start': 25, 'end': 30},
+ {'entity': 'I-PER', 'score': 0.99682057, 'index': 8, 'word': 'szczy', 'start': 30, 'end': 35},
+ {'entity': 'I-PER', 'score': 0.9964832, 'index': 9, 'word': 'kiewicz</w>', 'start': 35, 'end': 42},
+ {'entity': 'B-LOC', 'score': 0.99427444, 'index': 14, 'word': 'Chrzą', 'start': 55, 'end': 60},
+ {'entity': 'I-LOC', 'score': 0.99143463, 'index': 15, 'word': 'szczy', 'start': 60, 'end': 65},
+ {'entity': 'I-LOC', 'score': 0.9922201, 'index': 16, 'word': 'że', 'start': 65, 'end': 67},
+ {'entity': 'I-LOC', 'score': 0.9918464, 'index': 17, 'word': 'wo', 'start': 67, 'end': 69},
+ {'entity': 'I-LOC', 'score': 0.9900766, 'index': 18, 'word': 'szczy', 'start': 69, 'end': 74},
+ {'entity': 'I-LOC', 'score': 0.98823845, 'index': 19, 'word': 'c</w>', 'start': 74, 'end': 75},
+ {'entity': 'B-ORG', 'score': 0.6808262, 'index': 23, 'word': 'Łę', 'start': 87, 'end': 89},
+ {'entity': 'I-ORG', 'score': 0.7763973, 'index': 24, 'word': 'ko', 'start': 89, 'end': 91},
+ {'entity': 'I-ORG', 'score': 0.77731717, 'index': 25, 'word': 'ło', 'start': 91, 'end': 93},
+ {'entity': 'I-ORG', 'score': 0.9108255, 'index': 26, 'word': 'dzkim</w>', 'start': 93, 'end': 98},
+ {'entity': 'I-ORG', 'score': 0.98050755, 'index': 27, 'word': 'Urzędzie</w>', 'start': 99, 'end': 107},
+ {'entity': 'I-ORG', 'score': 0.9789752, 'index': 28, 'word': 'Powiatowym</w>', 'start': 108, 'end': 118}]
+```
 ### BibTeX entry and citation info
     url = "https://www.aclweb.org/anthology/P17-1178",
     doi = "10.18653/v1/P17-1178",
     pages = "1946--1958",
 }
 ```