Update README.md
Browse files
README.md
CHANGED
@@ -54,7 +54,7 @@ You can use this model with Transformers *pipeline* for NER.
|
|
54 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
55 |
from transformers import pipeline
|
56 |
|
57 |
-
model_checkpoint = "
|
58 |
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
59 |
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
|
60 |
|
@@ -65,6 +65,28 @@ example = "Nazywam się Grzegorz Brzęszczyszczykiewicz, pochodzę "\
|
|
65 |
ner_results = nlp(example)
|
66 |
print(ner_results)
|
67 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
### BibTeX entry and citation info
|
70 |
|
@@ -101,6 +123,5 @@ print(ner_results)
|
|
101 |
url = "https://www.aclweb.org/anthology/P17-1178",
|
102 |
doi = "10.18653/v1/P17-1178",
|
103 |
pages = "1946--1958",
|
104 |
-
abstract = "The ambitious goal of this work is to develop a cross-lingual name tagging and linking framework for 282 languages that exist in Wikipedia. Given a document in any of these languages, our framework is able to identify name mentions, assign a coarse-grained or fine-grained type to each mention, and link it to an English Knowledge Base (KB) if it is linkable. We achieve this goal by performing a series of new KB mining methods: generating {``}silver-standard{''} annotations by transferring annotations from English to other languages through cross-lingual links and KB properties, refining annotations through self-training and topic selection, deriving language-specific morphology features from anchor links, and mining word translation pairs from cross-lingual links. Both name tagging and linking results for 282 languages are promising on Wikipedia data and non-Wikipedia data.",
|
105 |
}
|
106 |
```
|
|
|
54 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
55 |
from transformers import pipeline
|
56 |
|
57 |
+
model_checkpoint = "pczarnik/herbert-base-ner"
|
58 |
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
59 |
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
|
60 |
|
|
|
65 |
ner_results = nlp(example)
|
66 |
print(ner_results)
|
67 |
```
|
68 |
+
```python
|
69 |
+
[{'entity': 'B-PER', 'score': 0.99451494, 'index': 4, 'word': 'Grzegorz</w>', 'start': 12, 'end': 20},
|
70 |
+
{'entity': 'I-PER', 'score': 0.99758506, 'index': 5, 'word': 'B', 'start': 21, 'end': 22},
|
71 |
+
{'entity': 'I-PER', 'score': 0.99749386, 'index': 6, 'word': 'rzę', 'start': 22, 'end': 25},
|
72 |
+
{'entity': 'I-PER', 'score': 0.9973041, 'index': 7, 'word': 'szczy', 'start': 25, 'end': 30},
|
73 |
+
{'entity': 'I-PER', 'score': 0.99682057, 'index': 8, 'word': 'szczy', 'start': 30, 'end': 35},
|
74 |
+
{'entity': 'I-PER', 'score': 0.9964832, 'index': 9, 'word': 'kiewicz</w>', 'start': 35, 'end': 42},
|
75 |
+
{'entity': 'B-LOC', 'score': 0.99427444, 'index': 14, 'word': 'Chrzą', 'start': 55, 'end': 60},
|
76 |
+
{'entity': 'I-LOC', 'score': 0.99143463, 'index': 15, 'word': 'szczy', 'start': 60, 'end': 65},
|
77 |
+
{'entity': 'I-LOC', 'score': 0.9922201, 'index': 16, 'word': 'że', 'start': 65, 'end': 67},
|
78 |
+
{'entity': 'I-LOC', 'score': 0.9918464, 'index': 17, 'word': 'wo', 'start': 67, 'end': 69},
|
79 |
+
{'entity': 'I-LOC', 'score': 0.9900766, 'index': 18, 'word': 'szczy', 'start': 69, 'end': 74},
|
80 |
+
{'entity': 'I-LOC', 'score': 0.98823845, 'index': 19, 'word': 'c</w>', 'start': 74, 'end': 75},
|
81 |
+
{'entity': 'B-ORG', 'score': 0.6808262, 'index': 23, 'word': 'Łę', 'start': 87, 'end': 89},
|
82 |
+
{'entity': 'I-ORG', 'score': 0.7763973, 'index': 24, 'word': 'ko', 'start': 89, 'end': 91},
|
83 |
+
{'entity': 'I-ORG', 'score': 0.77731717, 'index': 25, 'word': 'ło', 'start': 91, 'end': 93},
|
84 |
+
{'entity': 'I-ORG', 'score': 0.9108255, 'index': 26, 'word': 'dzkim</w>', 'start': 93, 'end': 98},
|
85 |
+
{'entity': 'I-ORG', 'score': 0.98050755, 'index': 27, 'word': 'Urzędzie</w>', 'start': 99, 'end': 107},
|
86 |
+
{'entity': 'I-ORG', 'score': 0.9789752, 'index': 28, 'word': 'Powiatowym</w>', 'start': 108, 'end': 118}]
|
87 |
+
```
|
88 |
+
|
89 |
+
|
90 |
|
91 |
### BibTeX entry and citation info
|
92 |
|
|
|
123 |
url = "https://www.aclweb.org/anthology/P17-1178",
|
124 |
doi = "10.18653/v1/P17-1178",
|
125 |
pages = "1946--1958",
|
|
|
126 |
}
|
127 |
```
|