Commit
·
5d335a2
1
Parent(s):
198304b
add comp back
Browse files- generic_ner.py +16 -32
generic_ner.py
CHANGED
@@ -263,46 +263,30 @@ def conflicting_context(comp_entity, target_entity):
|
|
263 |
def extract_name_from_text(text, partial_name):
|
264 |
"""
|
265 |
Extracts the full name from the entity's text based on the partial name.
|
266 |
-
This function
|
267 |
-
|
268 |
"""
|
|
|
|
|
|
|
269 |
|
270 |
-
|
|
|
|
|
|
|
|
|
271 |
|
272 |
-
|
273 |
-
|
|
|
|
|
|
|
274 |
|
275 |
-
|
276 |
-
|
277 |
-
if name_start_idx != -1:
|
278 |
-
# Split the text into words
|
279 |
-
words = text.split()
|
280 |
-
|
281 |
-
# Find the position of the partial name in the word list
|
282 |
-
for i, word in enumerate(words):
|
283 |
-
if partial_name in word:
|
284 |
-
# Initialize full name with the partial name
|
285 |
-
full_name = [word]
|
286 |
-
|
287 |
-
# Check previous words to find the first capitalized word (include all in between)
|
288 |
-
j = i - 1
|
289 |
-
while j >= 0:
|
290 |
-
# As soon as we find a capitalized word, stop going backward
|
291 |
-
if words[j][0].isupper():
|
292 |
-
full_name.insert(0, words[j])
|
293 |
-
else:
|
294 |
-
# Continue adding lowercase words like titles (e.g., 'chancelier')
|
295 |
-
full_name.insert(0, words[j])
|
296 |
-
j -= 1
|
297 |
-
|
298 |
-
return " ".join(
|
299 |
-
full_name
|
300 |
-
).strip() # Join the words to form the full name
|
301 |
|
302 |
# If not found, return the original text (as a fallback)
|
303 |
return text.strip()
|
304 |
|
305 |
-
|
306 |
def repair_names_in_entities(entities):
|
307 |
"""
|
308 |
This function repairs the names in the entities by extracting the full name
|
|
|
263 |
def extract_name_from_text(text: str, partial_name: str) -> str:
    """
    Extracts the full name from the entity's text based on the partial name.

    The text and the partial name are both split on whitespace. The first
    position where the partial name's words appear as a contiguous run in the
    text is located, and the match is then extended backward over every
    directly preceding word whose first character is uppercase. The extension
    stops at the first word that does not start with an uppercase character.

    Args:
        text: The entity's surrounding text.
        partial_name: The (possibly multi-word) name fragment to look for.

    Returns:
        The matched name with its leading capitalized words, joined by single
        spaces. If the partial name is empty/whitespace-only or does not occur
        as a whole-word sequence in the text, the stripped original text is
        returned as a fallback.
    """
    # Split the text and partial name into words
    words = text.split()
    partial_words = partial_name.split()
    span = len(partial_words)

    # An empty partial name would trivially "match" at index 0 and yield an
    # empty string; treat it as not found and use the fallback instead.
    if not partial_words:
        return text.strip()

    # Find the position of the partial name in the word list
    for start in range(len(words) - span + 1):
        if words[start:start + span] != partial_words:
            continue

        # Walk backward while the preceding word is capitalized; stop at the
        # first word that is not (words from split() are never empty).
        first = start
        while first > 0 and words[first - 1][0].isupper():
            first -= 1

        # Join the words to form the full name
        return " ".join(words[first:start + span])

    # If not found, return the original text (as a fallback)
    return text.strip()
|
289 |
|
|
|
290 |
def repair_names_in_entities(entities):
|
291 |
"""
|
292 |
This function repairs the names in the entities by extracting the full name
|