emanuelaboros committed on
Commit
5d335a2
·
1 Parent(s): 198304b

add comp back

Browse files
Files changed (1) hide show
  1. generic_ner.py +16 -32
generic_ner.py CHANGED
@@ -263,46 +263,30 @@ def conflicting_context(comp_entity, target_entity):
263
  def extract_name_from_text(text, partial_name):
264
  """
265
  Extracts the full name from the entity's text based on the partial name.
266
- This function starts from the partial name, checks backward for the last capitalized word,
267
- and includes all words from that point up to the partial name, including any lowercase words in between.
268
  """
 
 
 
269
 
270
- print("text", text, "partial_name", partial_name)
 
 
 
 
271
 
272
- # Find the position of the partial name in the text
273
- name_start_idx = text.find(partial_name)
 
 
 
274
 
275
- print("textsss", text[:name_start_idx])
276
-
277
- if name_start_idx != -1:
278
- # Split the text into words
279
- words = text.split()
280
-
281
- # Find the position of the partial name in the word list
282
- for i, word in enumerate(words):
283
- if partial_name in word:
284
- # Initialize full name with the partial name
285
- full_name = [word]
286
-
287
- # Check previous words to find the first capitalized word (include all in between)
288
- j = i - 1
289
- while j >= 0:
290
- # As soon as we find a capitalized word, stop going backward
291
- if words[j][0].isupper():
292
- full_name.insert(0, words[j])
293
- else:
294
- # Continue adding lowercase words like titles (e.g., 'chancelier')
295
- full_name.insert(0, words[j])
296
- j -= 1
297
-
298
- return " ".join(
299
- full_name
300
- ).strip() # Join the words to form the full name
301
 
302
  # If not found, return the original text (as a fallback)
303
  return text.strip()
304
 
305
-
306
  def repair_names_in_entities(entities):
307
  """
308
  This function repairs the names in the entities by extracting the full name
 
def extract_name_from_text(text, partial_name):
    """
    Extracts the full name from the entity's text based on the partial name.

    The text is split on whitespace and searched for the first position where
    the words of ``partial_name`` appear consecutively. From there the function
    walks backward and prepends every directly preceding word whose first
    character is uppercase, stopping at the first word that is not.

    Args:
        text: The entity text to search in.
        partial_name: The (possibly multi-word) name fragment to expand.

    Returns:
        The expanded name joined by single spaces, or ``text.strip()`` when
        ``partial_name`` is empty/whitespace-only or does not occur in ``text``
        as a sequence of whole whitespace-separated words.
    """
    # Split the text and partial name into words
    words = text.split()
    partial_words = partial_name.split()
    span = len(partial_words)

    # An empty partial name would trivially match the empty slice at index 0
    # and produce "", so fall back to the original text instead.
    if not partial_words:
        return text.strip()

    # Only start positions where the whole partial name still fits can match.
    for start in range(len(words) - span + 1):
        if words[start:start + span] != partial_words:
            continue

        # Walk backward over the contiguous run of capitalized words that
        # immediately precedes the match (a lowercase word ends the run).
        first = start
        while first > 0 and words[first - 1][0].isupper():
            first -= 1

        return " ".join(words[first:start + span])

    # If not found, return the original text (as a fallback)
    return text.strip()
289
 
 
290
  def repair_names_in_entities(entities):
291
  """
292
  This function repairs the names in the entities by extracting the full name