emanuelaboros committed on
Commit
862237e
·
1 Parent(s): 95747e4

add comp back

Browse files
Files changed (1) hide show
  1. generic_ner.py +18 -26
generic_ner.py CHANGED
@@ -297,29 +297,30 @@ def attach_comp_to_closest(entities):
297
  closest_entity = None
298
  min_distance = float("inf")
299
 
300
- # Find the closest non-"comp.function" entity that is valid for attaching
301
  for other_entity in other_entities:
302
- distance = abs(
303
- comp_entity["lOffset"] - other_entity["rOffset"]
304
- ) # Calculate the distance
 
 
 
 
305
 
306
- # Ensure that the other entity's type is valid for the attachment
307
  if (
308
  distance < min_distance
309
  and other_entity["type"].split(".")[0] in valid_entity_types
310
  ):
311
- # Additional context check: Ensure the comp_entity's text doesn't conflict with the target entity's text
312
- if not conflicting_context(comp_entity, other_entity):
313
- min_distance = distance
314
- closest_entity = other_entity
315
 
316
- # Attach the "comp.function" entity using the suffix of 'entity' field if a valid entity is found
317
  if closest_entity:
318
- # Extract the suffix (e.g., "comp.title" becomes "title")
319
- suffix = comp_entity["type"].split(".")[-1]
320
- closest_entity[suffix] = comp_entity[
321
- "surface"
322
- ] # Attach the text using the suffix as the key
323
 
324
  return other_entities
325
 
@@ -329,21 +330,12 @@ def conflicting_context(comp_entity, target_entity):
329
  Determines if there is a conflict between the comp_entity and the target entity.
330
  Prevents incorrect name and function attachments by using a rule-based approach.
331
  """
332
- comp_text = comp_entity["surface"].lower()
333
- target_text = target_entity["surface"].lower()
334
-
335
- # Case 1: Check if the comp.name is already part of the entity's text.
336
- # if "pers" in target_entity["type"] and comp_entity["type"].startswith("comp.name"):
337
- # # Avoid attaching a name if it's already part of the entity's surface text.
338
- # if comp_text in target_text:
339
- # return True # Conflict: Name is already part of the target entity's text
340
-
341
- # Case 2: Check for correct function attachment to person or organization entities
342
  if comp_entity["type"].startswith("comp.function"):
343
  if not ("pers" in target_entity["type"] or "org" in target_entity["type"]):
344
  return True # Conflict: Function should only attach to persons or organizations
345
 
346
- # Case 3: Avoid attaching comp.* entities to non-person, non-organization types (like locations)
347
  if "loc" in target_entity["type"]:
348
  return True # Conflict: comp.* entities should not attach to locations or similar types
349
 
 
297
  closest_entity = None
298
  min_distance = float("inf")
299
 
300
+ # Find the closest non-"comp" entity that is valid for attaching
301
  for other_entity in other_entities:
302
+ # Calculate distance between the comp entity and the other entity
303
+ if comp_entity["lOffset"] > other_entity["rOffset"]:
304
+ distance = comp_entity["lOffset"] - other_entity["rOffset"]
305
+ elif comp_entity["rOffset"] < other_entity["lOffset"]:
306
+ distance = other_entity["lOffset"] - comp_entity["rOffset"]
307
+ else:
308
+ distance = 0 # They overlap or touch
309
 
310
+ # Ensure the entity type is valid and check for minimal distance
311
  if (
312
  distance < min_distance
313
  and other_entity["type"].split(".")[0] in valid_entity_types
314
  ):
315
+ min_distance = distance
316
+ closest_entity = other_entity
 
 
317
 
318
+ # Attach the "comp.function" or "comp.name" if a valid entity is found
319
  if closest_entity:
320
+ suffix = comp_entity["type"].split(".")[
321
+ -1
322
+ ] # Extract the suffix (e.g., 'name', 'function')
323
+ closest_entity[suffix] = comp_entity["surface"] # Attach the text
 
324
 
325
  return other_entities
326
 
 
330
  Determines if there is a conflict between the comp_entity and the target entity.
331
  Prevents incorrect name and function attachments by using a rule-based approach.
332
  """
333
+ # Case 1: Check for correct function attachment to person or organization entities
 
 
 
 
 
 
 
 
 
334
  if comp_entity["type"].startswith("comp.function"):
335
  if not ("pers" in target_entity["type"] or "org" in target_entity["type"]):
336
  return True # Conflict: Function should only attach to persons or organizations
337
 
338
+ # Case 2: Avoid attaching comp.* entities to non-person, non-organization types (like locations)
339
  if "loc" in target_entity["type"]:
340
  return True # Conflict: comp.* entities should not attach to locations or similar types
341