Commit
·
862237e
1
Parent(s):
95747e4
add comp back
Browse files- generic_ner.py +18 -26
generic_ner.py
CHANGED
@@ -297,29 +297,30 @@ def attach_comp_to_closest(entities):
|
|
297 |
closest_entity = None
|
298 |
min_distance = float("inf")
|
299 |
|
300 |
-
# Find the closest non-"comp
|
301 |
for other_entity in other_entities:
|
302 |
-
distance
|
303 |
-
|
304 |
-
|
|
|
|
|
|
|
|
|
305 |
|
306 |
-
# Ensure
|
307 |
if (
|
308 |
distance < min_distance
|
309 |
and other_entity["type"].split(".")[0] in valid_entity_types
|
310 |
):
|
311 |
-
|
312 |
-
|
313 |
-
min_distance = distance
|
314 |
-
closest_entity = other_entity
|
315 |
|
316 |
-
# Attach the "comp.function"
|
317 |
if closest_entity:
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
] # Attach the text using the suffix as the key
|
323 |
|
324 |
return other_entities
|
325 |
|
@@ -329,21 +330,12 @@ def conflicting_context(comp_entity, target_entity):
|
|
329 |
Determines if there is a conflict between the comp_entity and the target entity.
|
330 |
Prevents incorrect name and function attachments by using a rule-based approach.
|
331 |
"""
|
332 |
-
|
333 |
-
target_text = target_entity["surface"].lower()
|
334 |
-
|
335 |
-
# Case 1: Check if the comp.name is already part of the entity's text.
|
336 |
-
# if "pers" in target_entity["type"] and comp_entity["type"].startswith("comp.name"):
|
337 |
-
# # Avoid attaching a name if it's already part of the entity's surface text.
|
338 |
-
# if comp_text in target_text:
|
339 |
-
# return True # Conflict: Name is already part of the target entity's text
|
340 |
-
|
341 |
-
# Case 2: Check for correct function attachment to person or organization entities
|
342 |
if comp_entity["type"].startswith("comp.function"):
|
343 |
if not ("pers" in target_entity["type"] or "org" in target_entity["type"]):
|
344 |
return True # Conflict: Function should only attach to persons or organizations
|
345 |
|
346 |
-
# Case
|
347 |
if "loc" in target_entity["type"]:
|
348 |
return True # Conflict: comp.* entities should not attach to locations or similar types
|
349 |
|
|
|
297 |
closest_entity = None
|
298 |
min_distance = float("inf")
|
299 |
|
300 |
+
# Find the closest non-"comp" entity that is valid for attaching
|
301 |
for other_entity in other_entities:
|
302 |
+
# Calculate distance between the comp entity and the other entity
|
303 |
+
if comp_entity["lOffset"] > other_entity["rOffset"]:
|
304 |
+
distance = comp_entity["lOffset"] - other_entity["rOffset"]
|
305 |
+
elif comp_entity["rOffset"] < other_entity["lOffset"]:
|
306 |
+
distance = other_entity["lOffset"] - comp_entity["rOffset"]
|
307 |
+
else:
|
308 |
+
distance = 0 # They overlap or touch
|
309 |
|
310 |
+
# Ensure the entity type is valid and check for minimal distance
|
311 |
if (
|
312 |
distance < min_distance
|
313 |
and other_entity["type"].split(".")[0] in valid_entity_types
|
314 |
):
|
315 |
+
min_distance = distance
|
316 |
+
closest_entity = other_entity
|
|
|
|
|
317 |
|
318 |
+
# Attach the "comp.function" or "comp.name" if a valid entity is found
|
319 |
if closest_entity:
|
320 |
+
suffix = comp_entity["type"].split(".")[
|
321 |
+
-1
|
322 |
+
] # Extract the suffix (e.g., 'name', 'function')
|
323 |
+
closest_entity[suffix] = comp_entity["surface"] # Attach the text
|
|
|
324 |
|
325 |
return other_entities
|
326 |
|
|
|
330 |
Determines if there is a conflict between the comp_entity and the target entity.
|
331 |
Prevents incorrect name and function attachments by using a rule-based approach.
|
332 |
"""
|
333 |
+
# Case 1: Check for correct function attachment to person or organization entities
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
if comp_entity["type"].startswith("comp.function"):
|
335 |
if not ("pers" in target_entity["type"] or "org" in target_entity["type"]):
|
336 |
return True # Conflict: Function should only attach to persons or organizations
|
337 |
|
338 |
+
# Case 2: Avoid attaching comp.* entities to non-person, non-organization types (like locations)
|
339 |
if "loc" in target_entity["type"]:
|
340 |
return True # Conflict: comp.* entities should not attach to locations or similar types
|
341 |
|