Spaces:

Remsky
/

triplex-knowledge-graph-visualizer

Runtime error

App Files Community

Remsky committed on Jul 28, 2024

Commit

25b0fd7

verified ·

1 Parent(s): d0f6106

Update lib/graph_extract.py

Browse files

Formatting, prompt adherence, exception handling, flash attn attempt

Files changed (1) hide show

lib/graph_extract.py +25 -18

lib/graph_extract.py CHANGED Viewed

@@ -43,7 +43,7 @@ model = AutoModelForCausalLM.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained(
     "sciphi/triplex",
     trust_remote_code=True,
-    attn_implementation="flash_attention_2",
         torch_dtype=torch.bfloat16,
     )
@@ -59,10 +59,13 @@ generation_config.pad_token_id = tokenizer.eos_token_id
 @spaces.GPU
 def triplextract(text, entity_types, predicates):
     input_format = """Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. NER identifies named entities of given entity types, and triple extraction identifies relationships between entities using specified predicates. Return the result as a JSON object with an "entities_and_triples" key containing an array of entities and triples.
         **Entity Types:**
         {entity_types}
         **Predicates:**
         {predicates}
         **Text:**
         {text}
         """
@@ -103,7 +106,7 @@ def triplextract(text, entity_types, predicates):
         return "Error: CUDA out of memory."
     except Exception as e:
         print(f"Error in generation: {e}")
-        return f"Error in generation: {str(e)}"
 def parse_triples(prediction):
     entities = {}
@@ -125,20 +128,24 @@ def parse_triples(prediction):
     for item in items:
         if isinstance(item, str):
-            if ":" in item:
-                id, entity = item.split(",", 1)
-                id = id.strip("[]").strip()
-                entity_type, entity_value = entity.split(":", 1)
-                entities[id] = {
-                    "type": entity_type.strip(),
-                    "value": entity_value.strip(),
-                }
-            else:
-                parts = item.split()
-                if len(parts) >= 3:
-                    source = parts[0].strip("[]")
-                    relation = " ".join(parts[1:-1])
-                    target = parts[-1].strip("[]")
-                    relationships.append((source, relation.strip(), target))
     return entities, relationships

 tokenizer = AutoTokenizer.from_pretrained(
     "sciphi/triplex",
     trust_remote_code=True,
+    attn_implementation="flash_attention_2" if flash_attn_installed else None,
         torch_dtype=torch.bfloat16,
     )
 @spaces.GPU
 def triplextract(text, entity_types, predicates):
     input_format = """Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. NER identifies named entities of given entity types, and triple extraction identifies relationships between entities using specified predicates. Return the result as a JSON object with an "entities_and_triples" key containing an array of entities and triples.
         **Entity Types:**
         {entity_types}
         **Predicates:**
         {predicates}
         **Text:**
         {text}
         """
         return "Error: CUDA out of memory."
     except Exception as e:
         print(f"Error in generation: {e}")
+        return f"Error in generation, please try again: {str(e)}"
 def parse_triples(prediction):
     entities = {}
     for item in items:
         if isinstance(item, str):
+            try:
+                if ":" in item:
+                    id, entity = item.split(",", 1)
+                    id = id.strip("[]").strip()
+                    entity_type, entity_value = entity.split(":", 1)
+                    entities[id] = {
+                        "type": entity_type.strip(),
+                        "value": entity_value.strip(),
+                    }
+                else:
+                    parts = item.split()
+                    if len(parts) >= 3:
+                        source = parts[0].strip("[]")
+                        relation = " ".join(parts[1:-1])
+                        target = parts[-1].strip("[]")
+                        relationships.append((source, relation.strip(), target))
+            except Exception as e:
+                # TODO: Handle gracefully
+                print(f"Error in processing: {item}: {e}")
     return entities, relationships