Spaces:

Testys
/

YorubaCNN

Sleeping

App Files Files Community

Testys committed on Aug 25

Commit

2210a0e

•

1 Parent(s): 1549ba5

Update main.py

Browse files

Files changed (1) hide show

main.py +40 -30

main.py CHANGED Viewed

@@ -36,40 +36,50 @@ sentiment_model = SentimentCNNModel(
 sentiment_model.load_state_dict(torch.load(sentiment_model_name, map_location=torch.device('cpu')))
 sentiment_model.eval()
-def analyze_text(text):
-    # Tokenize input text for NER
-    ner_inputs = ner_tokenizer(text, return_tensors="pt")
-    input_ids = ner_inputs['input_ids']
-    # Converting token IDs back to tokens
-    tokens = [ner_tokenizer.convert_ids_to_tokens(id) for id in input_ids.squeeze().tolist()]
-    # Perform Named Entity Recognition
-    with torch.no_grad():
-        ner_outputs = ner_model(**ner_inputs)
-    ner_predictions = torch.argmax(ner_outputs, dim=-1)[0]
-    ner_labels = ner_predictions.tolist()
-    ner_labels = [ner_config["id2labels"][str(label)] for label in ner_labels]
-    #matching the tokens with the labels
-    ner_labels = [f"{token}: {label}" for token, label in zip(tokens, ner_labels)]
-    # Tokenize input text for sentiment analysis
-    sentiment_inputs = sentiment_tokenizer(text, return_tensors="pt")
-    # Perform sentiment analysis
-    with torch.no_grad():
-        sentiment_outputs = sentiment_model(**sentiment_inputs)
-    sentiment_probabilities = torch.argmax(sentiment_outputs, dim=1)
-    sentiment_scores = sentiment_probabilities.tolist()
-    sentiment_id = sentiment_scores[0]
-    sentiment = sentiment_config["id2label"][str(sentiment_id)]
-    return ner_labels, sentiment
 def main():
     st.set_page_config(page_title="YorubaCNN for NER and Sentiment Analysis", layout="wide")
@@ -139,4 +149,4 @@ def main():
         """, unsafe_allow_html=True)
 if __name__ == "__main__":
-    main()

 sentiment_model.load_state_dict(torch.load(sentiment_model_name, map_location=torch.device('cpu')))
 sentiment_model.eval()
+def analyze_text(text, window_size=512, stride=256):
+    """Run NER and sentiment analysis over ``text`` using sliding windows.
+
+    The text is cut into character windows of ``window_size`` that start
+    every ``stride`` characters. Each window is tokenized and passed through
+    the module-level NER and sentiment models; NER rows from all windows are
+    concatenated and the sentiment is decided by majority vote.
+
+    Args:
+        text: Input string to analyze.
+        window_size: Window length in characters. The same value is also
+            passed to both tokenizers as ``max_length``.
+            NOTE(review): a window whose token count exceeds ``window_size``
+            is presumably truncated by the tokenizer -- confirm this is
+            acceptable for the expected inputs.
+        stride: Number of characters between consecutive window starts.
+
+    Returns:
+        A ``(ner_labels, sentiment)`` tuple. ``ner_labels`` is a list of
+        ``"token: label"`` strings in window order; ``sentiment`` is the
+        label predicted for the largest number of windows (on a tie, the
+        label that was seen first wins).
+
+    Raises:
+        ValueError: If ``window_size`` or ``stride`` is not positive.
+
+    Known limitation: when ``stride < window_size`` the windows overlap and
+    tokens in the overlapping region appear once per window in the NER
+    output; they are not merged.
+    """
+    from collections import Counter
+
+    if window_size <= 0 or stride <= 0:
+        raise ValueError("window_size and stride must be positive integers")
+
+    all_ner_labels = []
+    all_sentiments = []
+
+    # max(..., 1) guarantees one window even for empty text, so the majority
+    # vote below always has at least one entry instead of raising IndexError.
+    for start in range(0, max(len(text), 1), stride):
+        window = text[start:start + window_size]
+
+        # Tokenize input text for NER
+        ner_inputs = ner_tokenizer(window, return_tensors="pt", truncation=True, padding=True, max_length=window_size)
+        # Index the single batch row instead of squeeze(): squeeze() would
+        # collapse a one-token sequence to a scalar and break the iteration.
+        token_ids = ner_inputs['input_ids'][0].tolist()
+        tokens = [ner_tokenizer.convert_ids_to_tokens(token_id) for token_id in token_ids]
+
+        # Perform Named Entity Recognition
+        with torch.no_grad():
+            ner_outputs = ner_model(**ner_inputs)
+        ner_predictions = torch.argmax(ner_outputs, dim=-1)[0]
+        ner_labels = [ner_config["id2labels"][str(label)] for label in ner_predictions.tolist()]
+        # Pair every token with its predicted label
+        all_ner_labels.extend(f"{token}: {label}" for token, label in zip(tokens, ner_labels))
+
+        # Tokenize input text for sentiment analysis
+        sentiment_inputs = sentiment_tokenizer(window, return_tensors="pt", truncation=True, padding=True, max_length=window_size)
+
+        # Perform sentiment analysis
+        with torch.no_grad():
+            sentiment_outputs = sentiment_model(**sentiment_inputs)
+        sentiment_id = torch.argmax(sentiment_outputs, dim=1).tolist()[0]
+        all_sentiments.append(sentiment_config["id2label"][str(sentiment_id)])
+
+        # This window already reaches the end of the text; any later window
+        # would be a pure subset of it and would only duplicate NER rows and
+        # skew the sentiment vote.
+        if start + window_size >= len(text):
+            break
+
+    # Aggregate the per-window sentiments by majority vote
+    final_sentiment = Counter(all_sentiments).most_common(1)[0][0]
+    return all_ner_labels, final_sentiment
 def main():
     st.set_page_config(page_title="YorubaCNN for NER and Sentiment Analysis", layout="wide")
         """, unsafe_allow_html=True)
 if __name__ == "__main__":
+    main()