Spaces:

szili2011
/

polyglot-code-classifier

Sleeping

App Files Files Community

szili2011 committed on Jun 9

Commit

0f412e0

verified ·

1 Parent(s): a61bd5c

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -8

app.py CHANGED Viewed

@@ -3,15 +3,44 @@ import torch.nn as nn
 import json
 import gradio as gr
-# --- Step 1: Load the vocabularies CORRECTLY ---
-with open('char_to_int.json', 'r', encoding='utf-8') as f:
-    char_to_int = json.load(f)
-# This loads your ACTUAL int_to_lang.json file: {"0": "C#", "1": "C++", ...}
-# The FIX is to correctly convert the string keys "0", "1", etc., to integer keys 0, 1, etc.
-with open('int_to_lang.json', 'r', encoding='utf-8') as f:
-    int_to_lang = {int(k): v for k, v in json.load(f).items()}
 # --- Step 2: Re-define the Model Architecture ---
 # This MUST be the exact same architecture as the one you trained.
@@ -58,7 +87,7 @@ def classify_code(code_snippet):
     probabilities = torch.softmax(prediction, dim=1)
     top5_probs, top5_indices = torch.topk(probabilities, 5)
-    # This lookup is now guaranteed to work with the correctly loaded dictionary.
     confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
     return confidences

 import json
 import gradio as gr
+# --- Step 1: Create a "Smart" Vocabulary Loader ---
+# This function will load the vocabularies and automatically fix any format mismatches.
def load_vocabularies():
    """
    Load the character and language vocabularies from their JSON files.

    Reads ``char_to_int.json`` and ``int_to_lang.json`` from the current
    working directory. The language file is accepted in either orientation:

    * ``{"0": "C#", ...}`` (index -> language): keys are converted to ``int``.
    * ``{"C#": 0, ...}`` (language -> index): the mapping is inverted and the
      indices are converted to ``int``.

    Returns:
        A ``(char_to_int_map, int_to_lang_map)`` tuple. ``char_to_int_map`` is
        the parsed JSON content unchanged; ``int_to_lang_map`` always has
        ``int`` keys.

    Raises:
        ValueError: If the language vocabulary is empty, if neither its keys
            nor its values can be read as integer indices, or if a file is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        OSError: If either file cannot be opened.
    """
    with open('char_to_int.json', 'r', encoding='utf-8') as f:
        char_to_int_map = json.load(f)

    # Load the file the user has named 'int_to_lang.json'.
    with open('int_to_lang.json', 'r', encoding='utf-8') as f:
        language_vocab = json.load(f)

    # Fail with a clear message instead of leaking StopIteration from
    # next(iter(...)) when the file holds no entries.
    if not language_vocab:
        raise ValueError("int_to_lang.json contains no entries.")

    # The file is in index -> language form only if EVERY key is an integer
    # string. Deciding on the full conversion (not just the first key) keeps
    # the log message consistent with the branch that is actually taken.
    try:
        int_to_lang_map = {int(k): v for k, v in language_vocab.items()}
    except ValueError:
        int_to_lang_map = None

    if int_to_lang_map is not None:
        print("[INFO] Detected int->lang format. Loading directly.")
        return char_to_int_map, int_to_lang_map

    # Otherwise the file is in language -> index form and must be inverted.
    print("[INFO] Detected lang->int format. Reversing dictionary to fix.")
    try:
        # Coerce indices to int so that values stored as "0" or 0.0 still
        # produce keys that match the integer indices used for lookup.
        int_to_lang_map = {int(v): k for k, v in language_vocab.items()}
    except (TypeError, ValueError) as err:
        raise ValueError(
            "int_to_lang.json has neither integer keys nor integer values."
        ) from err
    return char_to_int_map, int_to_lang_map
+# Load the vocabularies using our smart function
+char_to_int, int_to_lang = load_vocabularies()
 # --- Step 2: Re-define the Model Architecture ---
 # This MUST be the exact same architecture as the one you trained.
     probabilities = torch.softmax(prediction, dim=1)
     top5_probs, top5_indices = torch.topk(probabilities, 5)
+    # This lookup will now work regardless of the original file format.
     confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
     return confidences