Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,15 +3,44 @@ import torch.nn as nn
|
|
3 |
import json
|
4 |
import gradio as gr
|
5 |
|
6 |
-
# --- Step 1:
|
|
|
7 |
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
# This loads your ACTUAL int_to_lang.json file: {"0": "C#", "1": "C++", ...}
|
12 |
-
# The FIX is to correctly convert the string keys "0", "1", etc., to integer keys 0, 1, etc.
|
13 |
-
with open('int_to_lang.json', 'r', encoding='utf-8') as f:
|
14 |
-
int_to_lang = {int(k): v for k, v in json.load(f).items()}
|
15 |
|
16 |
# --- Step 2: Re-define the Model Architecture ---
|
17 |
# This MUST be the exact same architecture as the one you trained.
|
@@ -58,7 +87,7 @@ def classify_code(code_snippet):
|
|
58 |
probabilities = torch.softmax(prediction, dim=1)
|
59 |
top5_probs, top5_indices = torch.topk(probabilities, 5)
|
60 |
|
61 |
-
# This lookup
|
62 |
confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
|
63 |
|
64 |
return confidences
|
|
|
3 |
import json
|
4 |
import gradio as gr
|
5 |
|
6 |
+
# --- Step 1: Create a "Smart" Vocabulary Loader ---
|
7 |
+
# This function will load the vocabularies and automatically fix any format mismatches.
|
8 |
|
9 |
+
def load_vocabularies():
    """Load the character and language vocabularies from their JSON files.

    Reads ``char_to_int.json`` and ``int_to_lang.json`` from the current
    working directory. The language file is accepted in either orientation:

    * ``{"0": "C#", "1": "C++", ...}`` (index -> language), or
    * ``{"C#": 0, "C++": 1, ...}`` (language -> index), which is reversed.

    The orientation is decided from *all* keys, not just the first one, so
    a file is only treated as index -> language when every key parses as an
    integer.

    Returns:
        A ``(char_to_int_map, int_to_lang_map)`` tuple. ``char_to_int_map``
        is returned exactly as loaded; ``int_to_lang_map`` always has
        ``int`` keys so it can be indexed with integer class indices.

    Raises:
        ValueError: If ``int_to_lang.json`` is empty, or if it is in the
            language -> index orientation but an index is not an integer.
    """
    with open('char_to_int.json', 'r', encoding='utf-8') as f:
        char_to_int_map = json.load(f)

    # Load the file the user has named 'int_to_lang.json'.
    with open('int_to_lang.json', 'r', encoding='utf-8') as f:
        language_vocab = json.load(f)

    # Fail fast with a clear message instead of a bare StopIteration.
    if not language_vocab:
        raise ValueError("int_to_lang.json contains no entries.")

    # JSON object keys are always strings, so {"0": "Language"} needs its
    # keys converted. If any key is not an integer the file must be in the
    # {"Language": 0} orientation instead.
    try:
        int_to_lang_map = {int(k): v for k, v in language_vocab.items()}
    except ValueError:
        int_to_lang_map = None

    if int_to_lang_map is not None:
        print("[INFO] Detected int->lang format. Loading directly.")
    else:
        print("[INFO] Detected lang->int format. Reversing dictionary to fix.")
        try:
            # Coerce indices to int so values stored as "0" still match the
            # integer indices used for lookups.
            int_to_lang_map = {int(v): k for k, v in language_vocab.items()}
        except (TypeError, ValueError) as err:
            raise ValueError(
                "int_to_lang.json is neither an int->lang nor a lang->int mapping."
            ) from err

    return char_to_int_map, int_to_lang_map
|
40 |
+
|
41 |
+
# Load the vocabularies using our smart function
|
42 |
+
char_to_int, int_to_lang = load_vocabularies()
|
43 |
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# --- Step 2: Re-define the Model Architecture ---
|
46 |
# This MUST be the exact same architecture as the one you trained.
|
|
|
87 |
probabilities = torch.softmax(prediction, dim=1)
|
88 |
top5_probs, top5_indices = torch.topk(probabilities, 5)
|
89 |
|
90 |
+
# This lookup will now work regardless of the original file format.
|
91 |
confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
|
92 |
|
93 |
return confidences
|