szili2011 committed on
Commit
0f412e0
·
verified ·
1 Parent(s): a61bd5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -8
app.py CHANGED
@@ -3,15 +3,44 @@ import torch.nn as nn
3
  import json
4
  import gradio as gr
5
 
6
- # --- Step 1: Load the vocabularies CORRECTLY ---
 
7
 
8
- with open('char_to_int.json', 'r', encoding='utf-8') as f:
9
- char_to_int = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # This loads your ACTUAL int_to_lang.json file: {"0": "C#", "1": "C++", ...}
12
- # The FIX is to correctly convert the string keys "0", "1", etc., to integer keys 0, 1, etc.
13
- with open('int_to_lang.json', 'r', encoding='utf-8') as f:
14
- int_to_lang = {int(k): v for k, v in json.load(f).items()}
15
 
16
  # --- Step 2: Re-define the Model Architecture ---
17
  # This MUST be the exact same architecture as the one you trained.
@@ -58,7 +87,7 @@ def classify_code(code_snippet):
58
  probabilities = torch.softmax(prediction, dim=1)
59
  top5_probs, top5_indices = torch.topk(probabilities, 5)
60
 
61
- # This lookup is now guaranteed to work with the correctly loaded dictionary.
62
  confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
63
 
64
  return confidences
 
3
  import json
4
  import gradio as gr
5
 
6
+ # --- Step 1: Create a "Smart" Vocabulary Loader ---
7
+ # This function will load the vocabularies and automatically fix any format mismatches.
8
 
9
def load_vocabularies(char_to_int_path='char_to_int.json',
                      int_to_lang_path='int_to_lang.json'):
    """
    Load the character vocabulary and the index -> language vocabulary.

    The language file is accepted in either orientation:
    {"0": "Language", ...} (index -> name, with JSON string keys) or
    {"Language": 0, ...} (name -> index). Both are normalized to a dict
    whose keys are ints and whose values are the language names, so it can
    be indexed directly with integer class indices.

    Args:
        char_to_int_path: Path to the JSON file holding the character
            vocabulary. Defaults to 'char_to_int.json'.
        int_to_lang_path: Path to the JSON file holding the language
            vocabulary. Defaults to 'int_to_lang.json'.

    Returns:
        A tuple (char_to_int_map, int_to_lang_map). char_to_int_map is
        returned exactly as loaded from JSON.

    Raises:
        ValueError: If the language vocabulary is empty, or if neither its
            keys nor its values can be interpreted as integer indices.
        OSError: If either file cannot be opened.
    """
    with open(char_to_int_path, 'r', encoding='utf-8') as f:
        char_to_int_map = json.load(f)

    with open(int_to_lang_path, 'r', encoding='utf-8') as f:
        language_vocab = json.load(f)

    # An empty vocabulary cannot label any prediction; fail with a clear
    # message instead of an obscure StopIteration / KeyError later on.
    if not language_vocab:
        raise ValueError(f"Language vocabulary in {int_to_lang_path!r} is empty.")

    try:
        # JSON object keys are always strings, so "0" must become 0.
        int_to_lang_map = {int(k): v for k, v in language_vocab.items()}
    except ValueError:
        # At least one key is not numeric, so the file is in the
        # {"Language": 0} orientation and has to be reversed.
        print("[INFO] Detected lang->int format. Reversing dictionary to fix.")
        try:
            # Coerce the indices as well: a file that stores them as
            # strings ("0") would otherwise produce string keys that never
            # match an integer class index.
            int_to_lang_map = {int(v): k for k, v in language_vocab.items()}
        except (TypeError, ValueError) as err:
            raise ValueError(
                f"Cannot interpret {int_to_lang_path!r} as an "
                "int->lang or lang->int mapping."
            ) from err
    else:
        print("[INFO] Detected int->lang format. Loading directly.")

    return char_to_int_map, int_to_lang_map
40
+
41
+ # Load the vocabularies using our smart function
42
+ char_to_int, int_to_lang = load_vocabularies()
43
 
 
 
 
 
44
 
45
  # --- Step 2: Re-define the Model Architecture ---
46
  # This MUST be the exact same architecture as the one you trained.
 
87
  probabilities = torch.softmax(prediction, dim=1)
88
  top5_probs, top5_indices = torch.topk(probabilities, 5)
89
 
90
+ # This lookup will now work regardless of the original file format.
91
  confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
92
 
93
  return confidences