szili2011 committed on
Commit
a61bd5c
·
verified ·
1 Parent(s): 298e502

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -3,11 +3,14 @@ import torch.nn as nn
3
  import json
4
  import gradio as gr
5
 
6
- # --- Step 1: Load the vocabularies ---
7
- with open('char_to_int.json', 'r') as f:
 
8
  char_to_int = json.load(f)
9
- with open('int_to_lang.json', 'r') as f:
10
- # ROBUSTNESS FIX: Convert JSON string keys ("0", "1") to integer keys (0, 1)
 
 
11
  int_to_lang = {int(k): v for k, v in json.load(f).items()}
12
 
13
  # --- Step 2: Re-define the Model Architecture ---
@@ -18,7 +21,7 @@ class CodeClassifierRNN(nn.Module):
18
  self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
19
  self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional, dropout=dropout if n_layers > 1 else 0, batch_first=True)
20
  self.dropout = nn.Dropout(dropout)
21
- self.fc = nn.Linear(hidden_dim * 2, output_dim)
22
  def forward(self, text):
23
  embedded = self.embedding(text)
24
  _, (hidden, _) = self.lstm(embedded)
@@ -55,7 +58,7 @@ def classify_code(code_snippet):
55
  probabilities = torch.softmax(prediction, dim=1)
56
  top5_probs, top5_indices = torch.topk(probabilities, 5)
57
 
58
- # ROBUSTNESS FIX: Simplified lookup using integer keys
59
  confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
60
 
61
  return confidences
 
3
  import json
4
  import gradio as gr
5
 
6
+ # --- Step 1: Load the vocabularies CORRECTLY ---
7
+
8
+ with open('char_to_int.json', 'r', encoding='utf-8') as f:
9
  char_to_int = json.load(f)
10
+
11
+ # This loads your ACTUAL int_to_lang.json file: {"0": "C#", "1": "C++", ...}
12
+ # JSON object keys are always strings, so convert the keys "0", "1", etc. to the integers 0, 1, etc.
13
+ with open('int_to_lang.json', 'r', encoding='utf-8') as f:
14
  int_to_lang = {int(k): v for k, v in json.load(f).items()}
15
 
16
  # --- Step 2: Re-define the Model Architecture ---
 
21
  self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
22
  self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional, dropout=dropout if n_layers > 1 else 0, batch_first=True)
23
  self.dropout = nn.Dropout(dropout)
24
+ self.fc = nn.Linear(hidden_dim * 2, output_dim) # * 2 for bidirectional
25
  def forward(self, text):
26
  embedded = self.embedding(text)
27
  _, (hidden, _) = self.lstm(embedded)
 
58
  probabilities = torch.softmax(prediction, dim=1)
59
  top5_probs, top5_indices = torch.topk(probabilities, 5)
60
 
61
+ # This lookup is now guaranteed to work with the correctly loaded dictionary.
62
  confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
63
 
64
  return confidences