Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,14 @@ import torch.nn as nn
|
|
3 |
import json
|
4 |
import gradio as gr
|
5 |
|
6 |
-
# --- Step 1: Load the vocabularies ---
|
7 |
-
|
|
|
8 |
char_to_int = json.load(f)
|
9 |
-
|
10 |
-
|
|
|
|
|
11 |
int_to_lang = {int(k): v for k, v in json.load(f).items()}
|
12 |
|
13 |
# --- Step 2: Re-define the Model Architecture ---
|
@@ -18,7 +21,7 @@ class CodeClassifierRNN(nn.Module):
|
|
18 |
self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
|
19 |
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional, dropout=dropout if n_layers > 1 else 0, batch_first=True)
|
20 |
self.dropout = nn.Dropout(dropout)
|
21 |
-
self.fc = nn.Linear(hidden_dim * 2, output_dim)
|
22 |
def forward(self, text):
|
23 |
embedded = self.embedding(text)
|
24 |
_, (hidden, _) = self.lstm(embedded)
|
@@ -55,7 +58,7 @@ def classify_code(code_snippet):
|
|
55 |
probabilities = torch.softmax(prediction, dim=1)
|
56 |
top5_probs, top5_indices = torch.topk(probabilities, 5)
|
57 |
|
58 |
-
#
|
59 |
confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
|
60 |
|
61 |
return confidences
|
|
|
3 |
import json
|
4 |
import gradio as gr
|
5 |
|
6 |
+
# --- Step 1: Load the vocabularies CORRECTLY ---
|
7 |
+
|
8 |
+
with open('char_to_int.json', 'r', encoding='utf-8') as f:
|
9 |
char_to_int = json.load(f)
|
10 |
+
|
11 |
+
# This loads your ACTUAL int_to_lang.json file: {"0": "C#", "1": "C++", ...}
|
12 |
+
# The FIX is to correctly convert the string keys "0", "1", etc., to integer keys 0, 1, etc.
|
13 |
+
with open('int_to_lang.json', 'r', encoding='utf-8') as f:
|
14 |
int_to_lang = {int(k): v for k, v in json.load(f).items()}
|
15 |
|
16 |
# --- Step 2: Re-define the Model Architecture ---
|
|
|
21 |
self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
|
22 |
self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional, dropout=dropout if n_layers > 1 else 0, batch_first=True)
|
23 |
self.dropout = nn.Dropout(dropout)
|
24 |
+
self.fc = nn.Linear(hidden_dim * 2, output_dim) # * 2 for bidirectional
|
25 |
def forward(self, text):
|
26 |
embedded = self.embedding(text)
|
27 |
_, (hidden, _) = self.lstm(embedded)
|
|
|
58 |
probabilities = torch.softmax(prediction, dim=1)
|
59 |
top5_probs, top5_indices = torch.topk(probabilities, 5)
|
60 |
|
61 |
+
# This lookup is now guaranteed to work with the correctly loaded dictionary.
|
62 |
confidences = {int_to_lang[idx.item()]: prob.item() for idx, prob in zip(top5_indices[0], top5_probs[0])}
|
63 |
|
64 |
return confidences
|