Spaces:
Running
Running
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
from transformers import AutoTokenizer, AutoModel
|
5 |
+
import gradio as gr
|
6 |
+
|
7 |
+
class CustomTinyBERTClassifier(nn.Module):
    """Transformer encoder with a linear head over its first-token state plus extra features.

    The encoder's hidden state at sequence position 0 is concatenated with a
    per-example vector of additional features; dropout is applied to the
    concatenation and a single linear layer maps it to the class logits.

    Args:
        model_name: Identifier handed to ``AutoModel.from_pretrained``.
        extra_feat_dim: Number of additional feature values per example.
        num_labels: Number of logits produced per example.
    """

    def __init__(self, model_name='huawei-noah/TinyBERT_General_4L_312D', extra_feat_dim=4, num_labels=2):
        super().__init__()
        # NOTE: the attribute names (bert / dropout / classifier) determine the
        # state_dict keys, so they must stay as they are for saved weights to load.
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        hidden_size = self.bert.config.hidden_size
        # The head sees the encoder state and the extra features side by side.
        self.classifier = nn.Linear(hidden_size + extra_feat_dim, num_labels)

    def forward(self, input_ids, attention_mask, additional_features):
        """Return the classification logits for a batch.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            additional_features: Extra features, concatenated with the
                encoder state along dim 1 (so it is expected to be 2-D with
                the batch on dim 0). Any numeric dtype/device is accepted;
                it is converted to match the encoder output.

        Returns:
            The output of the linear head (one row of logits per example).
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state of the first token of every sequence.
        cls_output = outputs.last_hidden_state[:, 0]
        # Align dtype and device with the encoder output. Without this, e.g.
        # float64 features get promoted by torch.cat and the linear layer then
        # raises a dtype-mismatch error; features on another device fail in cat.
        additional_features = additional_features.to(
            device=cls_output.device, dtype=cls_output.dtype
        )
        combined = torch.cat((cls_output, additional_features), dim=1)
        combined = self.dropout(combined)
        logits = self.classifier(combined)
        return logits
|
22 |
+
|
23 |
+
# Checkpoint name shared by the tokenizer and the classifier's encoder.
model_name = "huawei-noah/TinyBERT_General_4L_312D"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the architecture first, then restore the weights stored in
# custom_fake_job_model.pt, mapped onto the CPU.
model = CustomTinyBERTClassifier(model_name=model_name, extra_feat_dim=4, num_labels=2)
# SECURITY: torch.load is pickle-based. Only a state dict is needed here, so
# weights_only=True restricts unpickling to tensors and plain containers
# instead of allowing arbitrary objects from the file to be constructed.
model.load_state_dict(
    torch.load(
        "custom_fake_job_model.pt",
        map_location=torch.device("cpu"),
        weights_only=True,
    )
)
# Switch to evaluation mode so the classifier's dropout layer is inactive.
model.eval()
|
29 |
+
|
30 |
+
def predict_job(text, telecommuting, has_logo, has_questions, employment_type):
    """Classify a job posting and return a human-readable summary.

    Args:
        text: The job description to classify.
        telecommuting: Numeric feature value (the UI offers 0 or 1).
        has_logo: Numeric feature value (the UI offers 0 or 1).
        has_questions: Numeric feature value (the UI offers 0 or 1).
        employment_type: Numeric feature value (the UI offers 0 to 5).

    Returns:
        A multi-line string with the predicted label and its confidence, the
        two class probabilities, and the raw logits. If ``text`` is missing
        or blank, a short prompt asking for a description is returned instead
        and the model is not run.
    """
    # A blank description would still be tokenized and classified, yielding a
    # confident-looking but meaningless verdict, so stop early instead.
    if text is None or not text.strip():
        return "Please enter a job description to analyze."

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    # Single-row batch holding the four extra features, in the order the
    # model is called with elsewhere in this function.
    additional_features = torch.tensor(
        [[telecommuting, has_logo, has_questions, employment_type]],
        dtype=torch.float32,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            additional_features=additional_features,
        )
        probs = torch.softmax(logits, dim=1)
        pred = torch.argmax(probs, dim=1).item()
        confidence = probs[0][pred].item()

    # Class index 1 is reported as fake, anything else as legitimate.
    label = "🚨 Fake Job" if pred == 1 else "✅ Legit Job"
    result = (
        f"{label} (Confidence: {confidence:.2f})\n"
        f"Probabilities - Legit: {probs[0][0]:.3f}, Fake: {probs[0][1]:.3f}\n"
        f"Raw logits: {logits.squeeze().tolist()}"
    )
    return result
|
49 |
+
|
50 |
+
# Input widgets, created in the same order as predict_job's parameters:
# the description box first, then the three 0/1 sliders, then employment type.
_yes_no_labels = (
    "Telecommuting (0 = No, 1 = Yes)",
    "Has Company Logo (0 = No, 1 = Yes)",
    "Has Questions (0 = No, 1 = Yes)",
)
_input_widgets = [gr.Textbox(lines=5, label="Job Description")]
_input_widgets += [gr.Slider(0, 1, step=1, label=caption) for caption in _yes_no_labels]
_input_widgets.append(gr.Slider(0, 5, step=1, label="Employment Type (0–5)"))

# Single text box that shows the string returned by predict_job.
_output_widget = gr.Textbox(label="Prediction")

demo = gr.Interface(
    fn=predict_job,
    inputs=_input_widgets,
    outputs=_output_widget,
    title="Fake Job Detector",
    description="Detects fake job postings using TinyBERT and additional features",
)

# Start the web UI when this module is executed.
demo.launch()
|