from flask import Flask, render_template, request, jsonify
from chat_indobert import get_response
import json
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_recall_fscore_support, classification_report

# Create the Flask application instance
app = Flask(__name__)

# Load the dataset to get the list of tags (intents)
with open('datasets_new.json', 'r') as f:
    datasets = json.load(f)
tags = sorted(set(dataset['tag'] for dataset in datasets['intents']))

# Load the independent test set from datasets_uji_mandiri.json
with open('datasets_uji_mandiri.json', 'r') as f:
    test_data = json.load(f)

# Extract the texts and ground-truth intents from datasets_uji_mandiri.json
texts_test = [item['text'] for item in test_data]
true_intents = [item['true_intent'] for item in test_data]

# Warn about any intent that does not appear in tags
missing_intents = set(true_intents) - set(tags)
if missing_intents:
    print(f"Warning: intents {missing_intents} were not found in tags. "
          "Make sure datasets_new.json covers every intent.")

# Convert intents to label indices based on tags, dropping any pair with an
# unknown intent so that texts and labels stay aligned
pairs = [(text, tags.index(intent))
         for text, intent in zip(texts_test, true_intents)
         if intent in tags]
texts_test = [text for text, _ in pairs]
labels_test = [label for _, label in pairs]

# Load the IndoBERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("indobert_model")
model = AutoModelForSequenceClassification.from_pretrained("indobert_model")

# Use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Dataset class for evaluation
class IndoBERTDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __getitem__(self, idx):
        encoding = tokenizer(self.texts[idx], padding='max_length', truncation=True, max_length=20, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

    def __len__(self):
        return len(self.texts)
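
# Illustrative note: each item yields fixed-length tensors, e.g.
#   IndoBERTDataset(["contoh"], [0])[0]['input_ids'].shape == torch.Size([20])
# because the tokenizer pads/truncates to max_length=20 above.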

# Build the dataset and dataloader for the independent test data
test_dataset = IndoBERTDataset(texts_test, labels_test)
test_loader = DataLoader(test_dataset, batch_size=8)
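
# Sanity check (added sketch): the evaluation below divides by the number of
# test examples, so fail fast with a clear message if none remain.
if len(test_dataset) == 0:
    raise ValueError("No test examples with known intents were found in datasets_uji_mandiri.json.")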

# Evaluate the model on the independent test data
test_correct = 0
test_total = 0
test_preds = []
test_labels = []
with torch.no_grad():
    for batch in test_loader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)
        outputs = model(**inputs, labels=labels)
        _, predicted = torch.max(outputs.logits, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
        test_preds.extend(predicted.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

# Compute the evaluation metrics
test_accuracy = 100 * test_correct / test_total
test_precision, test_recall, test_f1, _ = precision_recall_fscore_support(test_labels, test_preds, average='weighted', zero_division=0)

# Classification report for the test data, restricted to labels that occur in it
unique_test_labels = sorted(set(test_labels))
filtered_test_tags = [tags[i] for i in unique_test_labels]
classification_report_str = classification_report(test_labels, test_preds, labels=unique_test_labels, target_names=filtered_test_tags, zero_division=0)

# Store the evaluation results in a dictionary
evaluation_results = {
    'accuracy': test_accuracy,
    'precision': test_precision,
    'recall': test_recall,
    'f1_score': test_f1,
    'classification_report': classification_report_str
}
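
# Optional (illustrative): also print the summary at startup so the metrics
# show up in the server console, not only on the accuracy page.
print(f"Test accuracy: {test_accuracy:.2f}% | weighted F1: {test_f1:.4f}")
print(classification_report_str)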

# Define the route for the main page (GET)
@app.route("/", methods=["GET"])
def index_get():
    return render_template("base.html")

# Define the route that handles prediction requests (POST); the /predict path
# is an assumption, since the original file omitted the route decorators
@app.route("/predict", methods=["POST"])
def predict():
    text = request.get_json().get("message")
    if not text:
        return jsonify({"answer": "Mohon masukkan pesan."})

    # Tokenize the user input
    encoding = tokenizer(text, padding='max_length', truncation=True, max_length=20, return_tensors='pt')
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Get the model output
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).cpu().numpy()[0]

    # Find the predicted tag
    predicted_index = np.argmax(probabilities)
    predicted_tag = tags[predicted_index]

    # Pick a response from the dataset based on the predicted tag
    for intent in datasets['intents']:
        if intent['tag'] == predicted_tag:
            response = intent['responses'][0]  # Take the first response
            break
    else:
        response = "Maaf, saya tidak mengerti pertanyaan Anda."

    # Prepare the response payload with probabilities for every tag
    message = {
        "answer": response,
        "predicted_tag": predicted_tag,
        "probabilities": {tag: float(prob) for tag, prob in zip(tags, probabilities)}
    }
    return jsonify(message)
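
# Example client call (assumes the /predict route above; adjust host/port to
# your deployment):
#   curl -X POST http://127.0.0.1:5000/predict \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Halo"}'
# The JSON response contains "answer", "predicted_tag", and per-tag "probabilities".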

# Define the route that displays the accuracy results (the /accuracy path is
# likewise an assumption)
@app.route("/accuracy")
def show_accuracy():
    return render_template("accuracy.html", evaluation=evaluation_results)

# Run the Flask application in debug mode
if __name__ == "__main__":
    app.run(debug=True)
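
# Note: debug=True is meant for local development only; serve the app with a
# production WSGI server (e.g. gunicorn) when deploying.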