tasal9's picture
Add demo.launch call
ce56720 verified
import gradio as gr
import time
import threading
import random
from datetime import datetime
from datasets import load_dataset
import pandas as pd
import spaces
import io
# Global state
class TrainingState:
def __init__(self):
self.status = "idle"
self.progress = 0
self.logs = ["✅ System initialized"]
self.start_time = None
self.model_name = "tasal9/pashto-base-bloom"
self.active_process = None
self.dataset_loaded = False
self.dataset_info = "No dataset loaded"
self.dataset_sample = pd.DataFrame()
def load_dataset(self):
try:
self.logs.append("⏳ Loading dataset: tasal9/ZamAi-Pashto-Datasets-V2")
dataset = load_dataset("tasal9/ZamAi-Pashto-Datasets-V2")
self.dataset_loaded = True
self.dataset_info = f"✅ Dataset loaded!\nName: ZamAi-Pashto-Datasets-V2\nSize: {len(dataset['train'])} examples"
self.dataset_sample = pd.DataFrame(dataset['train'].select(range(5)))
self.logs.append(f"📊 {len(dataset['train'])} Pashto examples loaded")
return True
except Exception as e:
self.logs.append(f"❌ Error loading dataset: {str(e)}")
self.dataset_info = f"Error: {str(e)}"
return False
def load_local_file(self, file):
try:
ext = file.name.split('.')[-1]
if ext == "csv":
df = pd.read_csv(file.name)
elif ext == "json":
df = pd.read_json(file.name)
elif ext == "txt":
df = pd.DataFrame({"text": open(file.name).read().splitlines()})
else:
raise ValueError("Unsupported file format")
self.dataset_sample = df.head(5)
self.dataset_info = f"✅ Local file loaded: {file.name}"
self.dataset_loaded = True
self.logs.append(f"📁 Local dataset loaded: {file.name}")
return True
except Exception as e:
self.dataset_info = f"❌ Error loading file: {str(e)}"
self.logs.append(self.dataset_info)
return False
def start_training(self, size):
self.status = "training"
self.progress = 0
self.logs = [f"🏋️ Training started at {datetime.now().strftime('%H:%M:%S')}"]
self.logs.append(f"📝 Data size: {size} characters")
self.start_time = time.time()
def start_finetuning(self, size):
self.status = "fine-tuning"
self.progress = 0
self.logs = [f"🎯 Fine-tuning started at {datetime.now().strftime('%H:%M:%S')}"]
self.logs.append(f"📝 Data size: {size} characters")
self.start_time = time.time()
def update_progress(self, progress):
self.progress = min(100, max(0, progress))
if progress >= 100:
self.complete_process()
def add_log(self, msg):
self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
if len(self.logs) > 15:
self.logs.pop(0)
def get_status(self):
return self.status
@spaces.GPU
def complete_process(self):
elapsed = time.time() - self.start_time
self.add_log(f"🏁 {self.status.capitalize()} completed in {elapsed:.1f}s")
self.status = "idle"
self.progress = 100
state = TrainingState()
translations = {
"English": {
"title": "🌸 Pashto-Base-Bloom Trainer",
"load_dataset": "Load Dataset",
"upload_file": "Upload Local File",
"status": "Status",
"preview": "Sample Preview",
"test_input": "Input",
"test_output": "Output",
"test": "Test",
"train_data": "Training Data",
"train": "Start Training",
"finetune_data": "Fine-tuning Data",
"finetune": "Start Fine-tuning",
"current_status": "Current Status",
"progress": "Progress",
"logs": "Logs",
"refresh": "🔄 Refresh",
"export_logs": "📥 Export Logs",
"language": "Language"
},
"پښتو": {
"title": "🌸 پښتو-بیس-بلوم روزونکی",
"load_dataset": "ډیټاسټ لوډ کړئ",
"upload_file": "محلي فایل اپلوډ کړئ",
"status": "حالت",
"preview": "نمونه ښودنه",
"test_input": "ورودی",
"test_output": "وتی",
"test": "ازموینه",
"train_data": "د روزنې معلومات",
"train": "روزنه پیل کړئ",
"finetune_data": "د فاین ټیون معلومات",
"finetune": "فاین ټیون پیل کړئ",
"current_status": "اوسنی حالت",
"progress": "پرمختګ",
"logs": "لاګونه",
"refresh": "🔄 تازه کړئ",
"export_logs": "📥 لاګونه ډاونلوډ کړئ",
"language": "ژبه"
}
}
def test_model(text):
if not text.strip():
return "❗ Enter text to test."
options = [
f"Processed: '{text}'",
f"Model response to: {text}",
f"Pashto analysis: {len(text)} characters",
f"✅ Got it: {text}",
f"Generated: {text}... [simulated]",
f"🔍 Words: {len(text.split())}"
]
return random.choice(options)
@spaces.GPU
def simulate_process(duration, process_type, data_size):
if process_type == "train":
state.start_training(data_size)
else:
state.start_finetuning(data_size)
steps = 10
for i in range(steps + 1):
time.sleep(duration / steps)
state.update_progress(int((i / steps) * 100))
if i % 3 == 0:
state.add_log(random.choice([
f"Batch {i}/{steps}",
f"Loss: {random.uniform(0.1, 1.0):.3f}",
f"LR: {random.uniform(1e-5, 1e-3):.6f}",
f"GPU: {random.randint(60, 95)}% (sim)",
]))
state.complete_process()
def train_model(text):
if not text.strip():
return "❌ Add training data.", ""
if not state.dataset_loaded:
return "❌ Load dataset first.", ""
if state.status != "idle":
return "⏳ Wait for current process.", ""
threading.Thread(target=simulate_process, args=(15, "train", len(text)), daemon=True).start()
return "✅ Training started", ""
def finetune_model(text):
if not text.strip():
return "❌ Add fine-tuning data.", ""
if not state.dataset_loaded:
return "❌ Load dataset first.", ""
if state.status != "idle":
return "⏳ Wait for current process.", ""
threading.Thread(target=simulate_process, args=(10, "fine-tune", len(text)), daemon=True).start()
return "✅ Fine-tuning started", ""
def load_hf_dataset():
ok = state.load_dataset()
return state.dataset_info, state.dataset_sample if ok else pd.DataFrame()
def load_local_dataset(file):
ok = state.load_local_file(file)
return state.dataset_info, state.dataset_sample if ok else pd.DataFrame()
def get_current_status():
return state.get_status(), state.progress / 100, "\n".join(state.logs)
def export_logs():
df = pd.DataFrame({"Logs": state.logs})
buffer = io.StringIO()
df.to_csv(buffer, index=False)
buffer.seek(0)
return buffer.getvalue()
with gr.Blocks(title="Pashto Base Bloom Trainer", theme="soft") as demo:
lang_selector = gr.Dropdown(choices=["English", "پښتو"], value="English", label="Language")
labels = translations["English"]
gr.Markdown(f"# {labels['title']}")
with gr.Tab(labels["load_dataset"]):
gr.Markdown(f"### {labels['load_dataset']}")
with gr.Row():
dataset_btn = gr.Button(labels["load_dataset"])
dataset_status = gr.Textbox(label=labels["status"], lines=2, interactive=False)
if __name__ == "__main__":
demo.launch()