# Captured from a Hugging Face Spaces page (Space status at capture time: Sleeping).
# app.py (Part 1 of 2)
import dataclasses
import datetime
import inspect
import json
import logging
import os
import time

import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainerCallback,
    TrainingArguments,
)
# =========================
# Runtime directories
# =========================
# Created eagerly at import time so later file writes find their folders.
for _required_dir in ("trained_models", "logs", "memory"):
    os.makedirs(_required_dir, exist_ok=True)

# =========================
# Logging configuration
# =========================
# Root logger: INFO and above go to logs/app.log with a timestamped format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    filename=os.path.join("logs", "app.log"),
)
log = logging.getLogger(__name__)
def log_event(event: str):
    """Append a timestamped event line to logs/events.log and echo it to stdout.

    Args:
        event: Human-readable message; may contain non-ASCII text such as emoji.
    """
    log_file = os.path.join("logs", "events.log")
    # Re-create the folder if it was removed after start-up (idempotent).
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    # Explicit UTF-8: callers pass emoji, which the platform default encoding
    # (e.g. cp1252, or ASCII under LANG=C) may be unable to encode.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"[{datetime.datetime.now()}] {event}\n")
    print(event)
# =========================
# Memory System
# =========================
def get_memory_file(model_run: str):
    """Build the path of the JSON memory file that belongs to ``model_run``."""
    filename = f"memory_{model_run}.json"
    return os.path.join("memory", filename)
def load_memory(model_run: str):
    """Load the saved conversation history for a model run.

    Args:
        model_run: Run identifier used to locate the memory file.

    Returns:
        The decoded JSON content of the run's memory file, or an empty list
        when the file does not exist or is empty.

    Raises:
        json.JSONDecodeError: If the file holds malformed JSON.
    """
    file = get_memory_file(model_run)
    # EAFP: opening directly avoids the race between an exists() check and open().
    try:
        with open(file, "r", encoding="utf-8") as f:
            raw = f.read()
    except FileNotFoundError:
        return []
    # A zero-byte file (e.g. left by an interrupted write) means "no history"
    # rather than a decoding error.
    if not raw.strip():
        return []
    return json.loads(raw)
def save_memory(model_run: str, conversation: dict):
    """Append one conversation record to a model run's memory file.

    Args:
        model_run: Run identifier used to locate the memory file.
        conversation: JSON-serialisable record to append to the history.

    Raises:
        TypeError: If ``conversation`` is not JSON-serialisable. The existing
            memory file is left untouched in that case.
    """
    file = get_memory_file(model_run)
    memory = load_memory(model_run)
    memory.append(conversation)
    # Serialise before touching the disk: opening the target with "w" first
    # would truncate the history even when serialisation then fails.
    payload = json.dumps(memory, indent=2)
    os.makedirs(os.path.dirname(file) or ".", exist_ok=True)
    # Write to a sibling temp file and swap it in, so a crash mid-write never
    # leaves a half-written memory file behind.
    tmp_file = f"{file}.tmp"
    with open(tmp_file, "w", encoding="utf-8") as f:
        f.write(payload)
    os.replace(tmp_file, file)
# =========================
# Helper: List Trained Models
# =========================
def list_trained_models():
    """Return the names of the run directories inside ``trained_models/``.

    Returns:
        Alphabetically sorted directory names. When the folder is missing or
        holds no sub-directory, a single placeholder entry is returned so the
        UI dropdown is never empty.
    """
    root = "trained_models"
    try:
        entries = os.listdir(root)
    except FileNotFoundError:
        entries = []
    # os.listdir order is arbitrary; sort so the dropdown order is stable.
    models = sorted(d for d in entries if os.path.isdir(os.path.join(root, d)))
    return models or ["❌ No trained models yet"]
# =========================
# Training Pipeline
# =========================
def _pick_kwarg(supported, preferred, legacy):
    """Return ``preferred`` if it is among ``supported`` names, else ``legacy``."""
    return preferred if preferred in supported else legacy


def train_model(base_model, dataset_name, custom_name, epochs):
    """Fine-tune a sequence-classification model on CPU and save the result.

    Only the first 200 training rows are used (CPU-friendly); 20% of them are
    held out for the per-epoch evaluation.

    Args:
        base_model: Model ID or path handed to ``from_pretrained``.
        dataset_name: Dataset ID, optionally followed by whitespace and a
            config name (``"repo config"``). The dataset must expose ``text``
            and ``label`` columns.
        custom_name: Run label; combined with ``base_model`` to form the run ID.
        epochs: Number of training epochs (converted with ``int``).

    Returns:
        A ``(message, progress)`` tuple. On success ``progress`` holds
        ``status``, the logged ``loss`` values, per-epoch ``accuracy`` values
        and the elapsed ``time`` in seconds. On any failure the message
        describes the error and ``progress`` is ``{"status": "error"}``.
    """
    try:
        # -------------------------
        # Prepare run identifiers
        # -------------------------
        # Path separators in the run name would otherwise create nested
        # folders that list_trained_models() cannot see as one run.
        safe_name = custom_name.strip().replace("/", "_").replace("\\", "_")
        run_id = f"{base_model.replace('/', '_')}__{safe_name}"
        save_dir = os.path.join("trained_models", run_id)
        progress = {"status": "starting", "loss": [], "accuracy": [], "time": 0}
        log_event(f"🚀 Starting training run: {run_id} on dataset {dataset_name}")

        # -------------------------
        # Load dataset
        # -------------------------
        parts = dataset_name.split()
        if len(parts) == 2:
            dataset_repo, dataset_config = parts
            dataset = load_dataset(dataset_repo, dataset_config, split="train[:200]")  # CPU-friendly
        else:
            dataset = load_dataset(dataset_name.strip(), split="train[:200]")

        missing = {"text", "label"} - set(dataset.column_names)
        if missing:
            raise ValueError(
                f"Dataset must provide 'text' and 'label' columns; missing: {sorted(missing)}"
            )
        log_event("📂 Dataset loaded successfully")

        # Size the classification head from the data instead of assuming two
        # classes; ClassLabel features carry the class count directly.
        num_labels = getattr(dataset.features["label"], "num_classes", None)
        if not num_labels:
            num_labels = max(2, int(max(dataset["label"])) + 1)

        # Hold out an evaluation split: epoch-level evaluation needs one.
        splits = dataset.train_test_split(test_size=0.2, seed=42)

        # -------------------------
        # Tokenizer + Model
        # -------------------------
        tokenizer = AutoTokenizer.from_pretrained(base_model)
        model = AutoModelForSequenceClassification.from_pretrained(
            base_model, num_labels=num_labels
        )
        # Some tokenizers (e.g. GPT-2) ship without a pad token, which makes
        # padding="max_length" fail; reuse the EOS token for padding.
        if tokenizer.pad_token is None and tokenizer.eos_token is not None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = tokenizer.pad_token_id

        def tokenize_fn(examples):
            return tokenizer(
                examples["text"], truncation=True, padding="max_length", max_length=128
            )

        splits = splits.map(tokenize_fn, batched=True)
        splits.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

        # Create the run folder only after data and model loaded, so a failed
        # setup does not leave an empty folder that looks like a trained model.
        os.makedirs(save_dir, exist_ok=True)

        # -------------------------
        # Training Args
        # -------------------------
        # Several TrainingArguments fields were renamed across transformers
        # releases (evaluation_strategy -> eval_strategy, no_cuda -> use_cpu);
        # pick whichever name the installed version declares.
        arg_fields = {f.name for f in dataclasses.fields(TrainingArguments)}
        args_kwargs = {
            "output_dir": save_dir,
            "overwrite_output_dir": True,
            "save_strategy": "epoch",
            "num_train_epochs": int(epochs),
            "per_device_train_batch_size": 4,
            "logging_dir": "./logs",
            "logging_steps": 10,
            "report_to": "none",
        }
        args_kwargs[_pick_kwarg(arg_fields, "eval_strategy", "evaluation_strategy")] = "epoch"
        args_kwargs[_pick_kwarg(arg_fields, "use_cpu", "no_cuda")] = True  # force CPU
        # Drop anything the installed version no longer declares rather than
        # failing with a TypeError on an unknown keyword.
        args_kwargs = {k: v for k, v in args_kwargs.items() if k in arg_fields}
        training_args = TrainingArguments(**args_kwargs)

        # -------------------------
        # Metrics
        # -------------------------
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            # Some architectures return extra outputs alongside the logits.
            if isinstance(logits, tuple):
                logits = logits[0]
            preds = logits.argmax(-1)
            return {"accuracy": float((preds == labels).mean())}

        # -------------------------
        # Custom Progress Callback
        # -------------------------
        start_time = time.time()

        class _ProgressCallback(TrainerCallback):
            """Mirror Trainer log/eval events into ``progress`` and the event log."""

            def on_log(self, args, state, control, logs=None, **kwargs):
                if state.is_local_process_zero and logs and "loss" in logs:
                    progress["status"] = "running"
                    progress["loss"].append(logs["loss"])
                    progress["time"] = round(time.time() - start_time, 2)
                    log_event(f"📊 Epoch {state.epoch} - Loss: {logs['loss']}")

            def on_evaluate(self, args, state, control, metrics=None, **kwargs):
                # compute_metrics' "accuracy" is reported with the "eval_" prefix.
                if state.is_local_process_zero and metrics and "eval_accuracy" in metrics:
                    progress["accuracy"].append(metrics["eval_accuracy"])

        # -------------------------
        # Trainer
        # -------------------------
        trainer_kwargs = {
            "model": model,
            "args": training_args,
            "train_dataset": splits["train"],
            "eval_dataset": splits["test"],
            "compute_metrics": compute_metrics,
            "callbacks": [_ProgressCallback()],
        }
        # Newer Trainer versions take the tokenizer as `processing_class`.
        trainer_params = inspect.signature(Trainer.__init__).parameters
        trainer_kwargs[_pick_kwarg(trainer_params, "processing_class", "tokenizer")] = tokenizer
        trainer = Trainer(**trainer_kwargs)
        trainer.train()

        # Save artifacts
        model.save_pretrained(save_dir)
        tokenizer.save_pretrained(save_dir)
        progress["status"] = "done"
        progress["time"] = round(time.time() - start_time, 2)
        log_event(f"✅ Training finished: model saved at {save_dir}")
        return f"✅ Training complete: {run_id}", progress
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # app, and keep the traceback in logs/app.log for debugging.
        log.exception("Training failed")
        log_event(f"❌ Training failed: {e}")
        return f"Error during training: {e}", {"status": "error"}
# app.py (Part 2 of 2)
# =========================
# Inference (Testing / Chat)
# =========================
def chat_with_model(model_run, user_input):
    """Classify ``user_input`` with a trained run and record the exchange.

    Args:
        model_run: Name of a folder under ``trained_models/``; may be ``None``
            or empty when nothing is selected in the UI.
        user_input: Text to classify.

    Returns:
        A user-facing string: the predicted class index, a hint that the model
        has not been trained, or an error description if inference fails.
    """
    not_trained = "❌ Model not trained yet. Train it first."
    # An empty selection would make os.path.join raise (None) or point at the
    # models root itself (""), so treat it as "no model".
    if not model_run:
        return not_trained
    model_dir = os.path.join("trained_models", model_run)
    if not os.path.isdir(model_dir):
        return not_trained
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        model.eval()
        inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True)
        # Inference only: skip gradient tracking to save memory and time.
        with torch.no_grad():
            outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=-1).item()
        # Save memory
        conversation = {"input": user_input, "prediction": prediction}
        save_memory(model_run, conversation)
        return f"🔮 Prediction: {prediction}"
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        log_event(f"❌ Inference failed: {e}")
        return f"Error during inference: {e}"
# =========================
# View Memory
# =========================
def view_memory(model_run):
    """Return the stored conversation history of a run as pretty-printed JSON.

    Args:
        model_run: Run identifier whose memory file should be shown.

    Returns:
        Indented JSON text, or a placeholder message when no history exists.
    """
    memory = load_memory(model_run)
    if not memory:
        return "📭 No memory yet for this model."
    # ensure_ascii=False keeps non-ASCII user input readable in the textbox
    # instead of showing \uXXXX escape sequences.
    return json.dumps(memory, indent=2, ensure_ascii=False)
# =========================
# View Logs
# =========================
def view_logs():
    """Return the full text of logs/events.log, or a placeholder if absent."""
    log_file = os.path.join("logs", "events.log")
    # EAFP avoids the exists()/open() race. Explicit UTF-8 because the logged
    # events contain emoji; errors="replace" keeps the view usable even if an
    # older part of the file was written in a different encoding.
    try:
        with open(log_file, "r", encoding="utf-8", errors="replace") as f:
            return f.read()
    except FileNotFoundError:
        return "📭 No logs yet."
# =========================
# User Guide / Manual
# =========================
# Markdown rendered in the Guide tab. Blank lines around each `---` keep it a
# horizontal rule (directly under a paragraph it would turn that paragraph
# into a heading), and the "Stores" sub-items are indented to nest properly.
USER_GUIDE = """
# 📘 AI Model Builder Guide

Welcome to your **all-in-one AI Model Builder**.
This app allows you to **train, fine-tune, test, and manage AI models** directly in a Hugging Face Space.

---

## 🔹 Step 1: Training a Model

1. Go to the **Training Tab**.
2. Select a **base model** (from dropdown or enter manually).
3. Select a **dataset** (from dropdown or enter manually).
4. Enter a **custom run name** (to keep multiple versions without overwriting).
5. Choose the number of **epochs**.
6. Click **Start Training**.
7. Training progress will appear, and the model will be saved under `trained_models/{run_id}`.

---

## 🔹 Step 2: Testing Your Model

1. Switch to the **Testing Tab**.
2. Select a trained model run.
3. Enter any input text.
4. The app will return a **prediction**.
5. Each chat is saved in per-model **memory**.

---

## 🔹 Step 3: Viewing Memory

- Go to the **Memory Tab**.
- Select a trained model run.
- View past chats + predictions.

---

## 🔹 Step 4: Viewing Logs

- All activity is logged.
- Open the **Logs Tab** to see training sessions, progress, and errors.

---

## 🔹 Technical Notes

- Training runs on **CPU** (slower but free).
- Uses Hugging Face **Transformers + Datasets**.
- Stores:
  - Models → `trained_models/{run_id}`
  - Logs → `logs/events.log`
  - Memory → `memory/memory_{run_id}.json`
"""
# =========================
# UI Defaults
# =========================
# Suggested base models offered in the Training tab dropdown.
TOP_MODELS = [
    "distilbert-base-uncased",
    "bert-base-uncased",
    "roberta-base",
    "google/electra-base-discriminator",
    "albert-base-v2",
    "facebook/bart-base",
    "gpt2",
    "t5-small",
    "microsoft/deberta-base",
    "xlnet-base-cased",
]

# Suggested datasets offered in the Training tab dropdown.
# NOTE(review): some entries (e.g. "glue", "tweet_eval", "cnn_dailymail")
# presumably need a config name or lack the text/label columns the training
# code reads — confirm each entry actually trains.
TOP_DATASETS = [
    "imdb",
    "ag_news",
    "yelp_polarity",
    "dbpedia_14",
    "amazon_polarity",
    "tweet_eval",
    "glue",
    "sst2",
    "cnn_dailymail",
    "emotion",
]
# =========================
# Gradio UI
# =========================
def _refresh_model_choices():
    """Return a dropdown update carrying the current list of trained runs."""
    return gr.update(choices=list_trained_models())


with gr.Blocks() as demo:
    gr.Markdown("# 🧠 AI Model Builder\nTrain, Fine-tune, Test, and Manage Your Own AI Models")

    # ---- Training Tab ----
    with gr.Tab("🛠️ Training"):
        with gr.Row():
            model_dropdown = gr.Dropdown(choices=TOP_MODELS, label="Select Base Model", interactive=True)
            model_textbox = gr.Textbox(label="Or enter custom model ID")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(choices=TOP_DATASETS, label="Select Dataset", interactive=True)
            dataset_textbox = gr.Textbox(label="Or enter custom dataset ID")
        run_name = gr.Textbox(label="Custom Run Name (required)")
        epochs = gr.Slider(1, 5, value=1, step=1, label="Epochs (Training Cycles)")
        train_button = gr.Button("🚀 Start Training")
        train_output = gr.Textbox(label="Training Status")
        progress_output = gr.JSON(label="Progress Details")

        def run_training(model_dropdown, model_textbox, dataset_dropdown, dataset_textbox, run_name, epochs):
            """Validate the form values and start a training run.

            A non-empty textbox value takes precedence over the matching
            dropdown. Returns the (status message, progress dict) pair shown
            in the Training tab.
            """

            def _clean(value):
                # Whitespace-only text is truthy; strip it so it counts as
                # "not provided" and stray spaces never reach the model ID.
                return value.strip() if isinstance(value, str) else value

            base_model = _clean(model_textbox) or _clean(model_dropdown)
            dataset_name = _clean(dataset_textbox) or _clean(dataset_dropdown)
            run_name = _clean(run_name)
            if not base_model or not dataset_name or not run_name:
                return "❌ Please provide base model, dataset, and run name", {"status": "error"}
            return train_model(base_model, dataset_name, run_name, epochs)

        train_button.click(
            run_training,
            inputs=[model_dropdown, model_textbox, dataset_dropdown, dataset_textbox, run_name, epochs],
            outputs=[train_output, progress_output],
        )

    # ---- Testing Tab ----
    with gr.Tab("💬 Testing"):
        test_model_dropdown = gr.Dropdown(choices=list_trained_models(), label="Select Trained Model Run", interactive=True)
        refresh_button = gr.Button("🔄 Refresh Model List")
        test_input = gr.Textbox(label="Your Message")
        test_button = gr.Button("💡 Predict")
        test_output = gr.Textbox(label="Model Response")
        refresh_button.click(_refresh_model_choices, None, test_model_dropdown)
        test_button.click(chat_with_model, inputs=[test_model_dropdown, test_input], outputs=test_output)

    # ---- Memory Tab ----
    with gr.Tab("🧾 Memory"):
        mem_model_dropdown = gr.Dropdown(choices=list_trained_models(), label="Select Trained Model Run", interactive=True)
        mem_refresh = gr.Button("🔄 Refresh Model List")
        mem_button = gr.Button("📂 Load Memory")
        mem_output = gr.Textbox(label="Conversation Memory", lines=15)
        mem_refresh.click(_refresh_model_choices, None, mem_model_dropdown)
        mem_button.click(view_memory, inputs=mem_model_dropdown, outputs=mem_output)

    # ---- Logs Tab ----
    with gr.Tab("📜 Logs"):
        log_button = gr.Button("📖 Show Logs")
        log_output = gr.Textbox(label="Logs", lines=20)
        log_button.click(view_logs, outputs=log_output)

    # ---- Guide Tab ----
    with gr.Tab("📘 Guide"):
        gr.Markdown(USER_GUIDE)

# =========================
# Launch App
# =========================
if __name__ == "__main__":
    demo.launch()