Spaces:

MrUnknown420
/

my-ai-model-builder

Sleeping

App Files Files Community

MrUnknown420 committed on Aug 25

Commit

4b1000b

verified ·

1 Parent(s): 89250c2

Update app.py (#8)

Browse files

- Update app.py (2fcb8ba9234a1098de23f41b3c0dbc83a4a44629)

Files changed (1) hide show

app.py +208 -138

app.py CHANGED Viewed

@@ -1,156 +1,226 @@
 import os
 import json
 import gradio as gr
-from datetime import datetime
-from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
 from datasets import load_dataset
-# ========= MEMORY MANAGEMENT =========
-MEMORY_DIR = "memories"
-MODEL_DIR = "models"
-os.makedirs(MEMORY_DIR, exist_ok=True)
-os.makedirs(MODEL_DIR, exist_ok=True)
 def get_memory_file(model_name):
-    safe_name = model_name.replace("/", "_")
-    return os.path.join(MEMORY_DIR, f"{safe_name}_memory.json")
 def load_memory(model_name):
-    filepath = get_memory_file(model_name)
-    if os.path.exists(filepath):
-        with open(filepath, "r") as f:
             return json.load(f)
     return []
-def save_memory(model_name, memory_data):
-    filepath = get_memory_file(model_name)
-    with open(filepath, "w") as f:
-        json.dump(memory_data, f, indent=2)
-def append_memory(model_name, role, content):
-    memory = load_memory(model_name)
-    memory.append({
-        "timestamp": datetime.now().isoformat(),
-        "role": role,
-        "content": content
-    })
-    save_memory(model_name, memory)
-def clear_memory(model_name):
-    filepath = get_memory_file(model_name)
-    if os.path.exists(filepath):
-        os.remove(filepath)
-    return f"Memory cleared for {model_name}."
-def download_memory(model_name):
-    filepath = get_memory_file(model_name)
-    if os.path.exists(filepath):
-        return filepath
-    return None
-def upload_memory(model_name, file_obj):
-    if file_obj is None:
-        return "No file uploaded."
-    new_data = json.load(open(file_obj.name))
-    save_memory(model_name, new_data)
-    return f"Memory replaced for {model_name}."
-def merge_memory(model_name, file_obj):
-    if file_obj is None:
-        return "No file uploaded."
-    current = load_memory(model_name)
-    new_data = json.load(open(file_obj.name))
-    merged = current + new_data
-    save_memory(model_name, merged)
-    return f"Memory merged for {model_name}."
-# ========= MODEL MANAGEMENT =========
-def train_model(model_name, dataset_name, epochs, output_dir):
-    try:
-        dataset = load_dataset(dataset_name)
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-        def tokenize(batch):
-            return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)
-        dataset = dataset.map(tokenize, batched=True)
-        training_args = TrainingArguments(
-            output_dir=output_dir,
-            overwrite_output_dir=True,
-            per_device_train_batch_size=2,
-            num_train_epochs=int(epochs),
-            save_strategy="epoch",
-            logging_dir=f"{output_dir}/logs"
         )
-        trainer = Trainer(model=model, args=training_args, train_dataset=dataset["train"])
-        trainer.train()
-        model.save_pretrained(output_dir)
-        tokenizer.save_pretrained(output_dir)
-        return f"Training complete. Model saved to {output_dir}"
-    except Exception as e:
-        return f"Error: {str(e)}"
-def chat_with_model(model_name, prompt):
-    try:
-        model_path = os.path.join(MODEL_DIR, model_name.replace("/", "_"))
-        if os.path.exists(model_path):
-            model = AutoModelForCausalLM.from_pretrained(model_path)
-            tokenizer = AutoTokenizer.from_pretrained(model_path)
-        else:
-            model = AutoModelForCausalLM.from_pretrained(model_name)
-            tokenizer = AutoTokenizer.from_pretrained(model_name)
-        inputs = tokenizer(prompt, return_tensors="pt")
-        outputs = model.generate(**inputs, max_length=256, do_sample=True, temperature=0.7)
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        append_memory(model_name, "user", prompt)
-        append_memory(model_name, "assistant", response)
-        return response
-    except Exception as e:
-        return f"Error: {str(e)}"
-# ========= INTERFACE =========
 with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 My AI Model Builder\nTrain, fine-tune, test, and manage AI models with memory.")
-    with gr.Tab("Train Model"):
-        model_name = gr.Textbox(label="Base Model (Hugging Face Hub ID)", value="gpt2")
-        dataset_name = gr.Textbox(label="Dataset Name (Hugging Face Dataset ID)", value="wikitext")
-        epochs = gr.Number(label="Epochs", value=1, precision=0)
-        output_dir = gr.Textbox(label="Output Directory", value="models/custom_model")
-        train_btn = gr.Button("Train Model")
-        train_output = gr.Textbox(label="Training Status")
-        train_btn.click(train_model, inputs=[model_name, dataset_name, epochs, output_dir], outputs=train_output)
-    with gr.Tab("Test Models / Chat"):
-        chat_model = gr.Textbox(label="Model Name", value="gpt2")
-        user_prompt = gr.Textbox(label="Enter Prompt")
-        chat_btn = gr.Button("Chat")
-        chat_output = gr.Textbox(label="Response")
-        chat_btn.click(chat_with_model, inputs=[chat_model, user_prompt], outputs=chat_output)
-    with gr.Tab("Memory Management"):
-        mem_model = gr.Textbox(label="Model Name", value="gpt2")
-        view_btn = gr.Button("View Memory")
-        memory_output = gr.JSON(label="Memory Log")
-        view_btn.click(load_memory, inputs=[mem_model], outputs=memory_output)
-        with gr.Row():
-            dl_btn = gr.Button("Download Memory")
-            up_btn = gr.File(label="Upload Memory JSON")
-            merge_btn = gr.File(label="Merge Memory JSON")
-        dl_file = gr.File()
-        dl_btn.click(download_memory, inputs=[mem_model], outputs=dl_file)
-        up_btn.upload(upload_memory, inputs=[mem_model, up_btn], outputs=memory_output)
-        merge_btn.upload(merge_memory, inputs=[mem_model, merge_btn], outputs=memory_output)
-        clear_btn = gr.Button("Clear Memory")
-        clear_btn.click(clear_memory, inputs=[mem_model], outputs=memory_output)
 demo.launch()

 import os
 import json
+import datetime
 import gradio as gr
 from datasets import load_dataset
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    Trainer,
+    TrainingArguments,
+    DataCollatorForLanguageModeling,
+)
+import torch
+# ==============================
+# Paths & Storage Setup
+# ==============================
+# BASE_DIR is a relative path, so it resolves against the process's current
+# working directory.
+BASE_DIR = "storage"
+MODELS_DIR = os.path.join(BASE_DIR, "models")
+MEMORY_DIR = os.path.join(BASE_DIR, "memory")
+# Both directories are created at import time; exist_ok=True makes repeated
+# start-ups a no-op.
+os.makedirs(MODELS_DIR, exist_ok=True)
+os.makedirs(MEMORY_DIR, exist_ok=True)
+# ==============================
+# Global State
+# ==============================
+# loaded_models maps model_name -> (tokenizer, model); it is filled by load_model.
+# chat_sessions maps "{model_name}_{session_id}" -> list of
+# {"role": ..., "content": ...} dicts; it is filled by chat_with_model.
+loaded_models = {}   # cache for loaded models
+chat_sessions = {}   # memory per model & session
+# ==============================
+# Helper Functions
+# ==============================
 def get_memory_file(model_name):
+    """Return path to memory file for a given model"""
+    return os.path.join(MEMORY_DIR, f"{model_name}_memory.json")
 def load_memory(model_name):
+    """Load chat memory from file"""
+    path = get_memory_file(model_name)
+    if os.path.exists(path):
+        with open(path, "r") as f:
             return json.load(f)
     return []
+def save_memory(model_name, history):
+    """Save chat memory to file"""
+    path = get_memory_file(model_name)
+    with open(path, "w") as f:
+        json.dump(history, f, indent=2)
+def load_model(model_name):
+    """Load model + tokenizer (cached if already loaded)"""
+    if model_name in loaded_models:
+        return loaded_models[model_name]
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+    model.to("cuda" if torch.cuda.is_available() else "cpu")
+    loaded_models[model_name] = (tokenizer, model)
+    return tokenizer, model
+# ==============================
+# Chat / Test Tab
+# ==============================
+def chat_with_model(model_name, user_message, session_id="default"):
+    if not model_name:
+        return "⚠️ Please select a model.", []
+    tokenizer, model = load_model(model_name)
+    # Load session memory
+    session_key = f"{model_name}_{session_id}"
+    if session_key not in chat_sessions:
+        chat_sessions[session_key] = load_memory(model_name)
+    history = chat_sessions[session_key]
+    history.append({"role": "user", "content": user_message})
+    # Prepare input for model
+    context = "\n".join([f"{h['role']}: {h['content']}" for h in history])
+    inputs = tokenizer(context, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=512,
+            pad_token_id=tokenizer.eos_token_id,
         )
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = response[len(context):].strip()
+    history.append({"role": "assistant", "content": response})
+    # Save memory
+    save_memory(model_name, history)
+    chat_sessions[session_key] = history
+    display_history = [(h["content"] if h["role"]=="user" else None,
+                        h["content"] if h["role"]=="assistant" else None) for h in history]
+    return response, display_history
+# ==============================
+# Training Tab
+# ==============================
+def train_model(model_name, dataset_name, epochs, output_dir):
+    if not model_name or not dataset_name:
+        return "⚠️ Please provide model & dataset."
+    tokenizer, model = load_model(model_name)
+    dataset = load_dataset(dataset_name)
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], truncation=True, padding="max_length")
+    tokenized = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
+    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+    args = TrainingArguments(
+        output_dir=output_dir,
+        overwrite_output_dir=True,
+        evaluation_strategy="epoch",
+        save_strategy="epoch",
+        num_train_epochs=int(epochs),
+        per_device_train_batch_size=2,
+        save_total_limit=2,
+        logging_dir="./logs",
+        logging_steps=10,
+    )
+    trainer = Trainer(
+        model=model,
+        args=args,
+        train_dataset=tokenized["train"],
+        eval_dataset=tokenized.get("test"),
+        data_collator=data_collator,
+        tokenizer=tokenizer,
+    )
+    trainer.train()
+    model.save_pretrained(output_dir)
+    tokenizer.save_pretrained(output_dir)
+    return f"✅ Training finished. Model saved to {output_dir}"
+# ==============================
+# Guide / Manual Tab
+# ==============================
+# Markdown text shown to users through gr.Markdown(BEGINNER_GUIDE).
+# NOTE(review): step 5 refers to a "Downloads tab", but the UI visible in this
+# file defines only the Chat, Training and Guide tabs — confirm or update.
+BEGINNER_GUIDE = """
+# Beginner Guide: My AI Model Builder
+1. **Choose a model** → Select a base Hugging Face model to load.
+2. **Train / Fine-Tune** → Pick a dataset and train it for X epochs.
+3. **Test / Chat** → Go to the Chat tab, type prompts, and interact with your model.
+4. **Memory** → Your chats are saved per model automatically.
+5. **Downloads** → You can export models and memory from the Downloads tab.
+That’s it! 🚀
+"""
+# Markdown text shown to users through gr.Markdown(TECHNICAL_GUIDE).
+# NOTE(review): the text says `/storage/`, while BASE_DIR is the relative path
+# "storage" — confirm which is intended.
+# NOTE(review): sessions are separate only in the in-memory chat_sessions dict;
+# chat_with_model loads and saves one memory file per model, not per session.
+TECHNICAL_GUIDE = """
+# Technical Manual: My AI Model Builder
+- **Storage**: All models and memory are saved in `/storage/`.
+- **Memory**: Each model has its own JSON memory file.
+- **Trainer**: Uses Hugging Face `Trainer` with language modeling.
+- **Customization**: You can swap base models, datasets, epochs, etc.
+- **Sessions**: Each model can have multiple session IDs for separate conversations.
+- **Reliability**: App checks if memory/model files exist before creating new ones.
+"""
+# ==============================
+# Build Gradio UI
+# ==============================
 with gr.Blocks() as demo:
+    # Three tabs are defined below: chat, training, and a static guide.
+    gr.Markdown("# 🛠️ My AI Model Builder")
+    with gr.Tabs():
+        # Chat / Test Tab
+        with gr.Tab("💬 Chat / Test"):
+            model_name = gr.Textbox(label="Model Name", placeholder="e.g. gpt2")
+            session_id = gr.Textbox(label="Session ID (optional)", value="default")
+            chatbot = gr.Chatbot()
+            msg = gr.Textbox(label="Your message")
+            send = gr.Button("Send")
+            # chat_with_model returns (response, display_history); with these
+            # outputs the response string is written into the message box and
+            # the history into the chatbot.
+            # NOTE(review): confirm that writing the reply into `msg` is intended.
+            send.click(
+                fn=chat_with_model,
+                inputs=[model_name, msg, session_id],
+                outputs=[msg, chatbot],
+            )
+        # Training Tab
+        with gr.Tab("📚 Training / Fine-Tuning"):
+            base_model = gr.Textbox(label="Base Model", placeholder="e.g. gpt2")
+            dataset_name = gr.Textbox(label="Dataset (HF Hub)", placeholder="e.g. wikitext")
+            epochs = gr.Number(label="Epochs", value=1)
+            output_dir = gr.Textbox(label="Output Dir", value="./storage/models/new_model")
+            train_btn = gr.Button("Start Training")
+            train_output = gr.Textbox(label="Training Log")
+            # The string returned by train_model is shown in the Training Log box.
+            train_btn.click(
+                fn=train_model,
+                inputs=[base_model, dataset_name, epochs, output_dir],
+                outputs=train_output,
+            )
+        # Guide Tab
+        with gr.Tab("📖 Guide / Manual"):
+            gr.Markdown("## Beginner Walkthrough")
+            gr.Markdown(BEGINNER_GUIDE)
+            gr.Markdown("## Technical Reference")
+            gr.Markdown(TECHNICAL_GUIDE)
+# Launches the app at import time (no __main__ guard).
 demo.launch()