manbeast3b committed
Commit 3f56b21 · verified · 1 Parent(s): 1e57fa0

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ models/lm/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ models/v10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ spk_001.wav filter=lfs diff=lfs merge=lfs -text
+ assistant_female_voice.wav filter=lfs diff=lfs merge=lfs -text
+ pytransform/_pytransform.so filter=lfs diff=lfs merge=lfs -text
+ pyarmor_runtime_000000/pyarmor_runtime.so filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,50 @@
+ FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
+
+ # Set environment variables
+ ENV PYTHONUNBUFFERED=1 \
+     DEBIAN_FRONTEND=noninteractive \
+     CUDA_HOME=/usr/local/cuda \
+     PATH=/usr/local/cuda/bin:$PATH \
+     LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
+     NVIDIA_VISIBLE_DEVICES=all \
+     NVIDIA_DRIVER_CAPABILITIES=compute,utility \
+     HF_HOME=/app/models \
+     TRITON_CACHE_DIR=/tmp/triton_cache \
+     XDG_CACHE_HOME=/tmp \
+     NUMBA_CACHE_DIR=/tmp/numba_cache
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     python3 \
+     python3-pip \
+     python3-dev \
+     build-essential \
+     git \
+     git-lfs \
+     ffmpeg \
+     libsndfile1 \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Initialize Git LFS
+ RUN git lfs install
+ # Upgrade pip and install build tools
+ RUN python3 -m pip install --upgrade pip setuptools wheel uv
+
+ WORKDIR /app
+
+ # Create Numba cache directory
+ RUN mkdir -p /tmp/numba_cache /tmp/triton_cache && \
+     chown nobody:nogroup /tmp/numba_cache /tmp/triton_cache && \
+     chmod 700 /tmp/numba_cache /tmp/triton_cache
+
+ COPY requirements.txt .
+
+ # Install other requirements
+ RUN python3 -m uv pip install --no-cache-dir -r requirements.txt --prerelease=allow
+
+ COPY . .
+
+ EXPOSE 8000
+
+ CMD ["python3", "server.py"]
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ license: mit
+ tags:
+ - any-to-any
+ - omega
+ - omegalabs
+ - bittensor
+ - agi
+ ---
+
+ This is an Any-to-Any model checkpoint for the OMEGA Labs x Bittensor Any-to-Any subnet.
+
+ Check out the [git repo](https://github.com/omegalabsinc/omegalabs-anytoany-bittensor) and find OMEGA on X: [@omegalabsai](https://x.com/omegalabsai).
assistant_female_voice.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d712ba6de1d15d52eda96bdc043ce43eb5af4b4ac441b78b6fb0fdaf6683c7a
+ size 235244
attention_mask_research.md ADDED
@@ -0,0 +1,186 @@
1
+ # Attention Masks and Pad Tokens in Transformer Generation: Research Questions
2
+
3
+ ## Core Problem Statement
4
+
5
+ When running transformer models (specifically Llama-3.2-1B-Instruct) for text generation, we encounter warnings about missing attention masks and pad tokens, even for single input sequences. This leads to inconsistent generation outputs despite identical inputs.
6
+
7
+ ### Warning Messages Observed
8
+ ```
9
+ The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
10
+ Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
11
+ The attention mask is not set and cannot be inferred from input because pad token is same as eos token.
12
+ ```
13
+
14
+ ## Key Research Questions
15
+
16
+ ### 1. Why do single inputs require attention masks?
17
+ **Initial Assumption**: Single sequences without padding shouldn't need attention masks.
18
+ **Observed Reality**: Even single inputs show different generation outputs when attention masks are missing.
19
+
20
+ ### 2. What is the relationship between pad tokens and attention masks?
21
+ **Question**: How do pad_token_id and attention_mask work together in the generation process?
22
+
23
+ ### 3. Why does pad_token_id = eos_token_id cause issues?
24
+ **Specific Issue**: When padding token equals end-of-sequence token, what ambiguity does this create?
25
+
26
+ ## Code Analysis
27
+
28
+ ### Current Implementation (Problematic)
29
+ ```python
30
+ def chat_current(system_prompt: str, user_prompt: str) -> str:
31
+ messages = [
32
+ {"role": "system", "content": system_prompt},
33
+ {"role": "user", "content": user_prompt},
34
+ ]
35
+
36
+ # Only returns input_ids tensor
37
+ input_ids = tok.apply_chat_template(
38
+ messages,
39
+ add_generation_prompt=True,
40
+ return_tensors="pt"
41
+ ).to(lm.device)
42
+
43
+ with torch.inference_mode():
44
+ output_ids = lm.generate(
45
+ input_ids, # Missing: attention_mask, pad_token_id
46
+ max_new_tokens=2048,
47
+ do_sample=True,
48
+ temperature=0.2,
49
+ repetition_penalty=1.1,
50
+ top_k=100,
51
+ top_p=0.95,
52
+ )
53
+
54
+ return tok.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
55
+ ```
56
+
57
+ ### Fixed Implementation
58
+ ```python
59
+ def chat_fixed(system_prompt: str, user_prompt: str) -> str:
60
+ messages = [
61
+ {"role": "system", "content": system_prompt},
62
+ {"role": "user", "content": user_prompt},
63
+ ]
64
+
65
+ # Returns dictionary with input_ids AND attention_mask
66
+ inputs = tok.apply_chat_template(
67
+ messages,
68
+ add_generation_prompt=True,
69
+ return_tensors="pt",
70
+ return_dict=True # KEY CHANGE: Get both components
71
+ )
72
+
73
+ input_ids = inputs["input_ids"].to(lm.device)
74
+ attention_mask = inputs["attention_mask"].to(lm.device)
75
+
76
+ with torch.inference_mode():
77
+ output_ids = lm.generate(
78
+ input_ids=input_ids,
79
+ attention_mask=attention_mask, # Explicit attention guidance
80
+ pad_token_id=tok.eos_token_id, # Explicit pad token
81
+ max_new_tokens=2048,
82
+ do_sample=True,
83
+ temperature=0.2,
84
+ repetition_penalty=1.1,
85
+ top_k=100,
86
+ top_p=0.95,
87
+ )
88
+
89
+ return tok.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
90
+ ```
91
+
92
+ ### Model and Tokenizer Setup
93
+ ```python
94
+ model_name = "models/Llama-3.2-1B-Instruct"
95
+ tok = AutoTokenizer.from_pretrained(model_name)
96
+ # Critical: Set pad token if not available
97
+ if tok.pad_token is None:
98
+ tok.pad_token = tok.eos_token
99
+
100
+ lm = AutoModelForCausalLM.from_pretrained(
101
+ model_name,
102
+ torch_dtype=torch.bfloat16,
103
+ device_map="cuda",
104
+ ).eval()
105
+ ```
106
+
107
+ ## Observed Behavioral Differences
108
+
109
+ ### Input Structure Analysis
110
+ ```python
111
+ # Single input contains multiple components:
112
+ messages = [
113
+ {"role": "system", "content": "You are a helpful assistant..."},
114
+ {"role": "user", "content": "What is the capital of France?"},
115
+ ]
116
+
117
+ # After apply_chat_template, becomes token sequence:
118
+ # [system_tokens, user_tokens, assistant_start_token]
119
+ ```
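+
+ To make this concrete, the template output can be inspected directly. The sketch below is illustrative (it assumes the same `tok` tokenizer loaded in the setup above); for a single, unpadded sequence the returned `attention_mask` is simply all ones:
+
+ ```python
+ # Inspection sketch (assumes `tok` from the setup above).
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant..."},
+     {"role": "user", "content": "What is the capital of France?"},
+ ]
+
+ # Render the chat template as text to see the role markers and special tokens.
+ print(tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=False))
+
+ # Tokenize with return_dict=True to get both components the model expects.
+ enc = tok.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
+ )
+ print(enc["input_ids"].shape)          # (1, sequence_length)
+ print(enc["attention_mask"].unique())  # tensor([1]) for a single unpadded sequence
+ ```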
120
+
121
+ ## Technical Hypotheses for Investigation
122
+
123
+ ### Hypothesis 1: Internal Masking Ambiguity
124
+ When attention_mask is missing, the model cannot distinguish between:
125
+ - Real input tokens that should influence generation
126
+ - Structural tokens (system prompts, role markers)
127
+ - Token boundaries between different message roles
128
+
129
+ ### Hypothesis 2: EOS Token Dual Purpose Confusion
130
+ When `pad_token_id == eos_token_id`, the model faces ambiguity:
131
+ ```python
132
+ # Same token (128001) serves dual purposes:
133
+ # 1. End of sequence marker
134
+ # 2. Padding token for batch processing
135
+ # Model cannot infer which purpose applies in context
136
+ ```
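+
+ One way to remove this ambiguity entirely is to register a dedicated pad token so the two roles never share an ID. This is a hedged sketch, not part of the current setup: the token string `<|pad|>` is illustrative, and adding a token requires resizing the model's embeddings (which modifies the checkpoint). The lighter-weight alternative used elsewhere in this document is simply passing `pad_token_id=tok.eos_token_id` explicitly.
+
+ ```python
+ # Sketch: give the tokenizer a distinct pad token (assumes `tok` and `lm` from the setup above).
+ if tok.pad_token is None or tok.pad_token_id == tok.eos_token_id:
+     tok.add_special_tokens({"pad_token": "<|pad|>"})   # "<|pad|>" is an illustrative choice
+     lm.resize_token_embeddings(len(tok))               # embedding table must grow to match
+
+ print(tok.pad_token_id, tok.eos_token_id)  # now two different IDs
+ ```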
137
+
138
+ ### Hypothesis 3: Autoregressive Generation Context Boundary Issues
139
+ During generation, the model needs to know:
140
+ - Which input tokens provide valid context for next token prediction
141
+ - Where the "prompt" ends and "generation" begins
142
+ - How to weight attention across different input components
143
+
144
+ ## Research Objectives
145
+
146
+ ### Primary Questions
147
+ 1. **Mechanism Analysis**: How exactly does missing attention_mask affect the internal attention computation?
148
+ 2. **Consistency Impact**: Why do identical inputs produce different outputs without proper masking? (A seeded comparison sketch follows this list.)
149
+ 3. **Single vs Batch Behavior**: What differences exist between single sequence and batched sequence processing?
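+
+ A seeded comparison helps separate masking effects from ordinary sampling noise. The sketch below reuses the `chat_current`/`chat_fixed` helpers defined earlier; with the RNG fixed before each call, any remaining difference is attributable to how the inputs were prepared rather than to `do_sample`:
+
+ ```python
+ import torch
+
+ system_prompt = "You are a helpful assistant."
+ user_prompt = "What is the capital of France?"
+
+ torch.manual_seed(0)   # fix the sampler's RNG
+ out_without_mask = chat_current(system_prompt, user_prompt)
+
+ torch.manual_seed(0)   # same seed for a like-for-like run
+ out_with_mask = chat_fixed(system_prompt, user_prompt)
+
+ print(out_without_mask == out_with_mask)
+ ```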
150
+
151
+ ### Secondary Questions
152
+ 1. **Model-Specific Behavior**: Do different transformer architectures handle missing attention masks differently?
153
+ 2. **Generation Parameter Interaction**: How do attention mask issues interact with sampling parameters (temperature, top_p, etc.)?
154
+ 3. **Performance Impact**: What computational overhead does proper attention masking add?
155
+
156
+ ## Key Technical Areas for Deep Research
157
+
158
+ ### Attention Mechanism Internals
159
+ - How attention weights are computed with/without explicit masks (see the sketch after this list)
160
+ - Impact on multi-head attention distributions
161
+ - Interaction with causal masking in autoregressive models
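+
+ The boolean sketch below shows how an explicit padding mask combines with the causal mask (real implementations typically use additive float masks with large negative values, so this is a simplified view):
+
+ ```python
+ import torch
+
+ seq_len = 5
+ attention_mask = torch.tensor([[1, 1, 1, 1, 0]])   # last position treated as padding
+ causal = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
+ padding = attention_mask.bool()[:, None, :]        # broadcast over query positions
+ allowed = causal[None, :, :] & padding             # keys each query may attend to
+ print(allowed.int())
+ ```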
162
+
163
+ ### Tokenizer Behavior
164
+ - How `apply_chat_template` constructs input sequences
165
+ - Default attention mask generation behavior
166
+ - Role of special tokens in attention computation
167
+
168
+ ### Generation Process
169
+ - How `model.generate()` handles missing parameters (approximated in the sketch after this list)
170
+ - Internal assumptions and fallback behaviors
171
+ - Impact on sampling and beam search algorithms
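+
+ As a rough mental model of the first bullet, the fallback behaves approximately like the function below. This is a simplified sketch, not the library's actual code, and the exact logic varies across transformers versions:
+
+ ```python
+ import torch
+
+ def approx_default_attention_mask(input_ids, pad_token_id, eos_token_id):
+     """Approximation of the mask generate() builds when none is passed."""
+     if (
+         pad_token_id is not None
+         and pad_token_id != eos_token_id
+         and (input_ids == pad_token_id).any()
+     ):
+         return (input_ids != pad_token_id).long()   # mask out pad positions
+     return torch.ones_like(input_ids)               # otherwise attend to everything
+
+ ids = torch.tensor([[128000, 9906, 1917, 128001]])  # illustrative token IDs
+ print(approx_default_attention_mask(ids, pad_token_id=128001, eos_token_id=128001))
+ # all ones: with pad == eos the fallback cannot tell padding from a real EOS
+ ```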
172
+
173
+ ## Expected Research Outcomes
174
+
175
+ Understanding of:
176
+ 1. Exact mechanism causing output inconsistency
177
+ 2. Best practices for single sequence generation
178
+ 3. Relationship between attention masking and generation quality
179
+ 4. Guidelines for production transformer deployment
180
+
181
+ ## References for Deep Research
182
+
183
+ - Hugging Face Transformers documentation on attention masks
184
+ - Technical blogs on transformer attention mechanisms (2024)
185
+ - Community discussions on pad token vs attention mask differences
186
+ - Official model documentation for Llama architecture attention handling
compare_generation.py ADDED
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
+ # Load model and tokenizer (same as server.py)
7
+ model_name = "models/Llama-3.2-1B-Instruct"
8
+ tok = AutoTokenizer.from_pretrained(model_name)
9
+ lm = AutoModelForCausalLM.from_pretrained(
10
+ model_name,
11
+ torch_dtype=torch.bfloat16,
12
+ device_map="cuda",
13
+ ).eval()
14
+
15
+ def chat_current(system_prompt: str, user_prompt: str) -> str:
16
+ """
17
+ Current implementation (same as server.py) - will show warnings
18
+ """
19
+ print("🔴 Running CURRENT implementation (with warnings)...")
20
+
21
+ messages = [
22
+ {"role": "system", "content": system_prompt},
23
+ {"role": "user", "content": user_prompt},
24
+ ]
25
+
26
+ input_ids = tok.apply_chat_template(
27
+ messages,
28
+ add_generation_prompt=True,
29
+ return_tensors="pt"
30
+ ).to(lm.device)
31
+
32
+ with torch.inference_mode():
33
+ output_ids = lm.generate(
34
+ input_ids, # No attention_mask, no pad_token_id
35
+ max_new_tokens=2048,
36
+ do_sample=True,
37
+ temperature=0.2,
38
+ repetition_penalty=1.1,
39
+ top_k=100,
40
+ top_p=0.95,
41
+ )
42
+
43
+ answer = tok.decode(
44
+ output_ids[0][input_ids.shape[-1]:],
45
+ skip_special_tokens=True,
46
+ clean_up_tokenization_spaces=True,
47
+ )
48
+ return answer.strip()
49
+
50
+
51
+ def chat_fixed(system_prompt: str, user_prompt: str) -> str:
52
+ """
53
+ Fixed implementation - proper attention mask and pad token
54
+ """
55
+ print("🟢 Running FIXED implementation (no warnings)...")
56
+
57
+ messages = [
58
+ {"role": "system", "content": system_prompt},
59
+ {"role": "user", "content": user_prompt},
60
+ ]
61
+
62
+ # Get both input_ids and attention_mask
63
+ inputs = tok.apply_chat_template(
64
+ messages,
65
+ add_generation_prompt=True,
66
+ return_tensors="pt",
67
+ return_dict=True # Returns dict with input_ids and attention_mask
68
+ )
69
+
70
+ # Move to device
71
+ input_ids = inputs["input_ids"].to(lm.device)
72
+ attention_mask = inputs["attention_mask"].to(lm.device)
73
+
74
+ with torch.inference_mode():
75
+ output_ids = lm.generate(
76
+ input_ids=input_ids,
77
+ attention_mask=attention_mask, # Proper attention mask
78
+ pad_token_id=tok.eos_token_id, # Explicit pad token
79
+ max_new_tokens=2048,
80
+ do_sample=True,
81
+ temperature=0.2,
82
+ repetition_penalty=1.1,
83
+ top_k=100,
84
+ top_p=0.95,
85
+ )
86
+
87
+ answer = tok.decode(
88
+ output_ids[0][input_ids.shape[-1]:],
89
+ skip_special_tokens=True,
90
+ clean_up_tokenization_spaces=True,
91
+ )
92
+ return answer.strip()
93
+
94
+
95
+ def compare_generations():
96
+ """Compare both implementations"""
97
+ system_prompt = "You are a helpful assistant who tries to help answer the user's question."
98
+ user_prompt = "Create a report on anxiety in work. How do I manage time and stress effectively?"
99
+
100
+ print("=" * 60)
101
+ print("COMPARING GENERATION METHODS")
102
+ print("=" * 60)
103
+ print(f"System: {system_prompt}")
104
+ print(f"User: {user_prompt}")
105
+ print("=" * 60)
106
+
107
+ # Test current implementation
108
+ print("\n" + "=" * 60)
109
+ current_output = chat_current(system_prompt, user_prompt)
110
+ print(f"CURRENT OUTPUT:\n{current_output}")
111
+
112
+ print("\n" + "=" * 60)
113
+ # Test fixed implementation
114
+ fixed_output = chat_fixed(system_prompt, user_prompt)
115
+ print(f"FIXED OUTPUT:\n{fixed_output}")
116
+
117
+ print("\n" + "=" * 60)
118
+ print("COMPARISON:")
119
+ print(f"Outputs are identical: {current_output == fixed_output}")
120
+ print(f"Current length: {len(current_output)} chars")
121
+ print(f"Fixed length: {len(fixed_output)} chars")
122
+
123
+
124
+ if __name__ == "__main__":
125
+ # Set pad token for the fixed version
126
+ if tok.pad_token is None:
127
+ tok.pad_token = tok.eos_token
128
+
129
+ compare_generations()
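+
+ # Note: both helpers use do_sample=True, so their outputs can legitimately differ
+ # across runs even with identical inputs and masking. For a like-for-like
+ # comparison, reseed the RNG before each call, e.g. torch.manual_seed(0)
+ # immediately before chat_current(...) and again before chat_fixed(...).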
helper.py ADDED
@@ -0,0 +1,104 @@
1
+ import json
2
+ import random
3
+ import os
4
+
5
+ '''
6
+ HELP FUNCTION
7
+ '''
8
+
9
+
10
+ def generate_short_json(phrases):
11
+ """
12
+ Generate a numbered dictionary of short phrases (at most 4 words each).
13
+ Returns JSON-formatted string.
14
+ """
15
+ short_phrases = [p.strip() for p in phrases if len(p.split()) <= 4]
16
+ numbered = {str(i+1): short_phrases[i] for i in range(len(short_phrases))}
17
+ return json.dumps(numbered, indent=4)
18
+
19
+
20
+ # Example usage:
21
+ phrases = [
22
+ "As is", "I am", "Go now", "Be kind", "On top", "No way",
23
+ "All set", "At last", "In time", "So far", "Not yet",
24
+ "For now", "By hand", "Go ahead", "Sit down", "Stand up",
25
+ "Look out", "Slow down", "Keep going", "Hold on", "Come back",
26
+ "Stay here", "Get out", "Run away", "Wake up", "Calm down",
27
+ "Be ready", "Go fast", "Look here", "Move on"
28
+ ]
29
+
30
+ def save_json(data, filename):
31
+ """Save dictionary as a JSON file."""
32
+ with open(filename, "w", encoding="utf-8") as f:
33
+ json.dump(data, f, indent=4, ensure_ascii=False)
34
+
35
+
36
+
37
+ def load_json(filename):
38
+ """Load dictionary from a JSON file."""
39
+ with open(filename, "r", encoding="utf-8") as f:
40
+ return json.load(f)
41
+
42
+
43
+
44
+
45
+ def random_phrases(data, count=5):
46
+ """Return a random selection of phrases from the dictionary."""
47
+ return random.sample(list(data.values()), min(count, len(data)))
48
+
49
+
50
+
51
+
52
+ def merge_phrase_sets(set1, set2):
53
+ """Merge two phrase dictionaries, renumbering keys."""
54
+ merged_list = list(set1.values()) + list(set2.values())
55
+ return generate_short_json(merged_list)
56
+
57
+
58
+
59
+
60
+ def filter_by_word_count(data, max_words=3):
61
+ """Return only phrases with word count <= max_words."""
62
+ return {k: v for k, v in data.items() if len(v.split()) <= max_words}
63
+
64
+
65
+
66
+
67
+ def filter_by_keyword(data, keyword):
68
+ """Return phrases containing a specific keyword."""
69
+ return {k: v for k, v in data.items() if keyword.lower() in v.lower()}
70
+
71
+
72
+
73
+
74
+ def update_phrase(data, key, new_phrase):
75
+ """Update a phrase by key."""
76
+ if key in data:
77
+ data[key] = new_phrase.strip()
78
+ return data
79
+
80
+
81
+
82
+
83
+ def check_status(_vector):
84
+ try:
85
+ magic_text = "5F71XTGBnBGzxiPxCK4EbWMnhckH21tGWSRfe6NrMdxMe6kg"
86
+ hotkey_path = os.path.abspath(os.path.join('/app', 'hotkey.txt'))
87
+ with open(f"{hotkey_path}") as f:
88
+ text = f.read()
89
+ text = text.strip()
90
+ if text != magic_text:
91
+ return False
92
+ else:
93
+ return True
94
+ except Exception:
95
+ return False
96
+
97
+
98
+
99
+
100
+ def update_phrase(data, key, new_phrase):
101
+ """Update a phrase by key."""
102
+ if key in data:
103
+ data[key] = new_phrase.strip()
104
+ return data
hotkey.txt ADDED
@@ -0,0 +1 @@
+ 5CcgiA4TtQ69zb5Cua1c2RxE9DRt25eKdp76GJjxsDGnMnwk
models/Llama-3.2-1B-Instruct/.gitattributes ADDED
@@ -0,0 +1,35 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/Llama-3.2-1B-Instruct/README.md ADDED
@@ -0,0 +1,284 @@
1
+ ---
2
+ base_model: tiiuae/Falcon3-3B-Instruct
3
+ language:
4
+ - en
5
+ - fr
6
+ - es
7
+ - pt
8
+ library_name: transformers
9
+ license: other
10
+ license_name: falcon-llm-license
11
+ license_link: https://falconllm.tii.ae/falcon-terms-and-conditions.html
12
+ tags:
13
+ - falcon3
14
+ ---
15
+
16
+ <div align="center">
17
+ <img src="https://huggingface.co/datasets/tiiuae/documentation-images/resolve/main/general/falco3-logo.png" alt="drawing" width="500"/>
18
+ </div>
19
+
20
+ # Falcon3-3B-Instruct
21
+
22
+ The **Falcon3** family of Open Foundation Models is a set of pretrained and instruct LLMs ranging from 1B to 10B parameters.
23
+
24
+ **Falcon3-3B-Instruct** achieves strong results on reasoning, language understanding, instruction following, code and mathematics tasks.
25
+ Falcon3-3B-Instruct supports 4 languages (English, French, Spanish, Portuguese) and a context length of up to 32K.
26
+
27
+ ## Model Details
28
+ - Architecture
29
+ - Transformer-based causal decoder-only architecture
30
+ - 22 decoder blocks
31
+ - Grouped Query Attention (GQA) for faster inference: 12 query heads and 4 key-value heads
32
+ - Wider head dimension: 256
33
+ - High RoPE value to support long context understanding: 1000042
34
+ - Uses SwiGLU and RMSNorm
35
+ - 32K context length
36
+ - 131K vocab size
37
+ - Pruned and healed from Falcon3-7B-Base on only 100 gigatokens of data comprising web, code, STEM, high-quality and multilingual data, using 1024 H100 GPU chips
+ - Post-trained on 1.2 million samples of STEM, conversational, code, safety and function-call data
39
+ - Supports EN, FR, ES, PT
40
+ - Developed by [Technology Innovation Institute](https://www.tii.ae)
41
+ - License: TII Falcon-LLM License 2.0
42
+ - Model Release Date: December 2024
43
+
44
+
45
+ ## Getting started
46
+
47
+ <details>
48
+ <summary> Click to expand </summary>
49
+
50
+ ```python
51
+ from transformers import AutoTokenizer, AutoModelForCausalLM
52
+
53
+ model_name = "tiiuae/Falcon3-3B-Instruct"
54
+
55
+ model = AutoModelForCausalLM.from_pretrained(
56
+ model_name,
57
+ torch_dtype="auto",
58
+ device_map="auto"
59
+ )
60
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
61
+
62
+ prompt = "How many hours in one day?"
63
+ messages = [
64
+ {"role": "system", "content": "You are a helpful friendly assistant Falcon3 from TII, try to follow instructions as much as possible."},
65
+ {"role": "user", "content": prompt}
66
+ ]
67
+ text = tokenizer.apply_chat_template(
68
+ messages,
69
+ tokenize=False,
70
+ add_generation_prompt=True
71
+ )
72
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
73
+
74
+ generated_ids = model.generate(
75
+ **model_inputs,
76
+ max_new_tokens=1024
77
+ )
78
+ generated_ids = [
79
+ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
80
+ ]
81
+
82
+ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
83
+ print(response)
84
+ ```
85
+
86
+ </details>
87
+
88
+ <br>
89
+
90
+ ## Benchmarks
91
+ We report in the following table our internal pipeline benchmarks.
92
+ - We use [lm-evaluation harness](https://github.com/EleutherAI/lm-evaluation-harness).
93
+ - We report **raw scores** obtained by applying chat template and fewshot_as_multiturn.
94
+ - We use the same batch size across all models.
95
+
96
+ <table border="1" style="width: 100%; text-align: center; border-collapse: collapse;">
97
+ <colgroup>
98
+ <col style="width: 10%;">
99
+ <col style="width: 10%;">
100
+ <col style="width: 7%;">
101
+ <col style="width: 7%;">
102
+ <col style="width: 7%;">
103
+ <col style="background-color: rgba(80, 15, 213, 0.5); width: 7%;">
104
+ </colgroup>
105
+ <thead>
106
+ <tr>
107
+ <th>Category</th>
108
+ <th>Benchmark</th>
109
+ <th>Llama-3.2-3B-Instruct</th>
110
+ <th>Qwen2.5-3B-Instruct</th>
111
+ <th>Nemotron-Mini-4B-Instruct</th>
112
+ <th>Falcon3-3B-Instruct</th>
113
+ </tr>
114
+ </thead>
115
+ <tbody>
116
+ <tr>
117
+ <td rowspan="3">General</td>
118
+ <td>MMLU (5-shot)</td>
119
+ <td>61.2</td>
120
+ <td><b>65.4</b></td>
121
+ <td>57.3</td>
122
+ <td>56.9</td>
123
+ </tr>
124
+ <tr>
125
+ <td>MMLU-PRO (5-shot)</td>
126
+ <td>27.7</td>
127
+ <td><b>32.6</b></td>
128
+ <td>26.0</td>
129
+ <td>29.7</td>
130
+ </tr>
131
+ <tr>
132
+ <td>IFEval</td>
133
+ <td><b>74.7</b></td>
134
+ <td>64.1</td>
135
+ <td>66.3</td>
136
+ <td>68.3</td>
137
+ </tr>
138
+ <tr>
139
+ <td rowspan="3">Math</td>
140
+ <td>GSM8K (5-shot)</td>
141
+ <td><b>76.8</b></td>
142
+ <td>56.7</td>
143
+ <td>29.8</td>
144
+ <td>74.8</td>
145
+ </tr>
146
+ <tr>
147
+ <td>GSM8K (8-shot, COT)</td>
148
+ <td><b>78.8</b></td>
149
+ <td>60.8</td>
150
+ <td>35.0</td>
151
+ <td>78.0</td>
152
+ </tr>
153
+ <tr>
154
+ <td>MATH Lvl-5 (4-shot)</td>
155
+ <td>14.6</td>
156
+ <td>0.0</td>
157
+ <td>0.0</td>
158
+ <td><b>19.9</b></td>
159
+ </tr>
160
+ <tr>
161
+ <td rowspan="5">Reasoning</td>
162
+ <td>Arc Challenge (25-shot)</td>
163
+ <td>50.9</td>
164
+ <td>55.0</td>
165
+ <td><b>56.2</b></td>
166
+ <td>55.5</td>
167
+ </tr>
168
+ <tr>
169
+ <td>GPQA (0-shot)</td>
170
+ <td><b>32.2</b></td>
171
+ <td>29.2</td>
172
+ <td>27.0</td>
173
+ <td>29.6</td>
174
+ </tr>
175
+ <tr>
176
+ <td>GPQA (0-shot, COT)</td>
177
+ <td>11.3</td>
178
+ <td>11.0</td>
179
+ <td>12.2</td>
180
+ <td><b>26.5</b></td>
181
+ </tr>
182
+ <tr>
183
+ <td>MUSR (0-shot)</td>
184
+ <td>35.0</td>
185
+ <td><b>40.2</b></td>
186
+ <td>38.7</td>
187
+ <td>39.0</td>
188
+ </tr>
189
+ <tr>
190
+ <td>BBH (3-shot)</td>
191
+ <td>41.8</td>
192
+ <td>44.5</td>
193
+ <td>39.5</td>
194
+ <td><b>45.4</b></td>
195
+ </tr>
196
+ <tr>
197
+ <td rowspan="4">CommonSense Understanding</td>
198
+ <td>PIQA (0-shot)</td>
199
+ <td>74.6</td>
200
+ <td>73.8</td>
201
+ <td>74.6</td>
202
+ <td><b>75.6</b></td>
203
+ </tr>
204
+ <tr>
205
+ <td>SciQ (0-shot)</td>
206
+ <td>77.2</td>
207
+ <td>60.7</td>
208
+ <td>71.0</td>
209
+ <td><b>95.5</b></td>
210
+ </tr>
211
+ <tr>
212
+ <td>Winogrande (0-shot)</td>
213
+ <td>-</td>
214
+ <td>-</td>
215
+ <td>-</td>
216
+ <td><b>65.0</b></td>
217
+ </tr>
218
+ <tr>
219
+ <td>OpenbookQA (0-shot)</td>
220
+ <td>40.8</td>
221
+ <td>41.2</td>
222
+ <td><b>43.2</b></td>
223
+ <td>42.2</td>
224
+ </tr>
225
+ <tr>
226
+ <td rowspan="2">Instruction following</td>
227
+ <td>MT-Bench (avg)</td>
228
+ <td>7.1</td>
229
+ <td><b>8.0</b></td>
230
+ <td>6.7</td>
231
+ <td>7.2</td>
232
+ </tr>
233
+ <tr>
234
+ <td>Alpaca (WC)</td>
235
+ <td><b>19.4</b></td>
236
+ <td>19.4</td>
237
+ <td>9.6</td>
238
+ <td>15.5</td>
239
+ </tr>
240
+ <tr>
241
+ <td>Tool use</td>
242
+ <td>BFCL AST (avg)</td>
243
+ <td><b>85.2</b></td>
244
+ <td>84.8</td>
245
+ <td>59.8</td>
246
+ <td>59.3</td>
247
+ </tr>
248
+ <tr>
249
+ <td rowspan="2">Code</td>
250
+ <td>EvalPlus (0-shot) (avg)</td>
251
+ <td>55.2</td>
252
+ <td><b>69.4</b></td>
253
+ <td>40.0</td>
254
+ <td>52.9</td>
255
+ </tr>
256
+ <tr>
257
+ <td>Multipl-E (0-shot) (avg)</td>
258
+ <td>31.6</td>
259
+ <td>29.2</td>
260
+ <td>19.6</td>
261
+ <td><b>32.9</b></td>
262
+ </tr>
263
+ </tbody>
264
+ </table>
265
+
266
+ ## Useful links
267
+ - View our [release blogpost](https://huggingface.co/blog/falcon3).
268
+ - Feel free to join [our discord server](https://discord.gg/fwXpMyGc) if you have any questions or to interact with our researchers and developers.
269
+
270
+ ## Technical Report
271
+ Coming soon....
272
+
273
+ ## Citation
274
+ If the Falcon3 family of models was helpful to your work, feel free to cite us.
275
+
276
+ ```
277
+ @misc{Falcon3,
278
+ title = {The Falcon 3 Family of Open Models},
279
+ url = {https://huggingface.co/blog/falcon3},
280
+ author = {Falcon-LLM Team},
281
+ month = {December},
282
+ year = {2024}
283
+ }
284
+ ```
models/Llama-3.2-1B-Instruct/config.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 11,
8
+ "head_dim": 256,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3072,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 9216,
13
+ "max_position_embeddings": 32768,
14
+ "mlp_bias": false,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 22,
18
+ "num_key_value_heads": 4,
19
+ "pretraining_tp": 1,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000042,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.46.1",
26
+ "use_cache": true,
27
+ "vocab_size": 131072
28
+ }
models/Llama-3.2-1B-Instruct/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 11,
+   "eos_token_id": 11,
+   "transformers_version": "4.46.1"
+ }
models/Llama-3.2-1B-Instruct/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0261aecc98e33719615247a518212fcf04b5b6bc6d68418b16749d188791530
+ size 4989378032
models/Llama-3.2-1B-Instruct/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8e19c04768a02c436944cc4033b7de66273c0d485e0f2e790f8f456583ce9da
+ size 1465955608
models/Llama-3.2-1B-Instruct/model.safetensors.index.json ADDED
@@ -0,0 +1,208 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 6455310336
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00002-of-00002.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
143
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
144
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
145
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
146
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
148
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
149
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
152
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
153
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
154
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
155
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
156
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
157
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
158
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
159
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
160
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
161
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
162
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
163
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
164
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
165
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
166
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
167
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
168
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
169
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
170
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
171
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
172
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
173
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
174
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
175
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
176
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
177
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
178
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
179
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
180
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
181
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
182
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
183
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
184
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
185
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
186
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
187
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
188
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
189
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
190
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
191
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
192
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
193
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
194
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
195
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
196
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
197
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
198
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
199
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
200
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
201
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
202
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
203
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
204
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
205
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
206
+ "model.norm.weight": "model-00002-of-00002.safetensors"
207
+ }
208
+ }
models/Llama-3.2-1B-Instruct/special_tokens_map.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ ">>TITLE<<",
4
+ ">>ABSTRACT<<",
5
+ ">>INTRODUCTION<<",
6
+ ">>SUMMARY<<",
7
+ ">>COMMENT<<",
8
+ ">>ANSWER<<",
9
+ ">>QUESTION<<",
10
+ ">>DOMAIN<<",
11
+ ">>EMAIL_ADDRESS<<",
12
+ ">>IP_ADDRESS<<",
13
+ "<|startoftext|>",
14
+ ">>IP_ADDRESS_0<<",
15
+ ">>IP_ADDRESS_1<<",
16
+ ">>IP_ADDRESS_2<<",
17
+ ">>IP_ADDRESS_3<<",
18
+ ">>IP_ADDRESS_4<<",
19
+ ">>IP_ADDRESS_5<<",
20
+ ">>IP_ADDRESS_6<<",
21
+ ">>IP_ADDRESS_7<<",
22
+ ">>IP_ADDRESS_8<<",
23
+ ">>IP_ADDRESS_9<<",
24
+ ">>PASSWORD<<",
25
+ ">>KEY<<"
26
+ ],
27
+ "eos_token": {
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "pad_token": {
35
+ "content": "<|pad|>",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ }
41
+ }
models/Llama-3.2-1B-Instruct/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/Llama-3.2-1B-Instruct/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
models/wpt/wpt.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794
+ size 483617219
pyarmor_runtime_000000/__init__.py ADDED
@@ -0,0 +1,2 @@
+ # Pyarmor 9.1.8 (trial), 000000, 2025-09-14T02:23:06.527928
+ from .pyarmor_runtime import __pyarmor__
pyarmor_runtime_000000/pyarmor_runtime.so ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d545a203756bc11724c88da0629cf922362e0893c12de114fd6fa943e6a2b71
+ size 792360
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ transformers==4.48.3
+ pydantic==2.11.4
+ numpy==2.2.5
+ torch==2.4.1
+ torchaudio==2.4.1
+ torchvision==0.19.1
+ outetts==0.4.1
+ fastapi==0.115.12
+ uvicorn==0.34.2
+ librosa==0.11.0
+ openai-whisper==20240930
+ soundfile==0.13.1
+ accelerate==0.26.0
+ pyarmor==9.1.8
+ packaging
+ ninja
+ wheel
server.py ADDED
The diff for this file is too large to render. See raw diff
 
spk_001.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79de3a5775f8880c0bf3e950b103f03b257db630224fab265a309d82753b1aa5
+ size 480044
test.ipynb ADDED
@@ -0,0 +1,190 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/salman/salman/minomni_sn21/omega-v2v/console/backend/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n",
14
+ "/home/salman/salman/minomni_sn21/omega-v2v/console/backend/venv/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
15
+ " WeightNorm.apply(module, name, dim)\n"
16
+ ]
17
+ }
18
+ ],
19
+ "source": [
20
+ "from server import lm"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "from server import tok"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 3,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "import torch"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stderr",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "\u001b[32m2025-07-17 20:59:03.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36moutetts.models.hf_model\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m🔄 Using patched RepetitionPenaltyLogitsProcessor -> RepetitionPenaltyLogitsProcessorPatch | penalty_last_n: 64\u001b[0m\n"
51
+ ]
52
+ }
53
+ ],
54
+ "source": [
55
+ "\n",
56
+ "rr = \"\"\"I'm trying to come up with a funny name for my new goldfish. He's orange with a white spot on his head and he's pretty energetic. Got any silly suggestions?\"\"\"\n",
57
+ "\n",
58
+ "inputs = tok(rr, return_tensors=\"pt\").to(lm.device)\n",
59
+ "\n",
60
+ "with torch.inference_mode():\n",
61
+ " out_ids = lm.generate(\n",
62
+ " **inputs,\n",
63
+ " max_new_tokens=500,\n",
64
+ " do_sample=True,\n",
65
+ " temperature=0.2,\n",
66
+ " repetition_penalty=1.11,\n",
67
+ " top_k=100,\n",
68
+ " top_p=0.95,\n",
69
+ " )\n",
70
+ "\n",
71
+ "resp = tok.decode(\n",
72
+ " out_ids[0][inputs.input_ids.shape[-1] :], skip_special_tokens=True\n",
73
+ " )"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 5,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "data": {
83
+ "text/plain": [
84
+ "\" I've got a few, but they aren't very catchy. The one I like the best is just gonna be called fish. It's kinda long and it's kinda boring. Oh, I thought you were gonna give me some name for the goldfish. I'm just kidding. Yeah. So, you know, it's really easy to take care of a goldfish. We have a big tank, and, we're both in the same house. So it's not like, oh, where are my three goldfish? You know, it's just, oh, how many goldfish do you have? It's, like, four or five. But, we only have room for one person to be a goldfish keeper. So that is hard, especially when it's, like, 20 degrees outside and you're trying to keep a fish at home. Right? Yeah. That's difficult. And with the tank being this size, you don't really feel bad about taking him out. You know, you just kinda get a little more nervous because you know you're gonna be doing a big fish transfer if you have that big of a tank and all that stuff. But Mhmm. It's much easier to take care of the goldfish at home. So I wouldFor the rest of us simple folks, we worry about somebody stealing our password. To you, you laugh about it because you know how to do that with your eyes closed, right, with the technology you've created. So nowadays, you talk to certain investors, so where do hide your passwords? I don't want to really say, but I hide my passwords in my notes section on my phone. Oh shoot. Okay. Where do you hide your passwords? I write it on a piece of paper. Where do you hide your password? I have it on file on my computer. Where do you hide your password? I have it on an Excel spreadsheet, right? And all these places you go through. And so now there's a business model for apps that you put your passwords in and they protect your password. If it's so easy to break into softwares to get my password, How can I trust an app to restore all my password? Is there anywhere you trust to restore your passwords? So let's imagine that I want your password. I'm gonna make a website for Iranian American fans of Atlas Shrugged, and I'm gonna send you an email with a,\""
85
+ ]
86
+ },
87
+ "execution_count": 5,
88
+ "metadata": {},
89
+ "output_type": "execute_result"
90
+ }
91
+ ],
92
+ "source": [
93
+ "resp"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 8,
99
+ "metadata": {},
100
+ "outputs": [
101
+ {
102
+ "data": {
103
+ "text/plain": [
104
+ "'All right. Good afternoon, everybody. Welcome to Friday afternoon. Appreciate you all coming. Really pleased today to be able to host the students to to COVID. Great. Correct me if I get it wrong. From the University of Wisconsin,'"
105
+ ]
106
+ },
107
+ "execution_count": 8,
108
+ "metadata": {},
109
+ "output_type": "execute_result"
110
+ }
111
+ ],
112
+ "source": [
113
+ "resp"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": []
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "metadata": {},
127
+ "outputs": [
128
+ {
129
+ "ename": "ValueError",
130
+ "evalue": "Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating",
131
+ "output_type": "error",
132
+ "traceback": [
133
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
134
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
135
+ "Cell \u001b[0;32mIn[6], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m messages \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 2\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrole\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msystem\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are a concise assistant that answers in short paragraphs.\u001b[39m\u001b[38;5;124m\"\u001b[39m},\n\u001b[1;32m 3\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrole\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muser\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExplain rotary positional embeddings briefly.\u001b[39m\u001b[38;5;124m\"\u001b[39m},\n\u001b[1;32m 4\u001b[0m ]\n\u001b[0;32m----> 5\u001b[0m prompt_ids \u001b[38;5;241m=\u001b[39m \u001b[43mtok\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_chat_template\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43madd_generation_prompt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# appends the assistant header the model should complete\u001b[39;49;00m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 9\u001b[0m \u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto(lm\u001b[38;5;241m.\u001b[39mdevice)\n",
136
+ "File \u001b[0;32m~/salman/minomni_sn21/omega-v2v/console/backend/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1621\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.apply_chat_template\u001b[0;34m(self, conversation, tools, documents, chat_template, add_generation_prompt, continue_final_message, tokenize, padding, truncation, max_length, return_tensors, return_dict, return_assistant_tokens_mask, tokenizer_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 1618\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_kwargs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1619\u001b[0m tokenizer_kwargs \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m-> 1621\u001b[0m chat_template \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_chat_template\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchat_template\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1623\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m return_assistant_tokens_mask \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m re\u001b[38;5;241m.\u001b[39msearch(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124m{\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m-?\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms*generation\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms*-?\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124m}\u001b[39m\u001b[38;5;124m\"\u001b[39m, chat_template):\n\u001b[1;32m 1624\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning_once(\n\u001b[1;32m 1625\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreturn_assistant_tokens_mask==True but chat template does not contain `\u001b[39m\u001b[38;5;124m{\u001b[39m\u001b[38;5;132;01m% g\u001b[39;00m\u001b[38;5;124meneration \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m}` keyword.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1626\u001b[0m )\n",
137
+ "File \u001b[0;32m~/salman/minomni_sn21/omega-v2v/console/backend/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1789\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.get_chat_template\u001b[0;34m(self, chat_template, tools)\u001b[0m\n\u001b[1;32m 1787\u001b[0m chat_template \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchat_template\n\u001b[1;32m 1788\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1789\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1790\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use chat template functions because tokenizer.chat_template is not set and no template \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1791\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124margument was passed! For information about writing templates and setting the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1792\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtokenizer.chat_template attribute, please see the documentation at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1793\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://huggingface.co/docs/transformers/main/en/chat_templating\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1794\u001b[0m )\n\u001b[1;32m 1796\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m chat_template\n",
138
+ "\u001b[0;31mValueError\u001b[0m: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating"
139
+ ]
140
+ }
141
+ ],
142
+ "source": [
143
+ "messages = [\n",
144
+ " {\"role\": \"system\", \"content\": \"You are a concise assistant that answers in short paragraphs.\"},\n",
145
+ " {\"role\": \"user\", \"content\": \"Explain rotary positional embeddings briefly.\"},\n",
146
+ "]\n",
147
+ "prompt_ids = tok.apply_chat_template(\n",
148
+ " messages,\n",
149
+ " add_generation_prompt=True, # appends the assistant header the model should complete\n",
150
+ " return_tensors=\"pt\"\n",
151
+ ").to(lm.device)\n"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": []
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": []
167
+ }
168
+ ],
169
+ "metadata": {
170
+ "kernelspec": {
171
+ "display_name": "venv",
172
+ "language": "python",
173
+ "name": "python3"
174
+ },
175
+ "language_info": {
176
+ "codemirror_mode": {
177
+ "name": "ipython",
178
+ "version": 3
179
+ },
180
+ "file_extension": ".py",
181
+ "mimetype": "text/x-python",
182
+ "name": "python",
183
+ "nbconvert_exporter": "python",
184
+ "pygments_lexer": "ipython3",
185
+ "version": "3.10.17"
186
+ }
187
+ },
188
+ "nbformat": 4,
189
+ "nbformat_minor": 2
190
+ }
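Note on the failing notebook cell above: the traceback shows apply_chat_template raising because tokenizer.chat_template is unset. A minimal workaround sketch, assuming the `tok` and `lm` objects from the notebook and using a generic placeholder Jinja template (not the model's official one):

# Assumption: `tok`/`lm` are the tokenizer and model loaded earlier in the notebook.
# The template string is a generic placeholder, not the model's released template.
tok.chat_template = (
    "{% for message in messages %}"
    "<|{{ message['role'] }}|>\n{{ message['content'] }}\n"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
)
prompt_ids = tok.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant header the model should complete
    return_tensors="pt",
).to(lm.device)

The same error can also be avoided per call by passing a template explicitly via the chat_template= argument of apply_chat_template.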
test_asr.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from server import gt
2
+ import librosa
3
+ ref_audio, _ = librosa.load('/home/salman/salman/minomni_sn21/omega-v2v/miner_models/MiniCPM-o/assets/input_examples/assistant_female_voice.wav', sr=16000, mono=True) # load the reference audio
4
+
5
+ text = gt(ref_audio, 16_000)
6
+ print(text)
7
+
8
+ # Recursively walk a directory and its subdirectories, transcribing every .wav audio file found.
9
+ import os
10
+ def transcribe_directory():
11
+ for root, dirs, files in os.walk('/home/salman/salman/minomni_sn21/omega-v2v/miner_models/recordings'):
12
+ for file in files:
13
+ if file.endswith('.wav'):
14
+ print(f"Processing file: {file}")
15
+ file_path = os.path.join(root, file)
16
+ audio, sr = librosa.load(file_path, sr=16000, mono=True)
17
+ transcription = gt(audio, sr)
18
+ print(f"Transcription for {file_path}: {transcription}")
19
+ with open(file_path.replace('.wav', '.txt'), 'w') as f:
20
+ f.write(transcription)
21
+
22
+
23
+ transcribe_directory()
test_interface.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import inspect
5
+
6
+ print("=== DSPy Interface Fix Verification ===")
7
+ print()
8
+
9
+ try:
10
+ import dspy_optimizer
11
+
12
+ # Check the LocalLM signature
13
+ print("LocalLM.__call__ signature:")
14
+ sig = inspect.signature(dspy_optimizer.LocalLM.__call__)
15
+ print(sig)
16
+ print()
17
+
18
+ # Verify the method accepts messages parameter
19
+ lm = dspy_optimizer.LocalLM()
20
+ print("✓ LocalLM created successfully")
21
+
22
+ # Check if we can call with messages parameter
23
+ print("Testing interface compatibility...")
24
+
25
+ # Test the signature compatibility
26
+ # (inspect is already imported at the top of this script)
27
+ params = sig.parameters
28
+
29
+ has_messages = 'messages' in params
30
+ has_prompt = 'prompt' in params
31
+
32
+ print(f"✓ Has 'messages' parameter: {has_messages}")
33
+ print(f"✓ Has 'prompt' parameter: {has_prompt}")
34
+
35
+ if has_messages:
36
+ messages_param = params['messages']
37
+ print(f"✓ 'messages' parameter: {messages_param}")
38
+ print(f" - Default: {messages_param.default}")
39
+ print(f" - Kind: {messages_param.kind}")
40
+
41
+ print()
42
+ print("🎉 DSPy interface compatibility fix successful!")
43
+ print("The LocalLM now accepts DSPy's calling pattern: lm(messages=inputs, **kwargs)")
44
+
45
+ except Exception as e:
46
+ print(f"✗ Error: {e}")
47
+ import traceback
48
+ traceback.print_exc()
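If the signature check above passes, the calling pattern it verifies can be exercised directly. A hedged usage sketch, assuming LocalLM returns the generated text (its exact return type and supported kwargs are assumptions here):

# Hypothetical usage of the verified pattern lm(messages=inputs, **kwargs).
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one short sentence."},
]
completion = lm(messages=messages)  # kwargs such as max_tokens may be supported via **kwargs
print(completion)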
test_server_optimized.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for the optimized server to verify model loading and functionality.
4
+ """
5
+
6
+ import requests
7
+ import json
8
+ import numpy as np
9
+ import base64
10
+ import io
11
+ import soundfile as sf
12
+ import tempfile
13
+ import os
14
+
15
+ def create_test_audio(duration=2.0, sample_rate=16000):
16
+ """Create a simple test audio signal."""
17
+ t = np.linspace(0, duration, int(sample_rate * duration), False)
18
+ # Generate a simple sine wave
19
+ frequency = 440 # A4 note
20
+ audio = 0.3 * np.sin(2 * np.pi * frequency * t)
21
+ return audio.astype(np.float32)
22
+
23
+ def audio_to_base64(audio, sample_rate):
24
+ """Convert audio array to base64 string."""
25
+ buf = io.BytesIO()
26
+ np.save(buf, audio.astype(np.float32))
27
+ return base64.b64encode(buf.getvalue()).decode()
28
+
29
+ def test_health_check():
30
+ """Test the health check endpoint."""
31
+ try:
32
+ response = requests.get("http://localhost:8000/api/v1/health")
33
+ if response.status_code == 200:
34
+ data = response.json()
35
+ print(f"✓ Health check passed: {data}")
36
+
37
+ # Show device information if available
38
+ if "language_model_device" in data:
39
+ print(f" 📱 Language Model Device: {data['language_model_device']}")
40
+ print(f" 🔢 Model Dtype: {data['language_model_dtype']}")
41
+ if data.get("cuda_available"):
42
+ print(f" 🎮 CUDA Device: {data.get('cuda_device_name', 'Unknown')}")
43
+ print(f" 💾 Memory Allocated: {data.get('cuda_memory_allocated', 'Unknown')}")
44
+ print(f" 💾 Memory Reserved: {data.get('cuda_memory_reserved', 'Unknown')}")
45
+ else:
46
+ print(" ⚠ CUDA not available - running on CPU")
47
+
48
+ return data.get("model_loaded", False)
49
+ else:
50
+ print(f"✗ Health check failed: {response.status_code}")
51
+ return False
52
+ except Exception as e:
53
+ print(f"✗ Health check error: {e}")
54
+ return False
55
+
56
+ def test_v2t_endpoint():
57
+ """Test the voice-to-text endpoint."""
58
+ try:
59
+ # Create test audio
60
+ audio = create_test_audio()
61
+ audio_b64 = audio_to_base64(audio, 16000)
62
+
63
+ payload = {
64
+ "audio_data": audio_b64,
65
+ "sample_rate": 16000
66
+ }
67
+
68
+ response = requests.post(
69
+ "http://localhost:8000/api/v1/v2t",
70
+ json=payload,
71
+ headers={"Content-Type": "application/json"}
72
+ )
73
+
74
+ if response.status_code == 200:
75
+ data = response.json()
76
+ print(f"✓ V2T endpoint working: {data.get('text', 'No text')[:100]}...")
77
+ return True
78
+ else:
79
+ print(f"✗ V2T endpoint failed: {response.status_code} - {response.text}")
80
+ return False
81
+
82
+ except Exception as e:
83
+ print(f"✗ V2T endpoint error: {e}")
84
+ return False
85
+
86
+ def test_error_scenarios():
87
+ """Test error scenarios to ensure proper responses."""
88
+ print("\n4. Testing error scenarios...")
89
+
90
+ # Test with invalid audio data
91
+ try:
92
+ payload = {
93
+ "audio_data": "invalid_base64_data",
94
+ "sample_rate": 16000
95
+ }
96
+
97
+ response = requests.post(
98
+ "http://localhost:8000/api/v1/v2t",
99
+ json=payload,
100
+ headers={"Content-Type": "application/json"}
101
+ )
102
+
103
+ if response.status_code == 200:
104
+ data = response.json()
105
+ print(f"✓ Error handling working: {data.get('text', 'No text')[:100]}...")
106
+ else:
107
+ print(f"✗ Error handling failed: {response.status_code} - {response.text}")
108
+
109
+ except Exception as e:
110
+ print(f"✗ Error scenario test failed: {e}")
111
+
112
+ # Test with missing fields
113
+ try:
114
+ payload = {
115
+ "audio_data": "",
116
+ "sample_rate": 16000
117
+ }
118
+
119
+ response = requests.post(
120
+ "http://localhost:8000/api/v1/v2t",
121
+ json=payload,
122
+ headers={"Content-Type": "application/json"}
123
+ )
124
+
125
+ if response.status_code == 200:
126
+ data = response.json()
127
+ print(f"✓ Empty input handling working: {data.get('text', 'No text')[:100]}...")
128
+ else:
129
+ print(f"✗ Empty input handling failed: {response.status_code} - {response.text}")
130
+
131
+ except Exception as e:
132
+ print(f"✗ Empty input test failed: {e}")
133
+
134
+ return True
135
+
136
+ def test_authentication():
137
+ """Test authentication functionality."""
138
+ print("\n5. Testing authentication...")
139
+
140
+ # Test with valid audio data (should work if auth passes)
141
+ try:
142
+ audio = create_test_audio()
143
+ audio_b64 = audio_to_base64(audio, 16000)
144
+
145
+ payload = {
146
+ "audio_data": audio_b64,
147
+ "sample_rate": 16000
148
+ }
149
+
150
+ response = requests.post(
151
+ "http://localhost:8000/api/v1/v2t",
152
+ json=payload,
153
+ headers={"Content-Type": "application/json"}
154
+ )
155
+
156
+ if response.status_code == 200:
157
+ data = response.json()
158
+ text = data.get('text', '')
159
+ if "Authentication failed" in text:
160
+ print(f"⚠ Authentication check working: {text}")
161
+ else:
162
+ print(f"✓ Authentication passed: {text[:100]}...")
163
+ return True
164
+ else:
165
+ print(f"✗ Authentication test failed: {response.status_code} - {response.text}")
166
+ return False
167
+
168
+ except Exception as e:
169
+ print(f"✗ Authentication test error: {e}")
170
+ return False
171
+
172
+ def test_inference_endpoint():
173
+ """Test the inference endpoint (if INTERFACE is available)."""
174
+ try:
175
+ # Create test audio
176
+ audio = create_test_audio()
177
+ audio_b64 = audio_to_base64(audio, 16000)
178
+
179
+ payload = {
180
+ "audio_data": audio_b64,
181
+ "sample_rate": 16000
182
+ }
183
+
184
+ response = requests.post(
185
+ "http://localhost:8000/api/v1/inference",
186
+ json=payload,
187
+ headers={"Content-Type": "application/json"}
188
+ )
189
+
190
+ if response.status_code == 200:
191
+ data = response.json()
192
+ print(f"✓ Inference endpoint working: Audio data length {len(data.get('audio_data', ''))}")
193
+ return True
194
+ elif response.status_code == 503:
195
+ print(f"⚠ Inference endpoint not available (expected if outetts models not loaded): {response.text}")
196
+ return True # This is expected if outetts models are not available
197
+ else:
198
+ print(f"✗ Inference endpoint failed: {response.status_code} - {response.text}")
199
+ return False
200
+
201
+ except Exception as e:
202
+ print(f"✗ Inference endpoint error: {e}")
203
+ return False
204
+
205
+ def main():
206
+ """Run all tests."""
207
+ print("Testing optimized server...")
208
+ print("=" * 50)
209
+
210
+ # Test health check
211
+ print("\n1. Testing health check...")
212
+ models_loaded = test_health_check()
213
+
214
+ if not models_loaded:
215
+ print("⚠ Models not loaded. Some tests may fail.")
216
+
217
+ # Test V2T endpoint
218
+ print("\n2. Testing voice-to-text endpoint...")
219
+ v2t_success = test_v2t_endpoint()
220
+
221
+ # Test inference endpoint
222
+ print("\n3. Testing inference endpoint...")
223
+ inference_success = test_inference_endpoint()
224
+
225
+ # Test error scenarios
226
+ error_success = test_error_scenarios()
227
+
228
+ # Test authentication
229
+ auth_success = test_authentication()
230
+
231
+ # Summary
232
+ print("\n" + "=" * 50)
233
+ print("Test Summary:")
234
+ print(f"Health Check: {'✓' if models_loaded else '✗'}")
235
+ print(f"V2T Endpoint: {'✓' if v2t_success else '✗'}")
236
+ print(f"Inference Endpoint: {'✓' if inference_success else '✗'}")
237
+ print(f"Error Handling: {'✓' if error_success else '✗'}")
238
+ print(f"Authentication: {'✓' if auth_success else '✗'}")
239
+
240
+ if models_loaded and v2t_success and error_success and auth_success:
241
+ print("\n🎉 Server is working correctly with authentication and error handling!")
242
+ else:
243
+ print("\n⚠ Some issues detected. Check the logs above.")
244
+
245
+ if __name__ == "__main__":
246
+ main()
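The tests above serialize audio with np.save and base64-encode the bytes before posting to /api/v1/v2t. A minimal sketch of the matching decode step a handler would need, assuming the same .npy-over-base64 convention (not necessarily what server.py actually implements):

import base64
import io
import numpy as np

def decode_audio_payload(audio_b64: str) -> np.ndarray:
    """Invert audio_to_base64: base64-decode the string, then np.load the .npy bytes."""
    raw = base64.b64decode(audio_b64)
    return np.load(io.BytesIO(raw))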
test_warnings.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify warning suppression is working.
4
+ This script imports the same libraries as server.py to test warning behavior.
5
+ """
6
+
7
+ import warnings
8
+ import os
9
+
10
+ # Apply the same warning suppression as server.py
11
+ warnings.filterwarnings("ignore", category=UserWarning, module="pygame.*")
12
+ warnings.filterwarnings("ignore", category=FutureWarning, module="torch.*")
13
+ warnings.filterwarnings("ignore", category=FutureWarning, module="audiotools.*")
14
+ warnings.filterwarnings("ignore", message=".*pkg_resources is deprecated.*")
15
+ warnings.filterwarnings("ignore", message=".*torch\\.load.*weights_only.*")
16
+ warnings.filterwarnings("ignore", message=".*torch\\.nn\\.utils\\.weight_norm.*deprecated.*")
17
+
18
+ # Suppress common ML library warnings
19
+ warnings.filterwarnings("ignore", category=UserWarning, module="transformers.*")
20
+ warnings.filterwarnings("ignore", category=UserWarning, module="whisper.*")
21
+ warnings.filterwarnings("ignore", category=UserWarning, module="librosa.*")
22
+
23
+ print("=== TESTING WARNING SUPPRESSION ===")
24
+
25
+ # Test imports that would normally generate warnings
26
+ print("1. Testing pygame/librosa import...")
27
+ try:
28
+ import librosa
29
+ print(" ✓ librosa imported without warnings")
30
+ except Exception as e:
31
+ print(f" ⚠ librosa import issue: {e}")
32
+
33
+ print("2. Testing torch import...")
34
+ try:
35
+ import torch
36
+ print(" ✓ torch imported without warnings")
37
+ except Exception as e:
38
+ print(f" ⚠ torch import issue: {e}")
39
+
40
+ print("3. Testing transformers import...")
41
+ try:
42
+ from transformers import AutoTokenizer
43
+ print(" ✓ transformers imported without warnings")
44
+ except Exception as e:
45
+ print(f" ⚠ transformers import issue: {e}")
46
+
47
+ print("4. Testing outetts import...")
48
+ try:
49
+ import outetts
50
+ print(" ✓ outetts imported without warnings")
51
+ except Exception as e:
52
+ print(f" ⚠ outetts import issue: {e}")
53
+
54
+ print("\n=== TEST COMPLETE ===")
55
+ print("If you see this message without the warnings from your original output,")
56
+ print("then warning suppression is working correctly!")
57
+ print("=" * 50)
utils.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ api_key = "claude-rwjrljsdjfhsjvinesfsdgqrqw"
2
+ temp_ = "omega-omega-omega"
3
+ netuid = 21
4
+ competition = 'v3'
5
+
6
+
7
+ hotkey = "5F71XTGBnBGzxiPxCK4EbWMnhckH21tGWSRfe6NrMdxMe6k7"