Locutusque committed on
Commit f47aa49 · verified · 1 Parent(s): 2ed725b

Create app.py

Files changed (1)
  1. app.py +392 -0
app.py ADDED
@@ -0,0 +1,392 @@
+ import os
+ import io
+ import gc
+ import json
+ import math
+ import time
+ import uuid
+ import spaces
+ import random
+ from dataclasses import dataclass
+ from typing import Dict, List, Tuple, Optional
+
+ import gradio as gr
+ import numpy as np
+ import torch
+ from transformers import AutoModel, AutoTokenizer
+
+ import mido
+ from mido import Message, MidiFile, MidiTrack
+
+ # -------------------- Defaults & Music Helpers --------------------
+
+ DEFAULT_MODEL = "unsloth/Qwen3-14B-Base"
+
+ SCALES = {
+     "C pentatonic": [60, 62, 65, 67, 70, 72, 74, 77],
+     "C major": [60, 62, 64, 65, 67, 69, 71, 72],
+     "A minor": [57, 59, 60, 62, 64, 65, 67, 69],
+     "Custom (comma-separated MIDI notes)": [],
+ }
+
+ LAYER_INSTRUMENT_PRESETS = {
+     "Ensemble (melody+bass+pad etc.)": {
+         0: (0, 'melody'),
+         1: (33, 'bass'),
+         2: (46, 'harmony'),
+         3: (48, 'pad'),
+         4: (11, 'accent'),
+         5: (89, 'atmosphere'),
+     },
+     "Piano Trio (melody+bass+harmony)": {
+         0: (0, 'melody'),
+         1: (33, 'bass'),
+         2: (0, 'harmony'),
+         3: (48, 'pad'),
+         4: (0, 'accent'),
+         5: (0, 'atmosphere'),
+     },
+     "Pads & Atmos": {
+         0: (48, 'pad'),
+         1: (48, 'pad'),
+         2: (89, 'atmosphere'),
+         3: (89, 'atmosphere'),
+         4: (46, 'harmony'),
+         5: (11, 'accent'),
+     },
+ }
+
+ @dataclass
+ class GenConfig:
+     model_name: str
+     compute_mode: str  # "Full model" or "Mock latents"
+     base_tempo: int
+     velocity_range: Tuple[int, int]
+     scale: List[int]
+     num_layers_limit: int
+     seed: int
+
+ # --- Core math helpers ---
+
+ def entropy(p: np.ndarray) -> float:
+     p = p / (p.sum() + 1e-9)
+     return float(-np.sum(p * np.log2(p + 1e-9)))
+
+ def quantize_time(time_val: int, grid: int = 120) -> int:
+     return int(round(time_val / grid) * grid)
+
+ def norm_to_scale(val: float, scale: np.ndarray, octave_range: int = 2) -> int:
+     octave = int(abs(val) * octave_range) * 12
+     note_idx = int(abs(val * 100) % len(scale))
+     return int(scale[note_idx] + octave)
+
+ ROLE_FREQS = {
+     'melody': 2.0,
+     'bass': 0.5,
+     'harmony': 1.5,
+     'pad': 0.25,
+     'accent': 3.0,
+     'atmosphere': 0.33
+ }
+
+ ROLE_WEIGHTS = {
+     'melody': np.array([0.4, 0.2, 0.2, 0.1, 0.1]),
+     'bass': np.array([0.1, 0.4, 0.1, 0.3, 0.1]),
+     'harmony': np.array([0.2, 0.2, 0.3, 0.2, 0.1]),
+     'pad': np.array([0.1, 0.3, 0.1, 0.1, 0.4]),
+     'accent': np.array([0.5, 0.1, 0.2, 0.1, 0.1]),
+     'atmosphere': np.array([0.1, 0.2, 0.1, 0.2, 0.4])
+ }
+
+ def create_note_probability(layer_idx, token_idx, attention_val, hidden_state, num_tokens, role: str):
+     # Blend several signals (attention strength, role-dependent rhythm, state energy,
+     # variance, and entropy) into a single probability of emitting a note here.
+     base_prob = 1 / (1 + np.exp(-10 * (attention_val - 0.5)))
+     temporal_factor = 0.5 + 0.5 * np.sin(2 * np.pi * ROLE_FREQS[role] * token_idx / max(1, num_tokens))
+     energy = np.linalg.norm(hidden_state)
+     energy_factor = np.tanh(energy / 10)
+     local_variance = np.var(hidden_state)
+     variance_factor = 1 - np.exp(-local_variance)
+     state_entropy = entropy(np.abs(hidden_state))
+     max_entropy = np.log2(max(2, hidden_state.shape[0]))
+     entropy_factor = state_entropy / max_entropy
+     factors = np.array([base_prob, temporal_factor, energy_factor, variance_factor, entropy_factor])
+     weights = ROLE_WEIGHTS[role]
+     combined_prob = float(np.dot(weights, factors))
+     noise_seed = layer_idx * 1000 + token_idx
+     noise = 0.1 * (np.sin(noise_seed * 0.1) + np.cos(noise_seed * 0.23)) / 2
+     # Clamp at zero before the fractional power so negative noise can never produce NaN.
+     final_prob = max(combined_prob + noise, 0.0) ** 1.5
+     return float(np.clip(final_prob, 0, 1))
+
+ def should_play_note_stochastic(layer_idx, token_idx, attention_val, hidden_state, num_tokens, role: str, history: Dict[int, int]):
+     prob = create_note_probability(layer_idx, token_idx, attention_val, hidden_state, num_tokens, role)
+     if layer_idx in history:
+         # The longer a layer has been silent, the more likely it is to play again.
+         last_played = history[layer_idx]
+         silence_duration = token_idx - last_played
+         prob *= (1 + np.tanh(silence_duration / 5) * 0.5)
+     play_note = np.random.random() < prob
+     if play_note:
+         history[layer_idx] = token_idx
+     return play_note
+
+ # -------------------- Model / Latents --------------------
+
+ @dataclass
+ class Latents:
+     hidden_states: List[torch.Tensor]
+     attentions: List[torch.Tensor]
+     num_layers: int
+     num_tokens: int
+
+ @spaces.GPU(duration=45)
+ def get_latents(text: str, model_name: str, compute_mode: str, max_layers: int, progress=gr.Progress(track_tqdm=True)) -> Latents:
+     if compute_mode == "Mock latents":
+         # Fast path for Spaces without big GPUs
+         tokens = max(16, min(128, len(text.split()) * 4))
+         layers = min(max_layers, 6)
+         hidden_states = [torch.randn(1, tokens, 128) for _ in range(layers)]
+         attentions = [torch.rand(1, 8, tokens, tokens) for _ in range(layers)]
+         return Latents(hidden_states=hidden_states, attentions=attentions, num_layers=layers, num_tokens=tokens)
+
+     # Full model path
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     if tokenizer.pad_token is None and tokenizer.eos_token is not None:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     # Try different memory-friendly loading strategies
+     load_kwargs = dict(
+         output_hidden_states=True,
+         output_attentions=True,
+         device_map="cuda",
+     )
+
+     # dtype heuristics
+     try:
+         load_kwargs["torch_dtype"] = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+     except Exception:
+         pass
+
+     model = AutoModel.from_pretrained(model_name, **load_kwargs)
+
+     inputs = tokenizer(text, return_tensors="pt")
+     device = next(model.parameters()).device
+     inputs = {k: v.to(device) for k, v in inputs.items()}
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+         hidden_states = list(outputs.hidden_states)
+         attentions = list(outputs.attentions)
+
+     # Move to CPU in float32 (bfloat16 tensors cannot be converted to NumPy) and free VRAM
+     hidden_states = [hs.to("cpu", dtype=torch.float32) for hs in hidden_states]
+     attentions = [att.to("cpu", dtype=torch.float32) for att in attentions]
+
+     # Trim layers
+     layers = min(max_layers, 6, len(hidden_states))
+     tokens = hidden_states[0].shape[1]
+
+     # Clean up VRAM
+     try:
+         del model
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+     except Exception:
+         pass
+
+     return Latents(hidden_states=hidden_states[:layers], attentions=attentions[:layers], num_layers=layers, num_tokens=tokens)
+
+ # -------------------- MIDI Rendering --------------------
+
+ def render_midi(latents: Latents, scale_notes: List[int], base_tempo: int, velocity_range: Tuple[int, int], preset_name: str, seed: int) -> Tuple[bytes, Dict]:
+     np.random.seed(seed)
+     random.seed(seed)
+
+     scale = np.array(scale_notes, dtype=int)
+     num_layers = latents.num_layers
+     num_tokens = latents.num_tokens
+     hidden_states = [hs.numpy() if isinstance(hs, torch.Tensor) else hs for hs in latents.hidden_states]
+     attentions = [att.numpy() if isinstance(att, torch.Tensor) else att for att in latents.attentions]
+
+     layer_instruments = LAYER_INSTRUMENT_PRESETS[preset_name]
+
+     mid = MidiFile()
+     tracks: List[MidiTrack] = []
+     for ch in range(num_layers):
+         track = MidiTrack()
+         mid.tracks.append(track)
+         tracks.append(track)
+         instrument = layer_instruments.get(ch, (0, 'melody'))[0]
+         track.append(Message('program_change', program=int(instrument), time=0, channel=ch))
+
+     history: Dict[int, int] = {}
+     current_time = [0] * num_layers
+     notes_count = [0] * num_layers
+
+     for token_idx in range(num_tokens):
+         # Advance every layer's pending delta time on a coarse 4-token grid
+         if token_idx > 0 and token_idx % 4 == 0:
+             for layer_idx in range(num_layers):
+                 current_time[layer_idx] += base_tempo
+
+         pan = 64 + int(32 * np.sin(token_idx * math.pi / max(1, num_tokens)))
+
+         for layer_idx in range(num_layers):
+             role = layer_instruments.get(layer_idx, (0, 'melody'))[1]
+
+             attn_matrix = attentions[min(layer_idx, len(attentions) - 1)][0, :, token_idx, :]
+             attention_strength = float(np.mean(attn_matrix))
+             layer_vec = hidden_states[layer_idx][0, token_idx]
+
+             if not should_play_note_stochastic(layer_idx, token_idx, attention_strength, layer_vec, num_tokens, role, history):
+                 continue
+
+             # Map hidden-state components to pitches according to the layer's role
+             if role == 'melody':
+                 note = norm_to_scale(layer_vec[0], scale, octave_range=1)
+                 notes_to_play = [note]
+             elif role == 'bass':
+                 note = norm_to_scale(layer_vec[0], scale, octave_range=0) - 12
+                 notes_to_play = [note]
+             elif role == 'harmony':
+                 notes_to_play = [norm_to_scale(layer_vec[i], scale, octave_range=1) for i in range(0, min(2, len(layer_vec)), 1)]
+             elif role == 'pad':
+                 notes_to_play = [norm_to_scale(layer_vec[i], scale, octave_range=1) for i in range(0, min(3, len(layer_vec)), 2)]
+             elif role == 'accent':
+                 note = norm_to_scale(layer_vec[0], scale, octave_range=2) + 12
+                 notes_to_play = [note]
+             else:
+                 notes_to_play = [norm_to_scale(layer_vec[i], scale, octave_range=1) for i in range(0, min(2, len(layer_vec)), 3)]
+
+             base_velocity = int(attention_strength * (velocity_range[1] - velocity_range[0]) + velocity_range[0])
+             if role == 'melody':
+                 velocity = min(base_velocity + 10, 127)
+             elif role == 'bass':
+                 velocity = base_velocity
+             elif role == 'accent':
+                 velocity = min(base_velocity + 20, 127)
+             else:
+                 velocity = max(base_velocity - 10, 20)
+
+             if role in ['pad', 'atmosphere']:
+                 duration = base_tempo * 4
+             elif role == 'bass':
+                 duration = base_tempo
+             else:
+                 try:
+                     dur_factor = entropy(attn_matrix.mean(axis=0)) / (np.log2(attn_matrix.shape[-1]) + 1e-9)
+                 except Exception:
+                     dur_factor = 0.5
+                 duration = quantize_time(int(base_tempo * (0.5 + dur_factor * 1.5)))
+
+             for note in notes_to_play:
+                 note = max(21, min(108, int(note)))
+                 tracks[layer_idx].append(Message('note_on', note=note, velocity=velocity, time=current_time[layer_idx], channel=layer_idx))
+                 tracks[layer_idx].append(Message('note_off', note=note, velocity=0, time=duration, channel=layer_idx))
+                 current_time[layer_idx] = 0
+                 notes_count[layer_idx] += 1
+
+             if token_idx == 0:
+                 tracks[layer_idx].append(Message('control_change', control=10, value=pan, time=0, channel=layer_idx))
+
+     # Save to bytes
+     bio = io.BytesIO()
+     mid.save(file=bio)
+     bio.seek(0)
+
+     meta = {
+         "num_layers": num_layers,
+         "num_tokens": num_tokens,
+         "notes_per_layer": notes_count,
+         "total_notes": int(sum(notes_count)),
+         "tempo_ticks_per_beat": int(base_tempo),
+         "scale": list(map(int, scale.tolist())),
+     }
+     return bio.read(), meta
+
+ # -------------------- Gradio UI --------------------
+
+ DESCRIPTION = """
+ # LLM Forest Orchestra — Sonify Transformer Internals
+ Turn hidden states and attentions into a multi-track MIDI composition.
+
+ - **Two compute modes**: *Full model* (loads a HF model and extracts latents) or *Mock latents* (quick demo with synthetic tensors — great for CPU-only Spaces).
+ - Choose **scale**, **tempo**, **velocity range**, and **instrument/role preset**.
+ - Exports a **MIDI** file you can arrange further in your DAW.
+ """
+
+ EXAMPLE_TEXT = """Joy cascades in golden waterfalls, crashing into pools of melancholy blue.
+ Anger burns red through veins of marble, while serenity floats on clouds of softest grey.
+ Love pulses in waves of crimson and rose, intertwining with longing's purple haze.
+ Each feeling resonates at its own frequency, painting music across the soul's canvas.
+ """
+
+ def parse_scale(selection: str, custom: str) -> List[int]:
+     if selection == "Custom (comma-separated MIDI notes)":
+         try:
+             # Fall back to the default scale if the custom field is empty or unparseable
+             notes = [int(x.strip()) for x in custom.split(",") if x.strip()]
+             return notes if notes else SCALES["C pentatonic"]
+         except Exception:
+             return SCALES["C pentatonic"]
+     return SCALES[selection] if SCALES[selection] else SCALES["C pentatonic"]
+
+ def generate(text, model_name, compute_mode, base_tempo, velocity_low, velocity_high, scale_choice, custom_scale, num_layers, preset, seed):
+     scale = parse_scale(scale_choice, custom_scale)
+     cfg = GenConfig(
+         model_name=model_name or DEFAULT_MODEL,
+         compute_mode=compute_mode,
+         base_tempo=int(base_tempo),
+         velocity_range=(int(velocity_low), int(velocity_high)),
+         scale=scale,
+         num_layers_limit=int(num_layers),
+         seed=int(seed),
+     )
+
+     # Get latents
+     latents = get_latents(text, cfg.model_name, cfg.compute_mode, cfg.num_layers_limit)
+
+     # Render MIDI
+     midi_bytes, meta = render_midi(latents, cfg.scale, cfg.base_tempo, cfg.velocity_range, preset, cfg.seed)
+
+     # Persist to a file for download
+     out_name = f"llm_forest_orchestra_{uuid.uuid4().hex[:8]}.mid"
+     with open(out_name, "wb") as f:
+         f.write(midi_bytes)
+
+     # Prepare quick stats
+     stats = (
+         f"Layers: {meta['num_layers']} | Tokens: {meta['num_tokens']} | "
+         f"Total notes: {meta['total_notes']} | Scale: {meta['scale']} | "
+         f"Tempo (ticks/beat): {meta['tempo_ticks_per_beat']}"
+     )
+
+     return out_name, stats, json.dumps(meta, indent=2)
+
+ with gr.Blocks(title="LLM Forest Orchestra — MIDI from Transformer Internals") as demo:
+     gr.Markdown(DESCRIPTION)
+
+     with gr.Row():
+         with gr.Column():
+             text = gr.Textbox(value=EXAMPLE_TEXT, label="Input text", lines=8)
+             model_name = gr.Textbox(value=DEFAULT_MODEL, label="HF model (base) to probe", info="Should support output_hidden_states & output_attentions")
+             compute_mode = gr.Radio(choices=["Mock latents", "Full model"], value="Full model", label="Compute mode")
+             preset = gr.Dropdown(choices=list(LAYER_INSTRUMENT_PRESETS.keys()), value="Ensemble (melody+bass+pad etc.)", label="Instrument/Role preset")
+             with gr.Row():
+                 base_tempo = gr.Slider(120, 960, value=480, step=1, label="Ticks per beat (tempo grid)")
+                 num_layers = gr.Slider(1, 6, value=6, step=1, label="Max layers to use")
+             with gr.Row():
+                 velocity_low = gr.Slider(1, 126, value=40, step=1, label="Velocity min")
+                 velocity_high = gr.Slider(2, 127, value=90, step=1, label="Velocity max")
+             with gr.Row():
+                 scale_choice = gr.Dropdown(choices=list(SCALES.keys()), value="C pentatonic", label="Scale")
+                 custom_scale = gr.Textbox(value="", label="Custom scale notes (e.g. 60,62,65,67)")
+             seed = gr.Number(value=42, precision=0, label="Random seed")
+
+             btn = gr.Button("Generate MIDI", variant="primary")
+
+         with gr.Column():
+             midi_file = gr.File(label="MIDI output (.mid)")
+             stats = gr.Markdown("")
+             meta_json = gr.Code(label="Meta (JSON)")
+
+     btn.click(
+         fn=generate,
+         inputs=[text, model_name, compute_mode, base_tempo, velocity_low, velocity_high, scale_choice, custom_scale, num_layers, preset, seed],
+         outputs=[midi_file, stats, meta_json]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()