{ "cells": [ { "cell_type": "markdown", "metadata": { "gradient": { "editing": false, "id": "ac5a4cf0-d9d2-47b5-9633-b53f8d99a4d2", "kernelId": "" }, "id": "SiTIpPjArIyr" }, "source": [ "# Orpheus Music Transformer Training Dataset Maker (ver. 1.0)\n", "\n", "***\n", "\n", "Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools\n", "\n", "***\n", "\n", "#### Project Los Angeles\n", "\n", "#### Tegridy Code 2025\n", "\n", "***" ] }, { "cell_type": "markdown", "metadata": { "gradient": { "editing": false, "id": "fa0a611c-1803-42ae-bdf6-a49b5a4e781b", "kernelId": "" }, "id": "gOd93yV0sGd2" }, "source": [ "# (SETUP ENVIRONMENT)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "gradient": { "editing": false, "id": "a1a45a91-d909-4fd4-b67a-5e16b971d179", "kernelId": "" }, "id": "fX12Yquyuihc", "scrolled": true }, "outputs": [], "source": [ "#@title Install all dependencies (run only once per session)\n", "\n", "!git clone https://github.com/asigalov61/tegridy-tools\n", "!pip install tqdm" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "gradient": { "editing": false, "id": "b8207b76-9514-4c07-95db-95a4742e52c5", "kernelId": "" }, "id": "z7n9vnKmug1J", "scrolled": true }, "outputs": [], "source": [ "#@title Import all needed modules\n", "\n", "print('Loading needed modules. Please wait...')\n", "import os\n", "import copy\n", "import math\n", "import statistics\n", "import random\n", "import pickle\n", "\n", "from collections import Counter\n", "\n", "from tqdm import tqdm\n", "\n", "print('Loading TMIDIX module...')\n", "%cd tegridy-tools/tegridy-tools\n", "\n", "import TMIDIX\n", "\n", "%cd /home/ubuntu/\n", "\n", "print('Done!')\n", "print('Enjoy! :)')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (Download and untar full Godzilla MIDI Dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## https://huggingface.co/datasets/projectlosangeles/Godzilla-MIDI-Dataset" ] }, { "cell_type": "markdown", "metadata": { "id": "JwrqQeie08t0" }, "source": [ "# (FILE LIST)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "DuVWtdDNcqKh", "scrolled": true }, "outputs": [], "source": [ "filez = TMIDIX.create_files_list(['./Godzilla-MIDI-Dataset/MIDIs/', './Godzilla-Piano-MIDI-Dataset/'])\n", "\n", "TMIDIX.Tegridy_Any_Pickle_File_Writer(filez, '/home/ubuntu/filez')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "qI_adhjojrJ9", "scrolled": true }, "outputs": [], "source": [ "#@title Load file list\n", "filez = TMIDIX.Tegridy_Any_Pickle_File_Reader('/home/ubuntu/filez')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "len(filez)" ] }, { "cell_type": "markdown", "metadata": { "id": "FLxHvO-wlwfU" }, "source": [ "# (PROCESS)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def TMIDIX_MIDI_Processor(midi_file):\n", "\n", " try:\n", " \n", " raw_score = TMIDIX.midi2single_track_ms_score(midi_file)\n", " \n", " escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True, apply_sustain=True)\n", " \n", " if escore_notes:\n", " \n", " escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes[0], sort_drums_last=True)\n", " \n", " instruments_list = sorted(set([y[6] for y in escore_notes]))\n", " instruments_list_without_drums = [i for i in instruments_list if i != 128]\n", " \n", " if instruments_list_without_drums and len(escore_notes) > 255:\n", " \n", " escore_notes_without_drums = [e for e in escore_notes if e[3] != 9]\n", " \n", " durs_counts = TMIDIX.escore_notes_durations_counter(escore_notes_without_drums, min_duration=128)\n", " \n", " if (durs_counts[0] / durs_counts[1]) < 0.1 and (durs_counts[2] / durs_counts[1]) < 0.1:\n", " \n", " escore_notes_times = [e[1] for e in escore_notes_without_drums]\n", " \n", " escore_notes_tones = sorted(set([e[4] % 12 for e in escore_notes_without_drums]))\n", " \n", " if len(escore_notes_times) > (len(set(escore_notes_times)) * 1.1) and len(escore_notes_tones) > 4:\n", " \n", " escore_notes_velocities = [e[5] for e in escore_notes]\n", " \n", " avg_escore_notes_velocity = sum(escore_notes_velocities) / len(escore_notes_velocities)\n", " \n", " if avg_escore_notes_velocity < 64:\n", " TMIDIX.adjust_score_velocities(escore_notes, 124)\n", "\n", " dscore = TMIDIX.delta_score_notes(escore_notes)\n", " \n", " dcscore = TMIDIX.chordify_score([d[1:] for d in dscore])\n", "\n", " bad_chords_counts = TMIDIX.count_bad_chords_in_chordified_score(dcscore, pitches_index=3, patches_index=5)\n", "\n", " if (bad_chords_counts[0] / bad_chords_counts[1]) < 0.15:\n", " \n", " #=======================================================\n", " # FINAL PROCESSING\n", " #=======================================================\n", " \n", " melody_chords = [18816]\n", " \n", " #=======================================================\n", " # MAIN PROCESSING CYCLE\n", " #=======================================================\n", " \n", " for i, c in enumerate(dcscore):\n", " \n", " # Outro seq\n", " if len(dcscore)-i == 64 and len(dcscore) > 191:\n", " melody_chords.extend([18817])\n", " \n", " # Delta start-times\n", " \n", " delta_time = c[0][0]\n", " \n", " melody_chords.append(delta_time)\n", " \n", " for e in c:\n", " \n", " #=======================================================\n", " \n", " # Durations\n", " dur = max(1, min(255, e[1]))\n", " \n", " # Patches\n", " pat = max(0, min(128, e[5]))\n", " \n", " # Pitches\n", " ptc = max(1, min(127, e[3]))\n", " \n", " # Velocities\n", " # Calculating octo-velocity\n", " \n", " vel = max(8, min(127, e[4]))\n", " velocity = round(vel / 15)-1\n", " \n", " #=======================================================\n", " # FINAL NOTE SEQ\n", " #=======================================================\n", " \n", " # Writing final note\n", " pat_ptc = (128 * pat) + ptc \n", " dur_vel = (8 * dur) + velocity\n", " \n", " melody_chords.extend([pat_ptc+256, dur_vel+16768]) # 18816\n", "\n", " if len(melody_chords) > 8192:\n", " break\n", " \n", " melody_chords.extend([18818])\n", "\n", " return melody_chords\n", "\n", " \n", " except Exception as ex:\n", " print(midi_file)\n", " print(ex)\n", " return None" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!mkdir DATA" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "print('=' * 70)\n", "print('TMIDIX MIDI Processor')\n", "print('=' * 70)\n", "print('Starting up...')\n", "print('=' * 70)\n", "\n", "###########\n", "\n", "NUMBER_OF_FILES_PER_ITERATION = 25000\n", "\n", "files_count = 0\n", "\n", "print('Processing MIDI files. Please wait...')\n", "print('=' * 70)\n", "\n", "for i in range(0, len(filez), NUMBER_OF_FILES_PER_ITERATION):\n", "\n", " print('=' * 70)\n", " print('Processing block #', (i // NUMBER_OF_FILES_PER_ITERATION)+1, '/', (len(filez) // NUMBER_OF_FILES_PER_ITERATION)+1)\n", " print('=' * 70)\n", "\n", " output = TMIDIX.multiprocessing_wrapper(TMIDIX_MIDI_Processor, filez[i:i+NUMBER_OF_FILES_PER_ITERATION])\n", "\n", " melody_chords_f = set()\n", "\n", " for o in output:\n", " if o:\n", " melody_chords_f.add(tuple(o))\n", "\n", " melody_chords_f = list(melody_chords_f)\n", "\n", " files_count += len(melody_chords_f)\n", " print('SAVING !!!')\n", " print('=' * 70)\n", " print('Saving processed files...')\n", " print('=' * 70)\n", " print('Data check:', min(melody_chords_f[0]), '===', max(melody_chords_f[0]), '===', len(list(set(melody_chords_f[0]))), '===', len(melody_chords_f[0]))\n", " print('=' * 70)\n", " print('Processed so far:', files_count, 'out of', len(filez), '===', files_count / len(filez), 'good files ratio')\n", " print('=' * 70)\n", " count = str(files_count)\n", " TMIDIX.Tegridy_Any_Pickle_File_Writer(melody_chords_f, '/home/ubuntu/DATA/ORPHEUS_INTs_'+count)\n", " print('=' * 70)\n", "\n", "files_count += len(melody_chords_f)\n", "print('SAVING !!!')\n", "print('=' * 70)\n", "print('Saving processed files...')\n", "print('=' * 70)\n", "print('Data check:', min(melody_chords_f[0]), '===', max(melody_chords_f[0]), '===', len(list(set(melody_chords_f[0]))), '===', len(melody_chords_f[0]))\n", "print('=' * 70)\n", "print('Processed so far:', files_count, 'out of', len(filez), '===', files_count / len(filez), 'good files ratio')\n", "print('=' * 70)\n", "count = str(files_count)\n", "TMIDIX.Tegridy_Any_Pickle_File_Writer(melody_chords_f, '/home/ubuntu/DATA/ORPHEUS_INTs_'+count)\n", "print('=' * 70)" ] }, { "cell_type": "markdown", "metadata": { "id": "-ye9rNzOHX90" }, "source": [ "# (TEST INTS)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_data1 = melody_chords_f[0]\n", "\n", "print('Sample INTs', train_data1[:15])\n", "\n", "out = train_data1\n", "\n", "if len(out) != 0:\n", " \n", " song = out\n", " song_f = []\n", " \n", " time = 0\n", " dur = 0\n", " vel = 90\n", " pitch = 60\n", " channel = 0\n", " patch = 0\n", "\n", " patches = [-1] * 16\n", "\n", " channels = [0] * 16\n", " channels[9] = 1\n", "\n", " for ss in song:\n", "\n", " if 0 <= ss < 256:\n", "\n", " time += ss * 16\n", "\n", " if 256 <= ss < 16768:\n", "\n", " patch = (ss-256) // 128\n", "\n", " if patch < 128:\n", "\n", " if patch not in patches:\n", " if 0 in channels:\n", " cha = channels.index(0)\n", " channels[cha] = 1\n", " else:\n", " cha = 15\n", "\n", " patches[cha] = patch\n", " channel = patches.index(patch)\n", " else:\n", " channel = patches.index(patch)\n", "\n", " if patch == 128:\n", " channel = 9\n", "\n", " pitch = (ss-256) % 128\n", "\n", "\n", " if 16768 <= ss < 18816:\n", "\n", " dur = ((ss-16768) // 8) * 16\n", " vel = (((ss-16768) % 8)+1) * 15\n", "\n", " song_f.append(['note', time, dur, channel, pitch, vel ])\n", " \n", "patches = [0 if x==-1 else x for x in patches]\n", "\n", "detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,\n", " output_signature = 'Orpheus Music Transformer', \n", " output_file_name = '/home/ubuntu/Orpheus-Music-TransformerComposition', \n", " track_name='Project Los Angeles',\n", " list_of_MIDI_patches=patches\n", " )\n", "\n", "print('Done!')" ] }, { "cell_type": "markdown", "metadata": { "id": "YzCMd94Tu_gz" }, "source": [ "# Congrats! You did it! :)" ] } ], "metadata": { "colab": { "machine_shape": "hm", "private_outputs": true, "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 4 }