# Orpheus Music Transformer Training Dataset Maker (ver. 1.0)

***

Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools

***

#### Project Los Angeles

#### Tegridy Code 2025

***

# (SETUP ENVIRONMENT)

In [None]:
#@title Install all dependencies (run only once per session)

# Fetch the tegridy-tools repository; the import cell changes into this
# checkout to load the TMIDIX module.
!git clone https://github.com/asigalov61/tegridy-tools
# tqdm is imported by the import cell.
!pip install tqdm

In [None]:
#@title Import all needed modules

print('Loading needed modules. Please wait...')
# Standard-library modules.
import os
import copy
import math
import statistics
import random
import pickle

from collections import Counter

# Third-party package installed by the setup cell.
from tqdm import tqdm

print('Loading TMIDIX module...')
# TMIDIX is imported straight from the cloned repository, so switch into the
# directory that contains it first (IPython %cd magic, relative to the
# current working directory).
%cd tegridy-tools/tegridy-tools

import TMIDIX

# Switch to /home/ubuntu/, the directory that the absolute paths used by the
# later cells point into.
%cd /home/ubuntu/

print('Done!')
print('Enjoy! :)')

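# (Download and untar the full Godzilla MIDI Dataset)

The file-list step below scans `./Godzilla-MIDI-Dataset/MIDIs/` and `./Godzilla-Piano-MIDI-Dataset/` relative to the working directory.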

## https://huggingface.co/datasets/projectlosangeles/Godzilla-MIDI-Dataset

# (FILE LIST)

In [None]:
# Directories that are scanned for source MIDI files (relative to the
# current working directory).
midi_dataset_dirs = [
    './Godzilla-MIDI-Dataset/MIDIs/',
    './Godzilla-Piano-MIDI-Dataset/',
]

# Collect the file list once and persist it so that it can be reloaded by the
# "Load file list" cell.
filez = TMIDIX.create_files_list(midi_dataset_dirs)

TMIDIX.Tegridy_Any_Pickle_File_Writer(filez, '/home/ubuntu/filez')

In [None]:
#@title Load file list
# Read back the file list that the file-list cell pickled to '/home/ubuntu/filez'.
filez = TMIDIX.Tegridy_Any_Pickle_File_Reader('/home/ubuntu/filez')

In [None]:
# Display the number of entries in the file list.
len(filez)

# (PROCESS)

In [None]:
# Token layout of the encoded sequence (the ranges are disjoint):
#   0..255          delta start-time of a chord
#   256..16767      (128 * patch) + pitch + 256; patch 128 is the drums patch
#   16768..18815    (8 * duration) + velocity bucket + 16768
#   18816           first token of every sequence
#   18817           outro marker
#   18818           last token of every sequence
_PATCH_PITCH_OFFSET = 256
_DUR_VEL_OFFSET = 16768
_START_TOKEN = 18816
_OUTRO_TOKEN = 18817
_END_TOKEN = 18818


def _encode_chordified_score(dcscore, max_tokens=8192):
    """Encode a chordified delta score as a flat list of integer tokens.

    Args:
        dcscore: List of chords. Each chord is a non-empty list of notes and
            each note is indexed as: 0 = delta start-time, 1 = duration,
            3 = pitch, 4 = velocity, 5 = patch (index 2 is not read here).
        max_tokens: Soft cap on the sequence length. It is checked after each
            complete chord, so a chord is never split and the result may
            exceed the cap by the size of the last chord plus the end token.

    Returns:
        A list of ints that starts with the start token and ends with the end
        token.
    """
    tokens = [_START_TOKEN]
    chords_count = len(dcscore)

    for i, chord in enumerate(dcscore):

        # Outro marker: emitted once, right before the chord that starts the
        # final 64 chords, and only for scores longer than 191 chords.
        if chords_count - i == 64 and chords_count > 191:
            tokens.append(_OUTRO_TOKEN)

        # Delta start-time of the chord, taken from its first note.
        # Clamped like every other field so that a time token can never
        # spill into the patch/pitch range that starts at 256.
        tokens.append(max(0, min(255, chord[0][0])))

        for e in chord:
            dur = max(1, min(255, e[1]))
            pat = max(0, min(128, e[5]))
            ptc = max(1, min(127, e[3]))

            # Octo-velocity: the clamped velocity 8..127 maps to a bucket 0..7.
            vel = max(8, min(127, e[4]))
            velocity = round(vel / 15) - 1

            pat_ptc = (128 * pat) + ptc
            dur_vel = (8 * dur) + velocity

            tokens.extend([pat_ptc + _PATCH_PITCH_OFFSET, dur_vel + _DUR_VEL_OFFSET])

        # NOTE(review): the original cell's indentation was flattened; the
        # length check is applied per chord here so that the inner note loop
        # always completes - confirm against the upstream notebook.
        if len(tokens) > max_tokens:
            break

    tokens.append(_END_TOKEN)

    return tokens


def TMIDIX_MIDI_Processor(midi_file):
    """Convert one MIDI file into a token sequence, or reject it.

    The file is loaded and pre-processed with TMIDIX, run through a series of
    quality filters and, if it passes all of them, encoded into integer tokens.

    Args:
        midi_file: Path of the MIDI file, passed to
            TMIDIX.midi2single_track_ms_score.

    Returns:
        A list of int tokens on success. None if the file fails any filter or
        if any exception is raised while processing it (the path and the
        exception are printed in that case, so one bad file does not abort a
        batch run).
    """
    try:
        raw_score = TMIDIX.midi2single_track_ms_score(midi_file)

        escore_notes = TMIDIX.advanced_score_processor(
            raw_score,
            return_enhanced_score_notes=True,
            apply_sustain=True,
        )

        if not escore_notes:
            return None

        escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes[0], sort_drums_last=True)

        # Filter 1: at least one non-drum patch (patch != 128) and more than
        # 255 notes in total.
        has_non_drum_patch = any(e[6] != 128 for e in escore_notes)

        if not has_non_drum_patch or len(escore_notes) <= 255:
            return None

        escore_notes_without_drums = [e for e in escore_notes if e[3] != 9]

        # Filter 2: both ratios derived from the duration counter must be
        # below 0.1. A zero denominator raises and is handled by the except
        # clause below, i.e. the file is rejected.
        durs_counts = TMIDIX.escore_notes_durations_counter(escore_notes_without_drums, min_duration=128)

        if not ((durs_counts[0] / durs_counts[1]) < 0.1 and (durs_counts[2] / durs_counts[1]) < 0.1):
            return None

        # Filter 3: the non-drum notes must outnumber their distinct start
        # times by more than 10% and must use more than 4 distinct pitch
        # classes.
        escore_notes_times = [e[1] for e in escore_notes_without_drums]
        escore_notes_tones = {e[4] % 12 for e in escore_notes_without_drums}

        if not (len(escore_notes_times) > (len(set(escore_notes_times)) * 1.1) and len(escore_notes_tones) > 4):
            return None

        # Quiet scores (average velocity below 64) get their velocities
        # adjusted. The return value is not used, so this presumably modifies
        # escore_notes in place - TODO confirm.
        escore_notes_velocities = [e[5] for e in escore_notes]
        avg_escore_notes_velocity = sum(escore_notes_velocities) / len(escore_notes_velocities)

        if avg_escore_notes_velocity < 64:
            TMIDIX.adjust_score_velocities(escore_notes, 124)

        dscore = TMIDIX.delta_score_notes(escore_notes)

        # Drop the leading element of every note before chordifying; this is
        # why the indices used from here on are shifted by one.
        dcscore = TMIDIX.chordify_score([d[1:] for d in dscore])

        # Filter 4: the bad-chords ratio must be below 0.15.
        bad_chords_counts = TMIDIX.count_bad_chords_in_chordified_score(dcscore, pitches_index=3, patches_index=5)

        if not (bad_chords_counts[0] / bad_chords_counts[1]) < 0.15:
            return None

        return _encode_chordified_score(dcscore)

    except Exception as ex:
        # Deliberate best-effort boundary: report the file and keep going.
        print(midi_file)
        print(ex)
        return None

In [None]:
!mkdir DATA

In [None]:
# Process the file list in fixed-size blocks. Every block is converted in
# parallel, de-duplicated and saved to its own pickle file inside the loop,
# so no extra save is needed once the loop has finished.

print('=' * 70)
print('TMIDIX MIDI Processor')
print('=' * 70)
print('Starting up...')
print('=' * 70)

###########

NUMBER_OF_FILES_PER_ITERATION = 25000

# Running total of files that passed all filters (after de-duplication).
files_count = 0

# Ceiling division, so that a file count that is an exact multiple of the
# block size is not reported as one block too many.
total_blocks = -(-len(filez) // NUMBER_OF_FILES_PER_ITERATION)

print('Processing MIDI files. Please wait...')
print('=' * 70)

for i in range(0, len(filez), NUMBER_OF_FILES_PER_ITERATION):

    print('=' * 70)
    print('Processing block #', (i // NUMBER_OF_FILES_PER_ITERATION)+1, '/', total_blocks)
    print('=' * 70)

    output = TMIDIX.multiprocessing_wrapper(TMIDIX_MIDI_Processor, filez[i:i+NUMBER_OF_FILES_PER_ITERATION])

    # Keep only successful results (the processor returns None for rejected
    # files) and drop exact duplicates within the block.
    melody_chords_f = list({tuple(o) for o in output if o})

    # Nothing to report or save for a block without a single good file;
    # indexing melody_chords_f[0] below would fail on it.
    if not melody_chords_f:
        print('No good files in this block. Skipping save...')
        print('=' * 70)
        continue

    files_count += len(melody_chords_f)
    print('SAVING !!!')
    print('=' * 70)
    print('Saving processed files...')
    print('=' * 70)
    print('Data check:', min(melody_chords_f[0]), '===', max(melody_chords_f[0]), '===', len(set(melody_chords_f[0])), '===', len(melody_chords_f[0]))
    print('=' * 70)
    print('Processed so far:', files_count, 'out of', len(filez), '===', files_count / len(filez), 'good files ratio')
    print('=' * 70)

    # The running total is part of the file name, which keeps the name of
    # every saved block unique.
    count = str(files_count)
    TMIDIX.Tegridy_Any_Pickle_File_Writer(melody_chords_f, '/home/ubuntu/DATA/ORPHEUS_INTs_'+count)
    print('=' * 70)

# (TEST INTS)

In [None]:
# Decode the first sequence of the last processed block back into notes and
# write it out as a MIDI file as a round-trip check of the token encoding.

train_data1 = melody_chords_f[0]

print('Sample INTs', train_data1[:15])

out = train_data1

song = out
song_f = []

# Decoder state. It is initialised unconditionally so that every name used
# after the loop is bound even if the sequence contains no tokens.
time = 0
dur = 0
vel = 90
pitch = 60
channel = 0
patch = 0

# patches[c] is the patch assigned to MIDI channel c (-1 = not assigned yet).
patches = [-1] * 16

# channels[c] is 1 once channel c is taken; channel 9 is reserved for drums.
channels = [0] * 16
channels[9] = 1

for ss in song:

    if 0 <= ss < 256:

        # Delta start-time token; one step is 16 time units.
        time += ss * 16

    elif 256 <= ss < 16768:

        # Patch/pitch token.
        patch = (ss-256) // 128

        if patch < 128:

            if patch not in patches:
                # First use of this patch: take the first free channel, or
                # fall back to channel 15 when all channels are taken.
                if 0 in channels:
                    cha = channels.index(0)
                    channels[cha] = 1
                else:
                    cha = 15

                patches[cha] = patch

            channel = patches.index(patch)

        else:
            # Within this token range the only other value is patch 128 (drums).
            channel = 9

        pitch = (ss-256) % 128

    elif 16768 <= ss < 18816:

        # Duration/velocity token; it completes a note.
        dur = ((ss-16768) // 8) * 16
        vel = (((ss-16768) % 8)+1) * 15

        song_f.append(['note', time, dur, channel, pitch, vel])

# Channels that were never assigned get patch 0.
patches = [0 if x == -1 else x for x in patches]

detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,
                                                          output_signature = 'Orpheus Music Transformer',
                                                          output_file_name = '/home/ubuntu/Orpheus-Music-TransformerComposition',
                                                          track_name='Project Los Angeles',
                                                          list_of_MIDI_patches=patches
                                                          )

print('Done!')

# Congrats! You did it! :)