Fancellu commited on
Commit
a468822
·
verified ·
1 Parent(s): 145ce0a

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. .gitignore +7 -0
  3. README.md +66 -5
  4. app.py +99 -0
  5. jazz_sample.mp3 +3 -0
  6. requirements.txt +5 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ jazz_sample.mp3 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ example/*
2
+ *__pycache__*
3
+ test.py
4
+ rename.sh
5
+ flagged/*
6
+ .idea
7
+ .gradio
README.md CHANGED
@@ -1,13 +1,74 @@
1
  ---
2
- title: Piano Transcription To Midi
3
- emoji: 🏢
4
- colorFrom: green
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.38.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: 'Piano Transcription Tool: Audio->MIDI'
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Piano Transcriptor
3
+ emoji: 🎹
4
+ colorFrom: yellow
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.38.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Audio to MIDI piano transcription tool
12
  ---
13
 
14
+ # Piano Transcription Tool: Audio->MIDI
15
+
16
+ This tool converts audio recordings of piano performances to MIDI files.
17
+
18
+ It uses a deep learning model to detect piano notes in audio and transcribe them to MIDI format.
19
+
20
+ It is useful if you have a piece you want to learn for which no sheet music has been published
21
+
22
+ Once you have the MIDI, it is easy to turn it into sheet music, or drag it into Pianoteq and learn it that way
23
+
24
+ ## Features
25
+ - Audio to MIDI conversion
26
+ - Support for uploaded audio files
27
+
28
+ ## Usage
29
+
30
+ ### Docker
31
+
32
+ Two Docker configurations are available:
33
+
34
+ #### Standard Version (with GPU support)
35
+
36
+ ```bash
37
+ # Windows
38
+ run_docker.bat
39
+
40
+ # Linux/Mac
41
+ bash run_docker.sh
42
+ ```
43
+
44
+
45
+ #### Minimal Version (CPU-only)
46
+
47
+ ```bash
48
+ # Windows
49
+ run_docker_minimal.bat
50
+
51
+ # Linux/Mac
52
+ bash run_docker_minimal.sh
53
+ ```
54
+
55
+ This builds a smaller image but only supports CPU processing, which is slower for transcription.
56
+
57
+ ### Direct Python Installation
58
+
59
+ Run directly with Python after installing the requirements:
60
+
61
+ ```bash
62
+ pip install -r requirements.txt
63
+ python app.py
64
+ ```
65
+
66
+ ### UI
67
+
68
+ Drag your MP3 or WAV file into the "Upload an audio" component.
69
+
70
+ Click Transcribe
71
+
72
+ You should get an `output.mid` file in the "Download MIDI" component
73
+
74
+ Note the `jazz_sample.mp3` example, a nice 13-second jazz piano clip
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import warnings
3
+ import torch
4
+ import librosa
5
+ import huggingface_hub
6
+ import gradio as gr
7
+ from piano_transcription_inference import PianoTranscription, sample_rate
8
+
9
# Silence Gradio's noisy warning about unparsable package version URLs.
warnings.filterwarnings("ignore", message="unable to parse version details from package URL.")

# Fetch the pretrained transcription checkpoint once, at import time.
# NOTE(review): cache_dir "./__pycache__" is unconventional for model weights —
# presumably chosen so .gitignore's *__pycache__* rule hides it; confirm.
_SNAPSHOT_DIR = huggingface_hub.snapshot_download(
    "Genius-Society/piano_trans",
    cache_dir="./__pycache__",
)
WEIGHTS_PATH = _SNAPSHOT_DIR + "/CRNN_note_F1=0.9677_pedal_F1=0.9186.pth"
16
+
17
+
18
+ def audio2midi(audio_path: str, cache_dir: str):
19
+ print(f"Loading audio from {audio_path}")
20
+ try:
21
+ audio, _ = librosa.load(audio_path, sr=sample_rate, mono=True)
22
+ print("Audio loaded successfully")
23
+ except Exception as e:
24
+ print(f"Error loading audio: {e}")
25
+ raise
26
+
27
+ device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ print(f"Using device: {device}")
29
+
30
+ transcriptor = PianoTranscription(
31
+ device=device,
32
+ checkpoint_path=WEIGHTS_PATH,
33
+ )
34
+
35
+ midi_path = f"{cache_dir}/output.mid"
36
+ transcriptor.transcribe(audio, midi_path)
37
+
38
+ return midi_path, os.path.basename(audio_path).split(".")[-2].capitalize()
39
+
40
+
41
def process_audio(audio_path: str, cache_dir="./__pycache__/uploads"):
    """Validate the upload, run the transcription, and report the outcome.

    Never raises: any failure is captured into the returned status string
    (message plus full traceback) so the Gradio UI can display it verbatim.

    Args:
        audio_path: Path to the uploaded audio file.
        cache_dir: Working directory for generated output (created if absent).

    Returns:
        (status, midi_path): status is "Success" or an error description;
        midi_path is the generated MIDI file's path, or None on failure.
    """
    status = "Success"
    midi_path = None

    try:
        os.makedirs(cache_dir, exist_ok=True)

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        print(f"Audio file size: {os.path.getsize(audio_path)} bytes")

        # Title is unused by the UI; only the MIDI path matters here.
        midi_path, _ = audio2midi(audio_path, cache_dir)
        print(f"MIDI generated successfully: {midi_path}")
    except Exception as e:
        import traceback

        status = f"{e}\n{traceback.format_exc()}"

    # Guard against the transcriber reporting success without writing output.
    if midi_path and not os.path.exists(midi_path):
        print(f"Warning: MIDI file does not exist: {midi_path}")
        midi_path = None

    return status, midi_path
66
+
67
+
68
if __name__ == "__main__":
    with gr.Blocks() as demo:
        # Surface the active device so users know what speed to expect.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        gr.Markdown("# Piano Transcription Tool: Audio->MIDI")
        gr.Markdown(f"Device: {device}")
        if device == "cpu":
            gr.Markdown("Will run slower on CPU, best on GPU")

        with gr.Row():
            # Left column: input controls.
            with gr.Column(scale=1):
                uploaded_audio = gr.Audio(label="Upload an audio", type="filepath")
                transcribe_btn = gr.Button("Transcribe")

            # Right column: status text and the downloadable result.
            with gr.Column(scale=2):
                status_box = gr.Textbox(label="Status", show_copy_button=True)
                midi_download = gr.File(label="Download MIDI")

        transcribe_btn.click(
            fn=process_audio,
            inputs=uploaded_audio,
            outputs=[status_box, midi_download],
        )

        # Bundled sample clip; cached so the example result is precomputed.
        gr.Examples(
            examples=["jazz_sample.mp3"],
            inputs=uploaded_audio,
            outputs=[status_box, midi_download],
            fn=process_audio,
            cache_examples=True,
        )

    demo.launch()
jazz_sample.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8efea40299966bb30ed95226599f5ff764fe8106fe59d5f73b937798248b7644
3
+ size 214420
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ librosa
3
+ piano_transcription_inference
4
+ gradio>=5.38.0
5
+ huggingface_hub