Fancellu commited on
Commit
a468822
·
verified ·
1 Parent(s): 145ce0a

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. .gitignore +7 -0
  3. README.md +66 -5
  4. app.py +99 -0
  5. jazz_sample.mp3 +3 -0
  6. requirements.txt +5 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ jazz_sample.mp3 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ example/*
2
+ *__pycache__*
3
+ test.py
4
+ rename.sh
5
+ flagged/*
6
+ .idea
7
+ .gradio
README.md CHANGED
@@ -1,13 +1,74 @@
1
  ---
2
- title: Piano Transcription To Midi
3
- emoji: 🏢
4
- colorFrom: green
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.38.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: 'Piano Transcription Tool: Audio->MIDI'
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Piano Transcriptor
3
+ emoji: 🎹
4
+ colorFrom: yellow
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.38.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Audio to MIDI piano transcription tool
12
  ---
13
 
14
+ # Piano Transcription Tool: Audio->MIDI
15
+
16
+ This tool converts audio recordings of piano performances to MIDI files.
17
+
18
+ It uses a deep learning model to detect piano notes in audio and transcribe them to MIDI format.
19
+
20
+ It is useful if you have a piece you want to learn for which no sheet music has been published
21
+
22
+ Once you have the MIDI, it is easy to turn it into sheet music, or drag it into Pianoteq and learn it that way
23
+
24
+ ## Features
25
+ - Audio to MIDI conversion
26
+ - Support for uploaded audio files
27
+
28
+ ## Usage
29
+
30
+ ### Docker
31
+
32
+ Two Docker configurations are available:
33
+
34
+ #### Standard Version (with GPU support)
35
+
36
+ ```bash
37
+ # Windows
38
+ run_docker.bat
39
+
40
+ # Linux/Mac
41
+ bash run_docker.sh
42
+ ```
43
+
44
+
45
+ #### Minimal Version (CPU-only)
46
+
47
+ ```bash
48
+ # Windows
49
+ run_docker_minimal.bat
50
+
51
+ # Linux/Mac
52
+ bash run_docker_minimal.sh
53
+ ```
54
+
55
+ This builds a smaller image but only supports CPU processing, which is slower for transcription.
56
+
57
+ ### Direct Python Installation
58
+
59
+ Run directly with Python after installing the requirements:
60
+
61
+ ```bash
62
+ pip install -r requirements.txt
63
+ python app.py
64
+ ```
65
+
66
+ ### UI
67
+
68
+ Drag your MP3 or WAV file into the "Upload an audio" component.
69
+
70
+ Click Transcribe
71
+
72
+ You should get an `output.mid` file in the "Download MIDI" component
73
+
74
+ Note the `jazz_sample.mp3` example, a nice 13-second jazz piano clip
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import warnings
3
+ import torch
4
+ import librosa
5
+ import huggingface_hub
6
+ import gradio as gr
7
+ from piano_transcription_inference import PianoTranscription, sample_rate
8
+
9
# Silence Gradio's noisy warning about unparsable package version URLs.
warnings.filterwarnings("ignore", message="unable to parse version details from package URL.")

# Fetch the pretrained transcription checkpoint once, at import time.
# NOTE(review): cache_dir "./__pycache__" is unconventional for model weights —
# presumably chosen so .gitignore's *__pycache__* rule hides it; confirm.
_SNAPSHOT_DIR = huggingface_hub.snapshot_download(
    "Genius-Society/piano_trans",
    cache_dir="./__pycache__",
)
WEIGHTS_PATH = _SNAPSHOT_DIR + "/CRNN_note_F1=0.9677_pedal_F1=0.9186.pth"
16
+
17
+
18
+ def audio2midi(audio_path: str, cache_dir: str):
19
+ print(f"Loading audio from {audio_path}")
20
+ try:
21
+ audio, _ = librosa.load(audio_path, sr=sample_rate, mono=True)
22
+ print("Audio loaded successfully")
23
+ except Exception as e:
24
+ print(f"Error loading audio: {e}")
25
+ raise
26
+
27
+ device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ print(f"Using device: {device}")
29
+
30
+ transcriptor = PianoTranscription(
31
+ device=device,
32
+ checkpoint_path=WEIGHTS_PATH,
33
+ )
34
+
35
+ midi_path = f"{cache_dir}/output.mid"
36
+ transcriptor.transcribe(audio, midi_path)
37
+
38
+ return midi_path, os.path.basename(audio_path).split(".")[-2].capitalize()
39
+
40
+
41
def process_audio(audio_path: str, cache_dir="./__pycache__/uploads"):
    """Validate the upload, run the transcription, and report the outcome.

    Never raises: any failure is captured into the returned status string
    (message plus full traceback) so the Gradio UI can display it verbatim.

    Args:
        audio_path: Path to the uploaded audio file.
        cache_dir: Working directory for generated output (created if absent).

    Returns:
        (status, midi_path): status is "Success" or an error description;
        midi_path is the generated MIDI file's path, or None on failure.
    """
    status = "Success"
    midi_path = None

    try:
        os.makedirs(cache_dir, exist_ok=True)

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        print(f"Audio file size: {os.path.getsize(audio_path)} bytes")

        # Title is unused by the UI; only the MIDI path matters here.
        midi_path, _ = audio2midi(audio_path, cache_dir)
        print(f"MIDI generated successfully: {midi_path}")
    except Exception as e:
        import traceback

        status = f"{e}\n{traceback.format_exc()}"

    # Guard against the transcriber reporting success without writing output.
    if midi_path and not os.path.exists(midi_path):
        print(f"Warning: MIDI file does not exist: {midi_path}")
        midi_path = None

    return status, midi_path
66
+
67
+
68
if __name__ == "__main__":
    with gr.Blocks() as demo:
        # Surface the active device so users know what speed to expect.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        gr.Markdown("# Piano Transcription Tool: Audio->MIDI")
        gr.Markdown(f"Device: {device}")
        if device == "cpu":
            gr.Markdown("Will run slower on CPU, best on GPU")

        with gr.Row():
            # Left column: input controls.
            with gr.Column(scale=1):
                uploaded_audio = gr.Audio(label="Upload an audio", type="filepath")
                transcribe_btn = gr.Button("Transcribe")

            # Right column: status text and the downloadable result.
            with gr.Column(scale=2):
                status_box = gr.Textbox(label="Status", show_copy_button=True)
                midi_download = gr.File(label="Download MIDI")

        transcribe_btn.click(
            fn=process_audio,
            inputs=uploaded_audio,
            outputs=[status_box, midi_download],
        )

        # Bundled sample clip; cached so the example result is precomputed.
        gr.Examples(
            examples=["jazz_sample.mp3"],
            inputs=uploaded_audio,
            outputs=[status_box, midi_download],
            fn=process_audio,
            cache_examples=True,
        )

    demo.launch()
jazz_sample.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8efea40299966bb30ed95226599f5ff764fe8106fe59d5f73b937798248b7644
3
+ size 214420
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ librosa
3
+ piano_transcription_inference
4
+ gradio>=5.38.0
5
+ huggingface_hub