Commit 8629f1c
Parent(s): 8429ccf

Fixed chart rendering

Files changed:
- app.py (+17, -7)
- aria/aria.py (+2, -2)
- aria/generate.py (+0, -61)
app.py (CHANGED)

@@ -65,6 +65,9 @@ models = {}
 
 def create_emotion_plot(valence, arousal):
     """Create a valence-arousal plot with the predicted emotion point"""
+    # Create figure in a process-safe way
+    plt.switch_backend('Agg')
+
     fig = plt.figure(figsize=(8, 8), dpi=100)
     ax = fig.add_subplot(111)
 
@@ -113,7 +116,13 @@ def create_emotion_plot(valence, arousal):
     # Adjust layout with more padding
     plt.tight_layout(pad=1.5)
 
-    return
+    # Save to a temporary file and return the path
+    temp_path = os.path.join(os.path.dirname(__file__), "output", "emotion_plot.png")
+    os.makedirs(os.path.dirname(temp_path), exist_ok=True)
+    plt.savefig(temp_path, bbox_inches='tight', dpi=100)
+    plt.close(fig)  # Close the figure to free memory
+
+    return temp_path
 
 def get_model(conditioning_type):
     """Get or initialize model with specified conditioning"""
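The hunks above replace the in-memory figure return with a render-to-file flow: switch matplotlib to the Agg backend, write the chart to a PNG under output/, and return the file path. A minimal standalone sketch of that pattern (the function name, plot contents, and output directory are illustrative, not the actual app.py code):

```python
import os

import matplotlib
matplotlib.use("Agg")  # headless backend; safe without a display attached
import matplotlib.pyplot as plt


def render_emotion_plot(valence: float, arousal: float, out_dir: str = "output") -> str:
    """Render a simple valence-arousal scatter and return the saved PNG path."""
    fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
    ax.scatter([valence], [arousal])
    ax.set_xlabel("Valence (negative -> positive)")
    ax.set_ylabel("Arousal (calm -> excited)")
    fig.tight_layout(pad=1.5)

    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "emotion_plot.png")
    fig.savefig(path, bbox_inches="tight", dpi=100)
    plt.close(fig)  # free the figure instead of handing it to the UI layer
    return path
```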
@@ -168,7 +177,7 @@ def convert_midi_to_wav(midi_path):
         print(f"Error converting MIDI to WAV: {str(e)}")
         return None
 
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=120)  # Set duration to 120 seconds for music generation
 def generate_music(image, conditioning_type, gen_len, temperature, top_p, min_instruments):
     """Generate music from input image"""
     model = get_model(conditioning_type)
@@ -208,11 +217,11 @@ def generate_music(image, conditioning_type, gen_len, temperature, top_p, min_instruments):
             results: "⚠️ Error: Failed to convert MIDI to WAV for playback"
         }
 
-    # Create emotion plot
-    …
+    # Create emotion plot and get its path
+    plot_path = create_emotion_plot(valence, arousal)
 
     return {
-        emotion_chart: …
+        emotion_chart: plot_path,
         midi_output: wav_path,
         results: f"""
 **Model Type:** {conditioning_type}
@@ -335,8 +344,9 @@ with gr.Blocks(title="ARIA - Art to Music Generator", theme=gr.themes.Soft(
             )
 
             with gr.Column(scale=2):
-                emotion_chart = gr.…
-                    label="Predicted Emotions"
+                emotion_chart = gr.Image(
+                    label="Predicted Emotions",
+                    type="filepath"
                 )
                 midi_output = gr.Audio(
                     type="filepath",
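With create_emotion_plot now returning a path, the output component switches to gr.Image(type="filepath"), and generate_music keeps returning a dict keyed by output components. A minimal wiring sketch under assumed names (demo_generate, the placeholder paths, and the button are hypothetical; only the gr.Image and gr.Audio settings mirror the diff):

```python
import gradio as gr


def demo_generate(image_path):
    # Stand-ins for create_emotion_plot(...) and convert_midi_to_wav(...).
    plot_path = "output/emotion_plot.png"
    wav_path = "output/generated.wav"
    # Gradio accepts a dict keyed by output components, the style used above.
    return {emotion_chart: plot_path, midi_output: wav_path}


with gr.Blocks() as demo:
    image_input = gr.Image(type="filepath", label="Input image")
    emotion_chart = gr.Image(label="Predicted Emotions", type="filepath")
    midi_output = gr.Audio(type="filepath", label="Generated music")
    gr.Button("Generate").click(
        demo_generate, inputs=image_input, outputs=[emotion_chart, midi_output]
    )

if __name__ == "__main__":
    demo.launch()
```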
aria/aria.py (CHANGED)

@@ -15,7 +15,7 @@ sys.path.append(MIDI_EMOTION_PATH)
 class ARIA:
     """ARIA model that generates music from images based on emotional content."""
 
-    @spaces.GPU(duration=…)
+    @spaces.GPU(duration=10)  # Model loading should be quick
     def __init__(
         self,
         image_model_checkpoint: str,
@@ -60,7 +60,7 @@ class ARIA:
         self.midi_model.load_state_dict(torch.load(model_fp, map_location=self.device, weights_only=True))
         self.midi_model.eval()
 
-    @spaces.GPU(duration=…)
+    @spaces.GPU(duration=60)
     @torch.inference_mode()  # More efficient than no_grad for inference
     def generate(
         self,
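Both edits attach ZeroGPU duration hints: a short window for loading weights in __init__ and a longer one for generation. A sketch of the same decorator stacking (the class and its body are illustrative stand-ins, not ARIA itself):

```python
import spaces
import torch


class ExampleModel:
    @spaces.GPU(duration=10)  # short budget: just move weights onto the GPU
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = torch.nn.Linear(8, 8).to(self.device)  # stand-in for the real checkpoint
        self.model.eval()

    @spaces.GPU(duration=60)   # longer budget for the actual generation call
    @torch.inference_mode()    # skips autograd bookkeeping during inference
    def generate(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x.to(self.device))
```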
aria/generate.py (DELETED)

@@ -1,61 +0,0 @@
-import argparse
-from src.models.aria.aria import ARIA
-
-def main():
-    parser = argparse.ArgumentParser(description="Generate music from images based on emotional content")
-
-    parser.add_argument("--image", type=str, required=True,
-                        help="Path to input image")
-    parser.add_argument("--image_model_checkpoint", type=str, required=True,
-                        help="Path to image emotion model checkpoint")
-    parser.add_argument("--midi_model_dir", type=str, required=True,
-                        help="Path to midi emotion model directory")
-    parser.add_argument("--out_dir", type=str, default="output",
-                        help="Directory to save generated MIDI")
-    parser.add_argument("--gen_len", type=int, default=512,
-                        help="Length of generation in tokens")
-    parser.add_argument("--temperature", type=float, nargs=2, default=[1.2, 1.2],
-                        help="Temperature for sampling [note_temp, rest_temp]")
-    parser.add_argument("--top_k", type=int, default=-1,
-                        help="Top-k sampling (-1 to disable)")
-    parser.add_argument("--top_p", type=float, default=0.7,
-                        help="Top-p sampling threshold")
-    parser.add_argument("--min_instruments", type=int, default=1,
-                        help="Minimum number of instruments required")
-    parser.add_argument("--cpu", action="store_true",
-                        help="Force CPU inference")
-    parser.add_argument("--conditioning", type=str, required=True,
-                        choices=["none", "discrete_token", "continuous_token", "continuous_concat"],
-                        help="Type of conditioning to use")
-    parser.add_argument("--batch_size", type=int, default=1,
-                        help="Number of samples to generate (not used for image input)")
-
-    args = parser.parse_args()
-
-    # Initialize model
-    model = ARIA(
-        image_model_checkpoint=args.image_model_checkpoint,
-        midi_model_dir=args.midi_model_dir,
-        conditioning=args.conditioning,
-        device="cpu" if args.cpu else None
-    )
-
-    # Generate music
-    valence, arousal, midi_path = model.generate(
-        image_path=args.image,
-        out_dir=args.out_dir,
-        gen_len=args.gen_len,
-        temperature=args.temperature,
-        top_k=args.top_k,
-        top_p=args.top_p,
-        min_instruments=args.min_instruments
-    )
-
-    # Print results
-    print(f"\nPredicted emotions:")
-    print(f"Valence: {valence:.3f} (negative -> positive)")
-    print(f"Arousal: {arousal:.3f} (calm -> excited)")
-    print(f"\nGenerated MIDI saved to: {midi_path}")
-
-if __name__ == "__main__":
-    main()
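With the CLI wrapper removed, generation presumably runs through app.py or the ARIA class directly. A sketch of the programmatic equivalent using the old CLI defaults; the import path follows the Space layout (aria/aria.py) rather than the src.models path the deleted script used, and the checkpoint and image paths are placeholders:

```python
from aria.aria import ARIA  # assumed import path for the Space layout

model = ARIA(
    image_model_checkpoint="checkpoints/image_emotion.pt",  # placeholder path
    midi_model_dir="checkpoints/midi_emotion",              # placeholder path
    conditioning="continuous_concat",  # one of the four supported modes
    device=None,  # None lets the model choose; "cpu" forces CPU inference
)

valence, arousal, midi_path = model.generate(
    image_path="examples/painting.jpg",  # placeholder image
    out_dir="output",
    gen_len=512,
    temperature=[1.2, 1.2],  # [note_temp, rest_temp]
    top_k=-1,                # disabled
    top_p=0.7,
    min_instruments=1,
)

print(f"Valence: {valence:.3f} (negative -> positive)")
print(f"Arousal: {arousal:.3f} (calm -> excited)")
print(f"Generated MIDI saved to: {midi_path}")
```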