Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on A100

App Files Files Community

ChuxiJ committed on Jan 11

Commit

388b5af

1 Parent(s): 03f73c6

fix bugs for save

Browse files

Files changed (3) hide show

acestep/audio_utils.py +1 -4
acestep/gradio_ui/events/__init__.py +75 -39
acestep/gradio_ui/events/results_handlers.py +13 -102

acestep/audio_utils.py CHANGED Viewed

@@ -89,15 +89,12 @@ class AudioSaver:
         try:
             if format == "mp3":
                 # MP3 uses ffmpeg backend
-                from torchaudio.io import CodecConfig
-                config = CodecConfig(bit_rate=192000, compression_level=1)
                 torchaudio.save(
                     str(output_path),
                     audio_tensor,
                     sample_rate,
                     channels_first=True,
                     backend='ffmpeg',
-                    compression=config,
                 )
             elif format in ["flac", "wav"]:
                 # FLAC and WAV use soundfile backend (fastest)
@@ -106,7 +103,7 @@ class AudioSaver:
                     audio_tensor,
                     sample_rate,
                     channels_first=True,
-                    backend='ffmpeg',
                 )
             else:
                 # Other formats use default backend

         try:
             if format == "mp3":
                 # MP3 uses ffmpeg backend
                 torchaudio.save(
                     str(output_path),
                     audio_tensor,
                     sample_rate,
                     channels_first=True,
                     backend='ffmpeg',
                 )
             elif format in ["flac", "wav"]:
                 # FLAC and WAV use soundfile backend (fastest)
                     audio_tensor,
                     sample_rate,
                     channels_first=True,
+                    backend='soundfile',
                 )
             else:
                 # Other formats use default backend

acestep/gradio_ui/events/__init__.py CHANGED Viewed

@@ -254,48 +254,84 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
         ]
     )
-    # Save buttons for audio 1 and 2
-    for btn_idx, btn_key in [(1, "save_btn_1"), (2, "save_btn_2")]:
-        results_section[btn_key].click(
-            fn=res_h.save_audio_and_metadata,
             inputs=[
                 results_section[f"generated_audio_{btn_idx}"],
-                generation_section["task_type"],
-                generation_section["captions"],
-                generation_section["lyrics"],
-                generation_section["vocal_language"],
-                generation_section["bpm"],
-                generation_section["key_scale"],
-                generation_section["time_signature"],
-                generation_section["audio_duration"],
-                generation_section["batch_size_input"],
-                generation_section["inference_steps"],
-                generation_section["guidance_scale"],
-                generation_section["seed"],
-                generation_section["random_seed_checkbox"],
-                generation_section["use_adg"],
-                generation_section["cfg_interval_start"],
-                generation_section["cfg_interval_end"],
-                generation_section["audio_format"],
-                generation_section["lm_temperature"],
-                generation_section["lm_cfg_scale"],
-                generation_section["lm_top_k"],
-                generation_section["lm_top_p"],
-                generation_section["lm_negative_prompt"],
-                generation_section["use_cot_caption"],
-                generation_section["use_cot_language"],
-                generation_section["audio_cover_strength"],
-                generation_section["think_checkbox"],
-                generation_section["text2music_audio_code_string"],
-                generation_section["repainting_start"],
-                generation_section["repainting_end"],
-                generation_section["track_name"],
-                generation_section["complete_track_classes"],
-                results_section["lm_metadata_state"],
             ],
-            outputs=[gr.File(label="Download Package", visible=False)]
-        )
     # ========== Send to SRC Handlers ==========
     for btn_idx in range(1, 9):
         results_section[f"send_to_src_btn_{btn_idx}"].click(

         ]
     )
+    # Save buttons for all 8 audio outputs
+    download_existing_js = """(current_audio, batch_files) => {
+    // Debug: print what the input actually is
+    console.log("👉 [Debug] Current Audio Input:", current_audio);
+    // 1. Safety check
+    if (!current_audio) {
+        console.warn("⚠️ No audio selected or audio is empty.");
+        return;
+    }
+    if (!batch_files || !Array.isArray(batch_files)) {
+        console.warn("⚠️ Batch file list is empty/not ready.");
+        return;
+    }
+    // 2. Smartly extract path string
+    let pathString = "";
+    if (typeof current_audio === "string") {
+        // Case A: direct path string received
+        pathString = current_audio;
+    } else if (typeof current_audio === "object") {
+        // Case B: an object is received, try common properties
+        // Gradio file objects usually have path, url, or name
+        pathString = current_audio.path || current_audio.name || current_audio.url || "";
+    }
+    if (!pathString) {
+        console.error("❌ Error: Could not extract a valid path string from input.", current_audio);
+        return;
+    }
+    // 3. Extract Key (UUID)
+    // Path could be /tmp/.../uuid.mp3 or url like /file=.../uuid.mp3
+    let filename = pathString.split(/[\\\\/]/).pop(); // get the filename
+    let key = filename.split('.')[0]; // get UUID without extension
+    console.log(`🔑 Key extracted: ${key}`);
+    // 4. Find matching file(s) in the list
+    let targets = batch_files.filter(f => {
+        // Also extract names from batch_files objects
+        // f usually contains name (backend path) and orig_name (download name)
+        const fPath = f.name || f.path || "";
+        return fPath.includes(key);
+    });
+    if (targets.length === 0) {
+        console.warn("❌ No matching files found in batch list for key:", key);
+        alert("Batch list does not contain this file yet. Please wait for generation to finish.");
+        return;
+    }
+    // 5. Trigger download(s)
+    console.log(`🎯 Found ${targets.length} files to download.`);
+    targets.forEach((f, index) => {
+        setTimeout(() => {
+            const a = document.createElement('a');
+            // Prefer url (frontend-accessible link), otherwise try data
+            a.href = f.url || f.data;
+            a.download = f.orig_name || "download";
+            a.style.display = 'none';
+            document.body.appendChild(a);
+            a.click();
+            document.body.removeChild(a);
+        }, index * 1000); // 1000ms interval to avoid browser blocking
+    });
+}
+"""
+    for btn_idx in range(1, 9):
+        results_section[f"save_btn_{btn_idx}"].click(
+            fn=None,
             inputs=[
                 results_section[f"generated_audio_{btn_idx}"],
+                results_section["generated_audio_batch"],
             ],
+        js=download_existing_js  # Run the above JS
+    )
     # ========== Send to SRC Handlers ==========
     for btn_idx in range(1, 9):
         results_section[f"send_to_src_btn_{btn_idx}"].click(

acestep/gradio_ui/events/results_handlers.py CHANGED Viewed

@@ -180,99 +180,6 @@ def update_navigation_buttons(current_batch, total_batches):
     can_go_next = current_batch < total_batches - 1
     return can_go_previous, can_go_next
-def save_audio_and_metadata(
-    audio_path, task_type, captions, lyrics, vocal_language, bpm, key_scale, time_signature, audio_duration,
-    batch_size_input, inference_steps, guidance_scale, seed, random_seed_checkbox,
-    use_adg, cfg_interval_start, cfg_interval_end, audio_format,
-    lm_temperature, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
-    use_cot_caption, use_cot_language, audio_cover_strength,
-    think_checkbox, text2music_audio_code_string, repainting_start, repainting_end,
-    track_name, complete_track_classes, lm_metadata
-):
-    """Save audio file and its metadata as a zip package"""
-    if audio_path is None:
-        gr.Warning(t("messages.no_audio_to_save"))
-        return None
-    try:
-        # Create metadata dictionary
-        metadata = {
-            "saved_at": datetime.datetime.now().isoformat(),
-            "task_type": task_type,
-            "caption": captions or "",
-            "lyrics": lyrics or "",
-            "vocal_language": vocal_language,
-            "bpm": bpm if bpm is not None else None,
-            "keyscale": key_scale or "",
-            "timesignature": time_signature or "",
-            "duration": audio_duration if audio_duration is not None else -1,
-            "batch_size": batch_size_input,
-            "inference_steps": inference_steps,
-            "guidance_scale": guidance_scale,
-            "seed": seed,
-            "random_seed": False,  # Disable random seed for reproducibility
-            "use_adg": use_adg,
-            "cfg_interval_start": cfg_interval_start,
-            "cfg_interval_end": cfg_interval_end,
-            "audio_format": audio_format,
-            "lm_temperature": lm_temperature,
-            "lm_cfg_scale": lm_cfg_scale,
-            "lm_top_k": lm_top_k,
-            "lm_top_p": lm_top_p,
-            "lm_negative_prompt": lm_negative_prompt,
-            "use_cot_caption": use_cot_caption,
-            "use_cot_language": use_cot_language,
-            "audio_cover_strength": audio_cover_strength,
-            "think": think_checkbox,
-            "audio_codes": text2music_audio_code_string or "",
-            "repainting_start": repainting_start,
-            "repainting_end": repainting_end,
-            "track_name": track_name,
-            "complete_track_classes": complete_track_classes or [],
-        }
-        # Add LM-generated metadata if available
-        if lm_metadata:
-            metadata["lm_generated_metadata"] = lm_metadata
-        # Generate timestamp and base name
-        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        # Extract audio filename extension
-        audio_ext = os.path.splitext(audio_path)[1]
-        # Create temporary directory for packaging
-        temp_dir = tempfile.mkdtemp()
-        # Save JSON metadata
-        json_path = os.path.join(temp_dir, f"metadata_{timestamp}.json")
-        with open(json_path, 'w', encoding='utf-8') as f:
-            json.dump(metadata, f, indent=2, ensure_ascii=False)
-        # Copy audio file
-        audio_copy_path = os.path.join(temp_dir, f"audio_{timestamp}{audio_ext}")
-        shutil.copy2(audio_path, audio_copy_path)
-        # Create zip file
-        zip_path = os.path.join(tempfile.gettempdir(), f"music_package_{timestamp}.zip")
-        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            zipf.write(audio_copy_path, os.path.basename(audio_copy_path))
-            zipf.write(json_path, os.path.basename(json_path))
-        # Clean up temp directory
-        shutil.rmtree(temp_dir)
-        gr.Info(t("messages.save_success", filename=os.path.basename(zip_path)))
-        return zip_path
-    except Exception as e:
-        gr.Warning(t("messages.save_failed", error=str(e)))
-        import traceback
-        traceback.print_exc()
-        return None
 def send_audio_to_src_with_metadata(audio_file, lm_metadata):
     """Send generated audio file to src_audio input and populate metadata fields
@@ -455,16 +362,17 @@ def generate_with_progress(
     align_plot_2 = None
     updated_audio_codes = text2music_audio_code_string if not think_checkbox else ""
     if not result.success:
-        # Build generation_info string for error case
-        generation_info = _build_generation_info(
-            lm_metadata=lm_generated_metadata,
-            time_costs=time_costs,
-            seed_value=seed_value_for_ui,
-            inference_steps=inference_steps,
-            num_audios=0,
-        )
-        yield (None,) * 8 + (None, generation_info, result.status_message) + (gr.skip(),) * 25
         return
     audios = result.audios
@@ -480,8 +388,11 @@ def generate_with_progress(
             json_path = os.path.join(temp_dir, f"{key}.json")
             audio_path = os.path.join(temp_dir, f"{key}.{audio_format}")
             save_audio(audio_data=audio_tensor, output_path=audio_path, sample_rate=sample_rate, format=audio_format, channels_first=True)
             audio_outputs[i] = audio_path
             all_audio_paths.append(audio_path)
             code_str = audio_params.get("audio_codes", "")
             final_codes_list[i] = code_str

     can_go_next = current_batch < total_batches - 1
     return can_go_previous, can_go_next
 def send_audio_to_src_with_metadata(audio_file, lm_metadata):
     """Send generated audio file to src_audio input and populate metadata fields
     align_plot_2 = None
     updated_audio_codes = text2music_audio_code_string if not think_checkbox else ""
+    # Build initial generation_info (will be updated with post-processing times at the end)
+    generation_info = _build_generation_info(
+        lm_metadata=lm_generated_metadata,
+        time_costs=time_costs,
+        seed_value=seed_value_for_ui,
+        inference_steps=inference_steps,
+        num_audios=len(result.audios) if result.success else 0,
+    )
     if not result.success:
+        yield (None,) * 8 + (None, generation_info, result.status_message) + (gr.skip(),) * 26
         return
     audios = result.audios
             json_path = os.path.join(temp_dir, f"{key}.json")
             audio_path = os.path.join(temp_dir, f"{key}.{audio_format}")
             save_audio(audio_data=audio_tensor, output_path=audio_path, sample_rate=sample_rate, format=audio_format, channels_first=True)
+            with open(json_path, 'w', encoding='utf-8') as f:
+                json.dump(audio_params, f, indent=2, ensure_ascii=False)
             audio_outputs[i] = audio_path
             all_audio_paths.append(audio_path)
+            all_audio_paths.append(json_path)
             code_str = audio_params.get("audio_codes", "")
             final_codes_list[i] = code_str