Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on A100

App Files Files Community

ChuxiJ committed 21 days ago

Commit

0b990cd

1 Parent(s): fa05c34

refact ui

Browse files

Files changed (12) hide show

README.md +1 -1
acestep/gradio_ui/events/__init__.py +192 -14
acestep/gradio_ui/events/generation_handlers.py +99 -28
acestep/gradio_ui/events/results_handlers.py +30 -89
acestep/gradio_ui/i18n/en.json +2 -0
acestep/gradio_ui/i18n/ja.json +2 -0
acestep/gradio_ui/i18n/zh.json +2 -0
acestep/gradio_ui/interfaces/__init__.py +10 -2
acestep/gradio_ui/interfaces/generation.py +375 -448
acestep/gradio_ui/interfaces/result.py +150 -105
acestep/handler.py +67 -35
app.py +121 -41

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ short_description: Music Generation Foundation Model v1.5
     <a href="https://ace-step-v1.5.github.io">Project</a> |
     <a href="https://huggingface.co/collections/ACE-Step/ace-step-15">Hugging Face</a> |
     <a href="https://modelscope.cn/models/ACE-Step/ACE-Step-v1-5">ModelScope</a> |
-    <a href="https://huggingface.co/spaces/ACE-Step/ACE-Step-1.5">Space Demo</a> |
     <a href="https://discord.gg/PeWDxrkdj7">Discord</a> |
     <a href="https://arxiv.org/abs/2506.00045">Technical Report</a>
 </p>

     <a href="https://ace-step-v1.5.github.io">Project</a> |
     <a href="https://huggingface.co/collections/ACE-Step/ace-step-15">Hugging Face</a> |
     <a href="https://modelscope.cn/models/ACE-Step/ACE-Step-v1-5">ModelScope</a> |
+    <a href="https://huggingface.co/spaces/ACE-Step/Ace-Step-v1.5">Space Demo</a> |
     <a href="https://discord.gg/PeWDxrkdj7">Discord</a> |
     <a href="https://arxiv.org/abs/2506.00045">Technical Report</a>
 </p>

acestep/gradio_ui/events/__init__.py CHANGED Viewed

@@ -12,8 +12,20 @@ from . import training_handlers as train_h
 from acestep.gradio_ui.i18n import t
-def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, dataset_section, generation_section, results_section):
-    """Setup event handlers connecting UI components and business logic"""
     # ========== Dataset Handlers ==========
     dataset_section["import_dataset_btn"].click(
@@ -260,17 +272,42 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
         ]
     )
-    # ========== Simple/Custom Mode Toggle ==========
     generation_section["generation_mode"].change(
         fn=gen_h.handle_generation_mode_change,
         inputs=[generation_section["generation_mode"]],
         outputs=[
             generation_section["simple_mode_group"],
-            generation_section["caption_accordion"],
-            generation_section["lyrics_accordion"],
             generation_section["generate_btn"],
             generation_section["simple_sample_created"],
-            generation_section["optional_params_accordion"],
         ]
     )
@@ -451,10 +488,28 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             ],
         js=download_existing_js  # Run the above JS
     )
-    # ========== Send to SRC Handlers ==========
     for btn_idx in range(1, 9):
-        results_section[f"send_to_src_btn_{btn_idx}"].click(
-            fn=res_h.send_audio_to_src_with_metadata,
             inputs=[
                 results_section[f"generated_audio_{btn_idx}"],
                 results_section["lm_metadata_state"]
@@ -468,7 +523,50 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
                 generation_section["key_scale"],
                 generation_section["vocal_language"],
                 generation_section["time_signature"],
-                results_section["is_format_caption_state"]
             ]
         )
@@ -519,12 +617,84 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             ]
         )
-    def generation_wrapper(*args):
-        yield from res_h.generate_with_batch_management(dit_handler, llm_handler, *args)
     # ========== Generation Handler ==========
     generation_section["generate_btn"].click(
         fn=generation_wrapper,
         inputs=[
             generation_section["captions"],
             generation_section["lyrics"],
             generation_section["bpm"],
@@ -634,8 +804,12 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             results_section["restore_params_btn"],
         ]
     ).then(
-        fn=lambda *args: res_h.generate_next_batch_background(dit_handler, llm_handler, *args),
         inputs=[
             generation_section["autogen_checkbox"],
             results_section["generation_params_state"],
             results_section["current_batch_index"],
@@ -819,8 +993,12 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             results_section["restore_params_btn"],
         ]
     ).then(
-        fn=lambda *args: res_h.generate_next_batch_background(dit_handler, llm_handler, *args),
         inputs=[
             generation_section["autogen_checkbox"],
             results_section["generation_params_state"],
             results_section["current_batch_index"],

 from acestep.gradio_ui.i18n import t
+def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, dataset_section, generation_section, results_section, init_params=None):
+    """Setup event handlers connecting UI components and business logic
+    Args:
+        init_params: Dictionary containing initialization parameters including:
+            - dit_handler_2: Optional second DiT handler for multi-model setup
+            - available_dit_models: List of available DiT model names
+            - config_path: Primary model config path
+            - config_path_2: Secondary model config path (if available)
+    """
+    # Get secondary DiT handler from init_params (for multi-model support)
+    dit_handler_2 = init_params.get('dit_handler_2') if init_params else None
+    config_path_1 = init_params.get('config_path', '') if init_params else ''
+    config_path_2 = init_params.get('config_path_2', '') if init_params else ''
     # ========== Dataset Handlers ==========
     dataset_section["import_dataset_btn"].click(
         ]
     )
+    # ========== Generation Mode Toggle (Simple/Custom/Cover/Repaint) ==========
     generation_section["generation_mode"].change(
         fn=gen_h.handle_generation_mode_change,
         inputs=[generation_section["generation_mode"]],
         outputs=[
             generation_section["simple_mode_group"],
+            generation_section["custom_mode_content"],
+            generation_section["cover_mode_group"],
+            generation_section["repainting_group"],
+            generation_section["task_type"],
             generation_section["generate_btn"],
             generation_section["simple_sample_created"],
+            generation_section["src_audio_group"],
+            generation_section["audio_cover_strength"],
+        ]
+    )
+    # ========== Process Source Audio Button ==========
+    # Combines Convert to Codes + Transcribe in one step
+    generation_section["process_src_btn"].click(
+        fn=lambda src, debug: gen_h.process_source_audio(dit_handler, llm_handler, src, debug),
+        inputs=[
+            generation_section["src_audio"],
+            generation_section["constrained_decoding_debug"]
+        ],
+        outputs=[
+            generation_section["text2music_audio_code_string"],
+            results_section["status_output"],
+            generation_section["captions"],
+            generation_section["lyrics"],
+            generation_section["bpm"],
+            generation_section["audio_duration"],
+            generation_section["key_scale"],
+            generation_section["vocal_language"],
+            generation_section["time_signature"],
+            results_section["is_format_caption_state"],
         ]
     )
             ],
         js=download_existing_js  # Run the above JS
     )
+    # ========== Send to Cover Handlers ==========
+    def send_to_cover_handler(audio_file, lm_metadata):
+        """Send audio to cover mode and switch to cover"""
+        if audio_file is None:
+            return (gr.skip(),) * 11
+        return (
+            audio_file,      # src_audio
+            gr.skip(),       # bpm
+            gr.skip(),       # captions
+            gr.skip(),       # lyrics
+            gr.skip(),       # audio_duration
+            gr.skip(),       # key_scale
+            gr.skip(),       # vocal_language
+            gr.skip(),       # time_signature
+            gr.skip(),       # is_format_caption_state
+            "cover",         # generation_mode - switch to cover
+            "cover",         # task_type - set to cover
+        )
     for btn_idx in range(1, 9):
+        results_section[f"send_to_cover_btn_{btn_idx}"].click(
+            fn=send_to_cover_handler,
             inputs=[
                 results_section[f"generated_audio_{btn_idx}"],
                 results_section["lm_metadata_state"]
                 generation_section["key_scale"],
                 generation_section["vocal_language"],
                 generation_section["time_signature"],
+                results_section["is_format_caption_state"],
+                generation_section["generation_mode"],
+                generation_section["task_type"],
+            ]
+        )
+    # ========== Send to Repaint Handlers ==========
+    def send_to_repaint_handler(audio_file, lm_metadata):
+        """Send audio to repaint mode and switch to repaint"""
+        if audio_file is None:
+            return (gr.skip(),) * 11
+        return (
+            audio_file,      # src_audio
+            gr.skip(),       # bpm
+            gr.skip(),       # captions
+            gr.skip(),       # lyrics
+            gr.skip(),       # audio_duration
+            gr.skip(),       # key_scale
+            gr.skip(),       # vocal_language
+            gr.skip(),       # time_signature
+            gr.skip(),       # is_format_caption_state
+            "repaint",       # generation_mode - switch to repaint
+            "repaint",       # task_type - set to repaint
+        )
+    for btn_idx in range(1, 9):
+        results_section[f"send_to_repaint_btn_{btn_idx}"].click(
+            fn=send_to_repaint_handler,
+            inputs=[
+                results_section[f"generated_audio_{btn_idx}"],
+                results_section["lm_metadata_state"]
+            ],
+            outputs=[
+                generation_section["src_audio"],
+                generation_section["bpm"],
+                generation_section["captions"],
+                generation_section["lyrics"],
+                generation_section["audio_duration"],
+                generation_section["key_scale"],
+                generation_section["vocal_language"],
+                generation_section["time_signature"],
+                results_section["is_format_caption_state"],
+                generation_section["generation_mode"],
+                generation_section["task_type"],
             ]
         )
             ]
         )
+    def generation_wrapper(selected_model, generation_mode, simple_query_input, simple_vocal_language, *args):
+        """Wrapper that selects the appropriate DiT handler based on model selection"""
+        # Convert args to list for modification
+        args_list = list(args)
+        # args order (after simple mode params):
+        # captions (0), lyrics (1), bpm (2), key_scale (3), time_signature (4), vocal_language (5),
+        # inference_steps (6), guidance_scale (7), random_seed_checkbox (8), seed (9),
+        # reference_audio (10), audio_duration (11), batch_size_input (12), src_audio (13),
+        # text2music_audio_code_string (14), repainting_start (15), repainting_end (16),
+        # instruction_display_gen (17), audio_cover_strength (18), task_type (19), ...
+        # ... lm_temperature (27), think_checkbox (28), ...
+        # ... instrumental_checkbox (at position after all regular params)
+        src_audio = args_list[13] if len(args_list) > 13 else None
+        task_type = args_list[19] if len(args_list) > 19 else "text2music"
+        # Validate: Cover and Repaint modes require source audio
+        if task_type in ["cover", "repaint"] and src_audio is None:
+            raise gr.Error(f"Source Audio is required for {task_type.capitalize()} mode. Please upload an audio file.")
+        # Handle Simple mode: first create sample, then generate
+        if generation_mode == "simple":
+            # Get instrumental from the main checkbox (args[-6] based on input order)
+            # The instrumental_checkbox is passed after all the regular generation params
+            instrumental = args_list[-6] if len(args_list) > 6 else False  # instrumental_checkbox position
+            lm_temperature = args_list[27] if len(args_list) > 27 else 0.85
+            lm_top_k = args_list[30] if len(args_list) > 30 else 0
+            lm_top_p = args_list[31] if len(args_list) > 31 else 0.9
+            constrained_decoding_debug = args_list[38] if len(args_list) > 38 else False
+            # Call create_sample to generate caption/lyrics/metadata
+            from acestep.inference import create_sample
+            top_k_value = None if not lm_top_k or lm_top_k == 0 else int(lm_top_k)
+            top_p_value = None if not lm_top_p or lm_top_p >= 1.0 else lm_top_p
+            result = create_sample(
+                llm_handler=llm_handler,
+                query=simple_query_input,
+                instrumental=instrumental,
+                vocal_language=simple_vocal_language,
+                temperature=lm_temperature,
+                top_k=top_k_value,
+                top_p=top_p_value,
+                use_constrained_decoding=True,
+                constrained_decoding_debug=constrained_decoding_debug,
+            )
+            if not result.success:
+                raise gr.Error(f"Failed to create sample: {result.status_message}")
+            # Update args with generated data
+            args_list[0] = result.caption  # captions
+            args_list[1] = result.lyrics  # lyrics
+            args_list[2] = result.bpm  # bpm
+            args_list[3] = result.keyscale  # key_scale
+            args_list[4] = result.timesignature  # time_signature
+            args_list[5] = result.language  # vocal_language
+            if result.duration and result.duration > 0:
+                args_list[11] = result.duration  # audio_duration
+            # Enable thinking for Simple mode
+            args_list[28] = True  # think_checkbox
+        # Determine which handler to use
+        active_handler = dit_handler  # Default to primary handler
+        if dit_handler_2 is not None and selected_model == config_path_2:
+            active_handler = dit_handler_2
+        yield from res_h.generate_with_batch_management(active_handler, llm_handler, *args_list)
     # ========== Generation Handler ==========
     generation_section["generate_btn"].click(
         fn=generation_wrapper,
         inputs=[
+            generation_section["dit_model_selector"],  # Model selection input
+            generation_section["generation_mode"],  # For Simple mode detection
+            generation_section["simple_query_input"],  # Simple mode query
+            generation_section["simple_vocal_language"],  # Simple mode vocal language
             generation_section["captions"],
             generation_section["lyrics"],
             generation_section["bpm"],
             results_section["restore_params_btn"],
         ]
     ).then(
+        fn=lambda selected_model, *args: res_h.generate_next_batch_background(
+            dit_handler_2 if (dit_handler_2 is not None and selected_model == config_path_2) else dit_handler,
+            llm_handler, *args
+        ),
         inputs=[
+            generation_section["dit_model_selector"],  # Model selection input
             generation_section["autogen_checkbox"],
             results_section["generation_params_state"],
             results_section["current_batch_index"],
             results_section["restore_params_btn"],
         ]
     ).then(
+        fn=lambda selected_model, *args: res_h.generate_next_batch_background(
+            dit_handler_2 if (dit_handler_2 is not None and selected_model == config_path_2) else dit_handler,
+            llm_handler, *args
+        ),
         inputs=[
+            generation_section["dit_model_selector"],  # Model selection input
             generation_section["autogen_checkbox"],
             results_section["generation_params_state"],
             results_section["current_batch_index"],

acestep/gradio_ui/events/generation_handlers.py CHANGED Viewed

@@ -480,10 +480,14 @@ def update_negative_prompt_visibility(init_llm_checked):
 def update_audio_cover_strength_visibility(task_type_value, init_llm_checked):
     """Update audio_cover_strength visibility and label"""
-    # Show if task is cover OR if LM is initialized
-    is_visible = (task_type_value == "cover") or init_llm_checked
     # Change label based on context
-    if init_llm_checked and task_type_value != "cover":
         label = "LM codes strength"
         info = "Control how many denoising steps use LM-generated codes"
     else:
@@ -518,10 +522,12 @@ def update_instruction_ui(
     track_name_visible = task_type_value in ["lego", "extract"]
     # Show complete_track_classes for complete
     complete_visible = task_type_value == "complete"
-    # Show audio_cover_strength for cover OR when LM is initialized
-    audio_cover_strength_visible = (task_type_value == "cover") or init_llm_checked
     # Determine label and info based on context
-    if init_llm_checked and task_type_value != "cover":
         audio_cover_strength_label = "LM codes strength"
         audio_cover_strength_info = "Control how many denoising steps use LM-generated codes"
     else:
@@ -605,9 +611,9 @@ def reset_format_caption_flag():
 def update_audio_uploads_accordion(reference_audio, src_audio):
-    """Update Audio Uploads accordion open state based on whether audio files are present"""
     has_audio = (reference_audio is not None) or (src_audio is not None)
-    return gr.Accordion(open=has_audio)
 def handle_instrumental_checkbox(instrumental_checked, current_lyrics):
@@ -682,41 +688,106 @@ def update_audio_components_visibility(batch_size):
 def handle_generation_mode_change(mode: str):
     """
-    Handle generation mode change between Simple and Custom modes.
-    In Simple mode:
-    - Show simple mode group (query input, instrumental checkbox, create button)
-    - Collapse caption and lyrics accordions
-    - Hide optional parameters accordion
-    - Disable generate button until sample is created
-    In Custom mode:
-    - Hide simple mode group
-    - Expand caption and lyrics accordions
-    - Show optional parameters accordion
-    - Enable generate button
     Args:
-        mode: "simple" or "custom"
     Returns:
         Tuple of updates for:
         - simple_mode_group (visibility)
-        - caption_accordion (open state)
-        - lyrics_accordion (open state)
         - generate_btn (interactive state)
         - simple_sample_created (reset state)
-        - optional_params_accordion (visibility)
     """
     is_simple = mode == "simple"
     return (
         gr.update(visible=is_simple),  # simple_mode_group
-        gr.Accordion(open=not is_simple),  # caption_accordion - collapsed in simple, open in custom
-        gr.Accordion(open=not is_simple),  # lyrics_accordion - collapsed in simple, open in custom
-        gr.update(interactive=not is_simple),  # generate_btn - disabled in simple until sample created
         False,  # simple_sample_created - reset to False on mode change
-        gr.Accordion(open=not is_simple),  # optional_params_accordion - hidden in simple mode
     )

 def update_audio_cover_strength_visibility(task_type_value, init_llm_checked):
     """Update audio_cover_strength visibility and label"""
+    # Show if task is cover OR if LM is initialized (but NOT for repaint mode)
+    # Repaint mode never shows this control
+    is_repaint = task_type_value == "repaint"
+    is_cover = task_type_value == "cover"
+    is_visible = is_cover or (init_llm_checked and not is_repaint)
     # Change label based on context
+    if init_llm_checked and not is_cover:
         label = "LM codes strength"
         info = "Control how many denoising steps use LM-generated codes"
     else:
     track_name_visible = task_type_value in ["lego", "extract"]
     # Show complete_track_classes for complete
     complete_visible = task_type_value == "complete"
+    # Show audio_cover_strength for cover OR when LM is initialized (but NOT for repaint)
+    is_repaint = task_type_value == "repaint"
+    is_cover = task_type_value == "cover"
+    audio_cover_strength_visible = is_cover or (init_llm_checked and not is_repaint)
     # Determine label and info based on context
+    if init_llm_checked and not is_cover:
         audio_cover_strength_label = "LM codes strength"
         audio_cover_strength_info = "Control how many denoising steps use LM-generated codes"
     else:
 def update_audio_uploads_accordion(reference_audio, src_audio):
+    """Update Audio Uploads visibility based on whether audio files are present"""
     has_audio = (reference_audio is not None) or (src_audio is not None)
+    return gr.update(visible=has_audio)
 def handle_instrumental_checkbox(instrumental_checked, current_lyrics):
 def handle_generation_mode_change(mode: str):
     """
+    Handle generation mode change between Simple, Custom, Cover, and Repaint modes.
+    Modes:
+    - Simple: Show simple mode group, hide others
+    - Custom: Show custom content (prompt), hide others
+    - Cover: Show src_audio_group + custom content + LM codes strength
+    - Repaint: Show src_audio_group + custom content + repaint time controls (hide LM codes strength)
     Args:
+        mode: "simple", "custom", "cover", or "repaint"
     Returns:
         Tuple of updates for:
         - simple_mode_group (visibility)
+        - custom_mode_content (visibility)
+        - cover_mode_group (visibility) - legacy, always hidden
+        - repainting_group (visibility)
+        - task_type (value)
         - generate_btn (interactive state)
         - simple_sample_created (reset state)
+        - src_audio_group (visibility) - shown for cover and repaint
+        - audio_cover_strength (visibility) - shown only for cover mode
     """
     is_simple = mode == "simple"
+    is_custom = mode == "custom"
+    is_cover = mode == "cover"
+    is_repaint = mode == "repaint"
+    # Map mode to task_type
+    task_type_map = {
+        "simple": "text2music",
+        "custom": "text2music",
+        "cover": "cover",
+        "repaint": "repaint",
+    }
+    task_type_value = task_type_map.get(mode, "text2music")
     return (
         gr.update(visible=is_simple),  # simple_mode_group
+        gr.update(visible=not is_simple),  # custom_mode_content - visible for custom/cover/repaint
+        gr.update(visible=False),  # cover_mode_group - legacy, always hidden
+        gr.update(visible=is_repaint),  # repainting_group - time range controls
+        gr.update(value=task_type_value),  # task_type
+        gr.update(interactive=True),  # generate_btn - always enabled (Simple mode does create+generate in one step)
         False,  # simple_sample_created - reset to False on mode change
+        gr.update(visible=is_cover or is_repaint),  # src_audio_group - shown for cover and repaint
+        gr.update(visible=is_cover),  # audio_cover_strength - only shown for cover mode
+    )
def _source_audio_failure(status_message, codes_string=""):
    """Build the 10-field result for a failed step, with all metadata blanked."""
    return (codes_string, status_message, "", "", None, None, "", "", "", False)


def process_source_audio(dit_handler, llm_handler, src_audio, constrained_decoding_debug):
    """Convert source audio to codes, then transcribe those codes.

    Combines the "convert to codes" and "transcribe" actions into one step.

    Args:
        dit_handler: Object providing ``convert_src_audio_to_codes(src_audio)``.
        llm_handler: LLM handler passed through to ``understand_music``.
        src_audio: Source audio value from the UI, or None when nothing is set.
        constrained_decoding_debug: Forwarded to ``understand_music``.

    Returns:
        A 10-item tuple: (audio_codes, status_message, caption, lyrics, bpm,
        duration, keyscale, language, timesignature, is_format_caption).
        On failure the metadata fields are blank and is_format_caption is
        False; codes are still returned if conversion succeeded.
    """
    if src_audio is None:
        return _source_audio_failure("No audio file provided")

    # Step 1: convert audio to codes. Any conversion error is reported through
    # the status message rather than raised.
    try:
        codes_string = dit_handler.convert_src_audio_to_codes(src_audio)
    except Exception as e:
        return _source_audio_failure(f"Error converting audio: {e}")
    if not codes_string:
        return _source_audio_failure("Failed to convert audio to codes")

    # Step 2: transcribe the codes.
    result = understand_music(
        llm_handler=llm_handler,
        audio_codes=codes_string,
        use_constrained_decoding=True,
        constrained_decoding_debug=constrained_decoding_debug,
    )

    if not result.success:
        # Keep the codes from step 1 so the conversion result is not lost.
        if result.error == "LLM not initialized":
            return _source_audio_failure(t("messages.lm_not_initialized"), codes_string)
        return _source_audio_failure(result.status_message, codes_string)

    return (
        codes_string,
        result.status_message,
        result.caption,
        result.lyrics,
        result.bpm,
        result.duration,
        result.keyscale,
        result.language,
        result.timesignature,
        True,  # is_format_caption: the fields now hold transcribed values
    )

acestep/gradio_ui/events/results_handlers.py CHANGED Viewed

@@ -265,106 +265,45 @@ def _build_generation_info(
         Formatted generation info string
     """
     info_parts = []
-    # Part 1: Per-track average time (prominently displayed at the top)
-    # Only count model time (LM + DiT), not post-processing like audio conversion
-    if time_costs and num_audios > 0:
-        lm_total = time_costs.get('lm_total_time', 0.0)
-        dit_total = time_costs.get('dit_total_time_cost', 0.0)
-        model_total = lm_total + dit_total
-        if model_total > 0:
-            avg_time_per_track = model_total / num_audios
-            avg_section = f"**🎯 Average Time per Track: {avg_time_per_track:.2f}s** ({num_audios} track(s))"
-            info_parts.append(avg_section)
-    # Part 2: LM-generated metadata (if available)
-    if lm_metadata:
-        metadata_lines = []
-        if lm_metadata.get('bpm'):
-            metadata_lines.append(f"- **BPM:** {lm_metadata['bpm']}")
-        if lm_metadata.get('caption'):
-            metadata_lines.append(f"- **Refined Caption:** {lm_metadata['caption']}")
-        if lm_metadata.get('lyrics'):
-            metadata_lines.append(f"- **Refined Lyrics:** {lm_metadata['lyrics']}")
-        if lm_metadata.get('duration'):
-            metadata_lines.append(f"- **Duration:** {lm_metadata['duration']} seconds")
-        if lm_metadata.get('keyscale'):
-            metadata_lines.append(f"- **Key Scale:** {lm_metadata['keyscale']}")
-        if lm_metadata.get('language'):
-            metadata_lines.append(f"- **Language:** {lm_metadata['language']}")
-        if lm_metadata.get('timesignature'):
-            metadata_lines.append(f"- **Time Signature:** {lm_metadata['timesignature']}")
-        if metadata_lines:
-            metadata_section = "**🤖 LM-Generated Metadata:**\n" + "\n".join(metadata_lines)
-            info_parts.append(metadata_section)
-    # Part 3: Time costs breakdown (formatted and beautified)
     if time_costs:
-        time_lines = []
-        # LM time costs
-        lm_phase1 = time_costs.get('lm_phase1_time', 0.0)
-        lm_phase2 = time_costs.get('lm_phase2_time', 0.0)
         lm_total = time_costs.get('lm_total_time', 0.0)
-        if lm_total > 0:
-            time_lines.append("**🧠 LM Time:**")
-            if lm_phase1 > 0:
-                time_lines.append(f"  - Phase 1 (CoT): {lm_phase1:.2f}s")
-            if lm_phase2 > 0:
-                time_lines.append(f"  - Phase 2 (Codes): {lm_phase2:.2f}s")
-            time_lines.append(f"  - Total: {lm_total:.2f}s")
-        # DiT time costs
-        dit_encoder = time_costs.get('dit_encoder_time_cost', 0.0)
-        dit_model = time_costs.get('dit_model_time_cost', 0.0)
-        dit_vae_decode = time_costs.get('dit_vae_decode_time_cost', 0.0)
-        dit_offload = time_costs.get('dit_offload_time_cost', 0.0)
         dit_total = time_costs.get('dit_total_time_cost', 0.0)
-        if dit_total > 0:
-            time_lines.append("\n**🎵 DiT Time:**")
-            if dit_encoder > 0:
-                time_lines.append(f"  - Encoder: {dit_encoder:.2f}s")
-            if dit_model > 0:
-                time_lines.append(f"  - Model: {dit_model:.2f}s")
-            if dit_vae_decode > 0:
-                time_lines.append(f"  - VAE Decode: {dit_vae_decode:.2f}s")
-            if dit_offload > 0:
-                time_lines.append(f"  - Offload: {dit_offload:.2f}s")
-            time_lines.append(f"  - Total: {dit_total:.2f}s")
-        # Post-processing time costs
         audio_conversion_time = time_costs.get('audio_conversion_time', 0.0)
         auto_score_time = time_costs.get('auto_score_time', 0.0)
         auto_lrc_time = time_costs.get('auto_lrc_time', 0.0)
-        if audio_conversion_time > 0 or auto_score_time > 0 or auto_lrc_time > 0:
-            time_lines.append("\n**🔧 Post-processing Time:**")
             if audio_conversion_time > 0:
-                time_lines.append(f"  - Audio Conversion: {audio_conversion_time:.2f}s")
             if auto_score_time > 0:
-                time_lines.append(f"  - Auto Score: {auto_score_time:.2f}s")
             if auto_lrc_time > 0:
-                time_lines.append(f"  - Auto LRC: {auto_lrc_time:.2f}s")
-        if time_lines:
-            time_section = "\n".join(time_lines)
-            info_parts.append(time_section)
-    # Part 4: Generation summary
-    summary_lines = [
-        "**🎵 Generation Complete**",
-        f"  - **Seeds:** {seed_value}",
-        f"  - **Steps:** {inference_steps}",
-        f"  - **Audio Count:** {num_audios} audio(s)",
-    ]
-    info_parts.append("\n".join(summary_lines))
-    # Part 5: Pipeline total time (at the end)
-    pipeline_total = time_costs.get('pipeline_total_time', 0.0) if time_costs else 0.0
-    if pipeline_total > 0:
-        info_parts.append(f"**⏱️ Total Time: {pipeline_total:.2f}s**")
     # Combine all parts
     return "\n\n".join(info_parts)
@@ -775,7 +714,9 @@ def generate_with_progress(
             codes_display_updates[i] = gr.update(value=code_str, visible=True)  # Keep visible=True
             details_accordion_updates = [gr.skip() for _ in range(8)]
-            # Don't change accordion visibility - keep it always expandable
             # Clear LRC first (this triggers .change() to clear subtitles)
             # Keep visible=True to ensure .change() event is properly triggered

         Formatted generation info string
     """
     info_parts = []
+    songs_label = f"({num_audios} songs)"
+    # Part 1: Total generation time (LM + DiT)
     if time_costs:
         lm_total = time_costs.get('lm_total_time', 0.0)
         dit_total = time_costs.get('dit_total_time_cost', 0.0)
+        generation_total = lm_total + dit_total
+        if generation_total > 0:
+            avg_per_song = generation_total / num_audios if num_audios > 0 else 0
+            gen_lines = [
+                f"**🎵 Total generation time {songs_label}: {generation_total:.2f}s**",
+                f"**{avg_per_song:.2f}s per song**",
+            ]
+            if lm_total > 0:
+                gen_lines.append(f"- LM phase {songs_label}: {lm_total:.2f}s")
+            if dit_total > 0:
+                gen_lines.append(f"- DiT phase {songs_label}: {dit_total:.2f}s")
+            info_parts.append("\n".join(gen_lines))
+    # Part 2: Total processing time (post-processing)
+    if time_costs:
         audio_conversion_time = time_costs.get('audio_conversion_time', 0.0)
         auto_score_time = time_costs.get('auto_score_time', 0.0)
         auto_lrc_time = time_costs.get('auto_lrc_time', 0.0)
+        processing_total = audio_conversion_time + auto_score_time + auto_lrc_time
+        if processing_total > 0:
+            proc_lines = [
+                f"**🔧 Total processing time {songs_label}: {processing_total:.2f}s**",
+            ]
             if audio_conversion_time > 0:
+                info_format = time_costs.get('audio_format', 'mp3')
+                proc_lines.append(f"- to {info_format} {songs_label}: {audio_conversion_time:.2f}s")
             if auto_score_time > 0:
+                proc_lines.append(f"- scoring {songs_label}: {auto_score_time:.2f}s")
             if auto_lrc_time > 0:
+                proc_lines.append(f"- LRC detection {songs_label}: {auto_lrc_time:.2f}s")
+            info_parts.append("\n".join(proc_lines))
     # Combine all parts
     return "\n\n".join(info_parts)
             codes_display_updates[i] = gr.update(value=code_str, visible=True)  # Keep visible=True
             details_accordion_updates = [gr.skip() for _ in range(8)]
+            # Auto-expand accordion if auto_score or auto_lrc is enabled
+            if auto_score or auto_lrc:
+                details_accordion_updates[i] = gr.Accordion(open=True)
             # Clear LRC first (this triggers .change() to clear subtitles)
             # Keep visible=True to ensure .change() event is properly triggered

acestep/gradio_ui/i18n/en.json CHANGED Viewed

@@ -174,6 +174,8 @@
     "title": "🎵 Results",
     "generated_music": "🎵 Generated Music (Sample {n})",
     "send_to_src_btn": "🔗 Send To Src Audio",
     "save_btn": "💾 Save",
     "score_btn": "📊 Score",
     "lrc_btn": "🎵 LRC",

     "title": "🎵 Results",
     "generated_music": "🎵 Generated Music (Sample {n})",
     "send_to_src_btn": "🔗 Send To Src Audio",
+    "send_to_cover_btn": "🔗 Send To Cover",
+    "send_to_repaint_btn": "🔗 Send To Repaint",
     "save_btn": "💾 Save",
     "score_btn": "📊 Score",
     "lrc_btn": "🎵 LRC",

acestep/gradio_ui/i18n/ja.json CHANGED Viewed

@@ -174,6 +174,8 @@
     "title": "🎵 結果",
     "generated_music": "🎵 生成された音楽(サンプル {n})",
     "send_to_src_btn": "🔗 ソースオーディオに送信",
     "save_btn": "💾 保存",
     "score_btn": "📊 スコア",
     "lrc_btn": "🎵 LRC",

     "title": "🎵 結果",
     "generated_music": "🎵 生成された音楽(サンプル {n})",
     "send_to_src_btn": "🔗 ソースオーディオに送信",
+    "send_to_cover_btn": "🔗 カバーに送信",
+    "send_to_repaint_btn": "🔗 リペイントに送信",
     "save_btn": "💾 保存",
     "score_btn": "📊 スコア",
     "lrc_btn": "🎵 LRC",

acestep/gradio_ui/i18n/zh.json CHANGED Viewed

@@ -174,6 +174,8 @@
     "title": "🎵 结果",
     "generated_music": "🎵 生成的音乐(样本 {n})",
     "send_to_src_btn": "🔗 发送到源音频",
     "save_btn": "💾 保存",
     "score_btn": "📊 评分",
     "lrc_btn": "🎵 LRC",

     "title": "🎵 结果",
     "generated_music": "🎵 生成的音乐(样本 {n})",
     "send_to_src_btn": "🔗 发送到源音频",
+    "send_to_cover_btn": "🔗 发送到翻唱",
+    "send_to_repaint_btn": "🔗 发送到重绘",
     "save_btn": "💾 保存",
     "score_btn": "📊 评分",
     "lrc_btn": "🎵 LRC",

acestep/gradio_ui/interfaces/__init__.py CHANGED Viewed

@@ -65,6 +65,14 @@ def create_gradio_interface(dit_handler, llm_handler, dataset_handler, init_para
         <div class="main-header">
             <h1>{t("app.title")}</h1>
             <p>{t("app.subtitle")}</p>
         </div>
         """)
@@ -81,8 +89,8 @@ def create_gradio_interface(dit_handler, llm_handler, dataset_handler, init_para
         # Pass init_params to support hiding in service mode
         training_section = create_training_section(dit_handler, llm_handler, init_params=init_params)
-        # Connect event handlers
-        setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, dataset_section, generation_section, results_section)
         # Connect training event handlers
         setup_training_event_handlers(demo, dit_handler, llm_handler, training_section)

         <div class="main-header">
             <h1>{t("app.title")}</h1>
             <p>{t("app.subtitle")}</p>
+            <p style="margin-top: 0.5rem;">
+                <a href="https://ace-step-v1.5.github.io" target="_blank">Project</a> |
+                <a href="https://huggingface.co/collections/ACE-Step/ace-step-15" target="_blank">Hugging Face</a> |
+                <a href="https://modelscope.cn/models/ACE-Step/ACE-Step-v1-5" target="_blank">ModelScope</a> |
+                <a href="https://huggingface.co/spaces/ACE-Step/Ace-Step-v1.5" target="_blank">Space Demo</a> |
+                <a href="https://discord.gg/PeWDxrkdj7" target="_blank">Discord</a> |
+                <a href="https://arxiv.org/abs/2506.00045" target="_blank">Technical Report</a>
+            </p>
         </div>
         """)
         # Pass init_params to support hiding in service mode
         training_section = create_training_section(dit_handler, llm_handler, init_params=init_params)
+        # Connect event handlers (pass init_params for multi-model support)
+        setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, dataset_section, generation_section, results_section, init_params=init_params)
         # Connect training event handlers
         setup_training_event_handlers(demo, dit_handler, llm_handler, training_section)

acestep/gradio_ui/interfaces/generation.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 Gradio UI Generation Section Module
-Contains generation section component definitions
 """
 import gradio as gr
 from acestep.constants import (
@@ -14,7 +14,7 @@ from acestep.gradio_ui.i18n import t
 def create_generation_section(dit_handler, llm_handler, init_params=None, language='en') -> dict:
-    """Create generation section
     Args:
         dit_handler: DiT handler instance
@@ -32,10 +32,15 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
     # Get current language from init_params if available
     current_language = init_params.get('language', language) if init_params else language
     with gr.Group():
-        # Service Configuration - collapse if pre-initialized, hide if in service mode
         accordion_open = not service_pre_initialized
-        accordion_visible = not service_pre_initialized  # Hide when running in service mode
         with gr.Accordion(t("service.title"), open=accordion_open, visible=accordion_visible) as service_config_accordion:
             # Language selector at the top
             with gr.Row():
@@ -51,10 +56,8 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     scale=1,
                 )
-            # Dropdown options section - all dropdowns grouped together
             with gr.Row(equal_height=True):
                 with gr.Column(scale=4):
-                    # Set checkpoint value from init_params if pre-initialized
                     checkpoint_value = init_params.get('checkpoint') if service_pre_initialized else None
                     checkpoint_dropdown = gr.Dropdown(
                         label=t("service.checkpoint_label"),
@@ -66,11 +69,8 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     refresh_btn = gr.Button(t("service.refresh_btn"), size="sm")
             with gr.Row():
-                # Get available acestep-v15- model list
                 available_models = dit_handler.get_available_acestep_v15_models()
                 default_model = "acestep-v15-turbo" if "acestep-v15-turbo" in available_models else (available_models[0] if available_models else None)
-                # Set config_path value from init_params if pre-initialized
                 config_path_value = init_params.get('config_path', default_model) if service_pre_initialized else default_model
                 config_path = gr.Dropdown(
                     label=t("service.model_path_label"),
@@ -78,7 +78,6 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     value=config_path_value,
                     info=t("service.model_path_info")
                 )
-                # Set device value from init_params if pre-initialized
                 device_value = init_params.get('device', 'auto') if service_pre_initialized else 'auto'
                 device = gr.Dropdown(
                     choices=["auto", "cuda", "cpu"],
@@ -88,11 +87,8 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                 )
             with gr.Row():
-                # Get available 5Hz LM model list
                 available_lm_models = llm_handler.get_available_5hz_lm_models()
                 default_lm_model = "acestep-5Hz-lm-0.6B" if "acestep-5Hz-lm-0.6B" in available_lm_models else (available_lm_models[0] if available_lm_models else None)
-                # Set lm_model_path value from init_params if pre-initialized
                 lm_model_path_value = init_params.get('lm_model_path', default_lm_model) if service_pre_initialized else default_lm_model
                 lm_model_path = gr.Dropdown(
                     label=t("service.lm_model_path_label"),
@@ -100,7 +96,6 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     value=lm_model_path_value,
                     info=t("service.lm_model_path_info")
                 )
-                # Set backend value from init_params if pre-initialized
                 backend_value = init_params.get('backend', 'vllm') if service_pre_initialized else 'vllm'
                 backend_dropdown = gr.Dropdown(
                     choices=["vllm", "pt"],
@@ -109,18 +104,14 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     info=t("service.backend_info")
                 )
-            # Checkbox options section - all checkboxes grouped together
             with gr.Row():
-                # Set init_llm value from init_params if pre-initialized
                 init_llm_value = init_params.get('init_llm', True) if service_pre_initialized else True
                 init_llm_checkbox = gr.Checkbox(
                     label=t("service.init_llm_label"),
                     value=init_llm_value,
                     info=t("service.init_llm_info"),
                 )
-                # Auto-detect flash attention availability
                 flash_attn_available = dit_handler.is_flash_attention_available()
-                # Set use_flash_attention value from init_params if pre-initialized
                 use_flash_attention_value = init_params.get('use_flash_attention', flash_attn_available) if service_pre_initialized else flash_attn_available
                 use_flash_attention_checkbox = gr.Checkbox(
                     label=t("service.flash_attention_label"),
@@ -128,14 +119,12 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     interactive=flash_attn_available,
                     info=t("service.flash_attention_info_enabled") if flash_attn_available else t("service.flash_attention_info_disabled")
                 )
-                # Set offload_to_cpu value from init_params if pre-initialized
                 offload_to_cpu_value = init_params.get('offload_to_cpu', False) if service_pre_initialized else False
                 offload_to_cpu_checkbox = gr.Checkbox(
                     label=t("service.offload_cpu_label"),
                     value=offload_to_cpu_value,
                     info=t("service.offload_cpu_info")
                 )
-                # Set offload_dit_to_cpu value from init_params if pre-initialized
                 offload_dit_to_cpu_value = init_params.get('offload_dit_to_cpu', False) if service_pre_initialized else False
                 offload_dit_to_cpu_checkbox = gr.Checkbox(
                     label=t("service.offload_dit_cpu_label"),
@@ -144,7 +133,6 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                 )
             init_btn = gr.Button(t("service.init_btn"), variant="primary", size="lg")
-            # Set init_status value from init_params if pre-initialized
             init_status_value = init_params.get('init_status', '') if service_pre_initialized else ''
             init_status = gr.Textbox(label=t("service.status_label"), interactive=False, lines=3, value=init_status_value)
@@ -173,505 +161,436 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
                     scale=2,
                 )
-        # Inputs
         with gr.Row():
-            with gr.Column(scale=2):
-                with gr.Accordion(t("generation.required_inputs"), open=True):
-                    # Task type
-                    # Determine initial task_type choices based on actual model in use
-                    # When service is pre-initialized, use config_path from init_params
-                    actual_model = init_params.get('config_path', default_model) if service_pre_initialized else default_model
-                    actual_model_lower = (actual_model or "").lower()
-                    if "turbo" in actual_model_lower:
-                        initial_task_choices = TASK_TYPES_TURBO
-                    else:
-                        initial_task_choices = TASK_TYPES_BASE
-                    with gr.Row(equal_height=True):
-                        with gr.Column(scale=2):
-                            task_type = gr.Dropdown(
-                                choices=initial_task_choices,
-                                value="text2music",
-                                label=t("generation.task_type_label"),
-                                info=t("generation.task_type_info"),
-                            )
-                        with gr.Column(scale=7):
-                            instruction_display_gen = gr.Textbox(
-                                label=t("generation.instruction_label"),
-                                value=DEFAULT_DIT_INSTRUCTION,
-                                interactive=False,
-                                lines=1,
-                                info=t("generation.instruction_info"),
-                            )
-                        with gr.Column(scale=1, min_width=100):
-                            load_file = gr.UploadButton(
-                                t("generation.load_btn"),
-                                file_types=[".json"],
-                                file_count="single",
-                                variant="secondary",
-                                size="sm",
-                            )
-                    track_name = gr.Dropdown(
-                        choices=TRACK_NAMES,
-                        value=None,
-                        label=t("generation.track_name_label"),
-                        info=t("generation.track_name_info"),
-                        visible=False
                     )
-                    complete_track_classes = gr.CheckboxGroup(
-                        choices=TRACK_NAMES,
-                        label=t("generation.track_classes_label"),
-                        info=t("generation.track_classes_info"),
-                        visible=False
                     )
-                    # Audio uploads
-                    audio_uploads_accordion = gr.Accordion(t("generation.audio_uploads"), open=False)
-                    with audio_uploads_accordion:
-                        with gr.Row(equal_height=True):
-                            with gr.Column(scale=2):
-                                reference_audio = gr.Audio(
-                                    label=t("generation.reference_audio"),
-                                    type="filepath",
-                                )
-                            with gr.Column(scale=7):
-                                src_audio = gr.Audio(
-                                    label=t("generation.source_audio"),
-                                    type="filepath",
-                                )
-                            with gr.Column(scale=1, min_width=80):
-                                convert_src_to_codes_btn = gr.Button(
-                                    t("generation.convert_codes_btn"),
-                                    variant="secondary",
-                                    size="sm"
-                                )
-                    # Audio Codes for text2music - single input for transcription or cover task
-                    with gr.Accordion(t("generation.lm_codes_hints"), open=False, visible=True) as text2music_audio_codes_group:
-                        with gr.Row(equal_height=True):
-                            text2music_audio_code_string = gr.Textbox(
-                                label=t("generation.lm_codes_label"),
-                                placeholder=t("generation.lm_codes_placeholder"),
-                                lines=6,
-                                info=t("generation.lm_codes_info"),
-                                scale=9,
-                            )
-                            transcribe_btn = gr.Button(
-                                t("generation.transcribe_btn"),
-                                variant="secondary",
-                                size="sm",
-                                scale=1,
-                            )
-                    # Repainting controls
-                    with gr.Group(visible=False) as repainting_group:
-                        gr.HTML(f"<h5>{t('generation.repainting_controls')}</h5>")
-                        with gr.Row():
-                            repainting_start = gr.Number(
-                                label=t("generation.repainting_start"),
-                                value=0.0,
-                                step=0.1,
-                            )
-                            repainting_end = gr.Number(
-                                label=t("generation.repainting_end"),
-                                value=-1,
-                                minimum=-1,
-                                step=0.1,
-                            )
-                    # Simple/Custom Mode Toggle
-                    # In service mode: only Custom mode, hide the toggle
-                    with gr.Row(visible=not service_mode):
-                        generation_mode = gr.Radio(
-                            choices=[
-                                (t("generation.mode_simple"), "simple"),
-                                (t("generation.mode_custom"), "custom"),
-                            ],
-                            value="custom" if service_mode else "simple",
-                            label=t("generation.mode_label"),
-                            info=t("generation.mode_info"),
-                        )
-                    # Simple Mode Components - hidden in service mode
-                    with gr.Group(visible=not service_mode) as simple_mode_group:
-                        with gr.Row(equal_height=True):
-                            simple_query_input = gr.Textbox(
-                                label=t("generation.simple_query_label"),
-                                placeholder=t("generation.simple_query_placeholder"),
-                                lines=2,
-                                info=t("generation.simple_query_info"),
-                                scale=12,
-                            )
-                            with gr.Column(scale=1, min_width=100):
-                                random_desc_btn = gr.Button(
-                                    "🎲",
-                                    variant="secondary",
-                                    size="sm",
-                                    scale=2
-                                )
-                        with gr.Row(equal_height=True):
-                            with gr.Column(scale=1, variant="compact"):
-                                simple_instrumental_checkbox = gr.Checkbox(
-                                    label=t("generation.instrumental_label"),
-                                    value=False,
-                                )
-                            with gr.Column(scale=18):
-                                create_sample_btn = gr.Button(
-                                    t("generation.create_sample_btn"),
-                                    variant="primary",
-                                    size="lg",
-                                )
-                            with gr.Column(scale=1, variant="compact"):
-                                simple_vocal_language = gr.Dropdown(
-                                    choices=VALID_LANGUAGES,
-                                    value="unknown",
-                                    allow_custom_value=True,
-                                    label=t("generation.simple_vocal_language_label"),
-                                    interactive=True,
-                                )
-                    # State to track if sample has been created in Simple mode
-                    simple_sample_created = gr.State(value=False)
-                # Music Caption - wrapped in accordion that can be collapsed in Simple mode
-                # In service mode: auto-expand
-                with gr.Accordion(t("generation.caption_title"), open=service_mode) as caption_accordion:
                     with gr.Row(equal_height=True):
                         captions = gr.Textbox(
-                            label=t("generation.caption_label"),
-                            placeholder=t("generation.caption_placeholder"),
-                            lines=3,
-                            info=t("generation.caption_info"),
-                            scale=12,
-                        )
-                        with gr.Column(scale=1, min_width=100):
-                            sample_btn = gr.Button(
-                                "🎲",
-                                variant="secondary",
-                                size="sm",
-                                scale=2,
-                            )
-                # Lyrics - wrapped in accordion that can be collapsed in Simple mode
-                # In service mode: auto-expand
-                with gr.Accordion(t("generation.lyrics_title"), open=service_mode) as lyrics_accordion:
-                    lyrics = gr.Textbox(
-                        label=t("generation.lyrics_label"),
-                        placeholder=t("generation.lyrics_placeholder"),
-                        lines=8,
-                        info=t("generation.lyrics_info")
-                    )
-                    with gr.Row(variant="compact", equal_height=True):
-                        instrumental_checkbox = gr.Checkbox(
-                            label=t("generation.instrumental_label"),
-                            value=False,
                             scale=1,
-                            min_width=120,
-                            container=True,
                         )
-                        # Middle: vocal language selector (Dropdown)
-                        # Dropped the gr.HTML hack; use the label parameter directly and let Gradio handle alignment
-                        vocal_language = gr.Dropdown(
-                            choices=VALID_LANGUAGES,
-                            value="unknown",
-                            label=t("generation.vocal_language_label"),
-                            show_label=False,
-                            container=True,
-                            allow_custom_value=True,
-                            scale=3,
-                        )
-                        # Right: format button (Button)
-                        # Placed at the far right of the same row so it is easier to reach
-                        format_btn = gr.Button(
-                            t("generation.format_btn"),
-                            variant="secondary",
                             scale=1,
-                            min_width=80,
                         )
-                # Optional Parameters
-                # In service mode: auto-expand
-                with gr.Accordion(t("generation.optional_params"), open=service_mode) as optional_params_accordion:
-                    with gr.Row():
-                        bpm = gr.Number(
-                            label=t("generation.bpm_label"),
-                            value=None,
-                            step=1,
-                            info=t("generation.bpm_info")
-                        )
-                        key_scale = gr.Textbox(
-                            label=t("generation.keyscale_label"),
-                            placeholder=t("generation.keyscale_placeholder"),
-                            value="",
-                            info=t("generation.keyscale_info")
-                        )
-                        time_signature = gr.Dropdown(
-                            choices=["2", "3", "4", "N/A", ""],
-                            value="",
-                            label=t("generation.timesig_label"),
-                            allow_custom_value=True,
-                            info=t("generation.timesig_info")
-                        )
-                        audio_duration = gr.Number(
-                            label=t("generation.duration_label"),
-                            value=-1,
-                            minimum=-1,
-                            maximum=600.0,
-                            step=0.1,
-                            info=t("generation.duration_info")
-                        )
-                        batch_size_input = gr.Number(
-                            label=t("generation.batch_size_label"),
-                            value=2,
-                            minimum=1,
-                            maximum=8,
-                            step=1,
-                            info=t("generation.batch_size_info"),
-                            interactive=not service_mode  # Fixed in service mode
-                        )
-        # Advanced Settings
-        # Default UI settings use turbo mode (max 20 steps, default 8, show shift with default 3)
-        # These will be updated after model initialization based on handler.is_turbo_model()
-        with gr.Accordion(t("generation.advanced_settings"), open=False):
             with gr.Row():
                 inference_steps = gr.Slider(
                     minimum=1,
                     maximum=20,
                     value=8,
                     step=1,
-                    label=t("generation.inference_steps_label"),
-                    info=t("generation.inference_steps_info")
                 )
-                guidance_scale = gr.Slider(
-                    minimum=1.0,
-                    maximum=15.0,
-                    value=7.0,
-                    step=0.1,
-                    label=t("generation.guidance_scale_label"),
-                    info=t("generation.guidance_scale_info"),
-                    visible=False
-                )
-                with gr.Column():
-                    seed = gr.Textbox(
-                        label=t("generation.seed_label"),
-                        value="-1",
-                        info=t("generation.seed_info")
-                    )
-                    random_seed_checkbox = gr.Checkbox(
-                        label=t("generation.random_seed_label"),
-                        value=True,
-                        info=t("generation.random_seed_info")
-                    )
                 audio_format = gr.Dropdown(
                     choices=["mp3", "flac"],
                     value="mp3",
-                    label=t("generation.audio_format_label"),
-                    info=t("generation.audio_format_info"),
-                    interactive=not service_mode  # Fixed in service mode
                 )
             with gr.Row():
-                use_adg = gr.Checkbox(
-                    label=t("generation.use_adg_label"),
-                    value=False,
-                    info=t("generation.use_adg_info"),
-                    visible=False
-                )
                 shift = gr.Slider(
                     minimum=1.0,
                     maximum=5.0,
                     value=3.0,
                     step=0.1,
-                    label=t("generation.shift_label"),
-                    info=t("generation.shift_info"),
-                    visible=True
                 )
                 infer_method = gr.Dropdown(
                     choices=["ode", "sde"],
                     value="ode",
-                    label=t("generation.infer_method_label"),
-                    info=t("generation.infer_method_info"),
                 )
-            with gr.Row():
-                custom_timesteps = gr.Textbox(
-                    label=t("generation.custom_timesteps_label"),
-                    placeholder="0.97,0.76,0.615,0.5,0.395,0.28,0.18,0.085,0",
-                    value="",
-                    info=t("generation.custom_timesteps_info"),
-                )
-            with gr.Row():
-                cfg_interval_start = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.0,
-                    step=0.01,
-                    label=t("generation.cfg_interval_start"),
-                    visible=False
-                )
-                cfg_interval_end = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=1.0,
-                    step=0.01,
-                    label=t("generation.cfg_interval_end"),
-                    visible=False
-                )
-            # LM (Language Model) Parameters
-            gr.HTML(f"<h4>{t('generation.lm_params_title')}</h4>")
             with gr.Row():
                 lm_temperature = gr.Slider(
-                    label=t("generation.lm_temperature_label"),
                     minimum=0.0,
                     maximum=2.0,
                     value=0.85,
-                    step=0.1,
-                    scale=1,
-                    info=t("generation.lm_temperature_info")
                 )
                 lm_cfg_scale = gr.Slider(
-                    label=t("generation.lm_cfg_scale_label"),
                     minimum=1.0,
                     maximum=3.0,
                     value=2.0,
                     step=0.1,
-                    scale=1,
-                    info=t("generation.lm_cfg_scale_info")
                 )
                 lm_top_k = gr.Slider(
-                    label=t("generation.lm_top_k_label"),
                     minimum=0,
                     maximum=100,
                     value=0,
                     step=1,
-                    scale=1,
-                    info=t("generation.lm_top_k_info")
                 )
                 lm_top_p = gr.Slider(
-                    label=t("generation.lm_top_p_label"),
                     minimum=0.0,
                     maximum=1.0,
                     value=0.9,
                     step=0.01,
-                    scale=1,
-                    info=t("generation.lm_top_p_info")
-                )
-            with gr.Row():
-                lm_negative_prompt = gr.Textbox(
-                    label=t("generation.lm_negative_prompt_label"),
-                    value="NO USER INPUT",
-                    placeholder=t("generation.lm_negative_prompt_placeholder"),
-                    info=t("generation.lm_negative_prompt_info"),
-                    lines=2,
-                    scale=2,
                 )
-            with gr.Row():
-                use_cot_metas = gr.Checkbox(
-                    label=t("generation.cot_metas_label"),
-                    value=True,
-                    info=t("generation.cot_metas_info"),
-                    scale=1,
-                )
-                use_cot_language = gr.Checkbox(
-                    label=t("generation.cot_language_label"),
                     value=True,
-                    info=t("generation.cot_language_info"),
-                    scale=1,
                 )
-                constrained_decoding_debug = gr.Checkbox(
-                    label=t("generation.constrained_debug_label"),
                     value=False,
-                    info=t("generation.constrained_debug_info"),
-                    scale=1,
-                    interactive=not service_mode  # Fixed in service mode
                 )
-            with gr.Row():
                 auto_score = gr.Checkbox(
-                    label=t("generation.auto_score_label"),
                     value=False,
-                    info=t("generation.auto_score_info"),
-                    scale=1,
-                    interactive=not service_mode  # Fixed in service mode
                 )
                 auto_lrc = gr.Checkbox(
-                    label=t("generation.auto_lrc_label"),
                     value=False,
-                    info=t("generation.auto_lrc_info"),
-                    scale=1,
-                    interactive=not service_mode  # Fixed in service mode
-                )
-                lm_batch_chunk_size = gr.Number(
-                    label=t("generation.lm_batch_chunk_label"),
-                    value=8,
-                    minimum=1,
-                    maximum=32,
-                    step=1,
-                    info=t("generation.lm_batch_chunk_info"),
-                    scale=1,
-                    interactive=not service_mode  # Fixed in service mode
-                )
-            with gr.Row():
-                audio_cover_strength = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=1.0,
-                    step=0.01,
-                    label=t("generation.codes_strength_label"),
-                    info=t("generation.codes_strength_info"),
-                    scale=1,
-                )
-                score_scale = gr.Slider(
-                    minimum=0.01,
-                    maximum=1.0,
-                    value=0.5,
-                    step=0.01,
-                    label=t("generation.score_sensitivity_label"),
-                    info=t("generation.score_sensitivity_info"),
-                    scale=1,
-                    visible=not service_mode  # Hidden in service mode
                 )
-        # Set generate_btn to interactive if service is pre-initialized
-        generate_btn_interactive = init_params.get('enable_generate', False) if service_pre_initialized else False
-        with gr.Row(equal_height=True):
-            with gr.Column(scale=1, variant="compact"):
-                think_checkbox = gr.Checkbox(
-                    label=t("generation.think_label"),
-                    value=True,
-                    scale=1,
-                )
-                allow_lm_batch = gr.Checkbox(
-                    label=t("generation.parallel_thinking_label"),
-                    value=True,
-                    scale=1,
-                )
-            with gr.Column(scale=18):
-                generate_btn = gr.Button(t("generation.generate_btn"), variant="primary", size="lg", interactive=generate_btn_interactive)
-            with gr.Column(scale=1, variant="compact"):
-                autogen_checkbox = gr.Checkbox(
-                    label=t("generation.autogen_label"),
-                    value=False,  # Default to False for both service and local modes
-                    scale=1,
-                    interactive=not service_mode  # Not selectable in service mode
-                )
-                use_cot_caption = gr.Checkbox(
-                    label=t("generation.caption_rewrite_label"),
-                    value=True,
-                    scale=1,
-                )
     return {
         "service_config_accordion": service_config_accordion,
@@ -694,6 +613,8 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
         "unload_lora_btn": unload_lora_btn,
         "use_lora_checkbox": use_lora_checkbox,
         "lora_status": lora_status,
         "task_type": task_type,
         "instruction_display_gen": instruction_display_gen,
         "track_name": track_name,
@@ -717,7 +638,7 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
         "repainting_start": repainting_start,
         "repainting_end": repainting_end,
         "audio_cover_strength": audio_cover_strength,
-        # Simple/Custom Mode Components
         "generation_mode": generation_mode,
         "simple_mode_group": simple_mode_group,
         "simple_query_input": simple_query_input,
@@ -729,6 +650,13 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
         "caption_accordion": caption_accordion,
         "lyrics_accordion": lyrics_accordion,
         "optional_params_accordion": optional_params_accordion,
         # Existing components
         "captions": captions,
         "sample_btn": sample_btn,
@@ -763,4 +691,3 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
         "auto_lrc": auto_lrc,
         "lm_batch_chunk_size": lm_batch_chunk_size,
     }

 """
 Gradio UI Generation Section Module
+Contains generation section component definitions - Simplified UI
 """
 import gradio as gr
 from acestep.constants import (
 def create_generation_section(dit_handler, llm_handler, init_params=None, language='en') -> dict:
+    """Create generation section with simplified UI
     Args:
         dit_handler: DiT handler instance
     # Get current language from init_params if available
     current_language = init_params.get('language', language) if init_params else language
+    # Get available models
+    available_dit_models = init_params.get('available_dit_models', []) if init_params else []
+    current_model_value = init_params.get('config_path', '') if init_params else ''
+    show_model_selector = len(available_dit_models) > 1
     with gr.Group():
+        # ==================== Service Configuration (Hidden in service mode) ====================
         accordion_open = not service_pre_initialized
+        accordion_visible = not service_pre_initialized
         with gr.Accordion(t("service.title"), open=accordion_open, visible=accordion_visible) as service_config_accordion:
             # Language selector at the top
             with gr.Row():
                     scale=1,
                 )
             with gr.Row(equal_height=True):
                 with gr.Column(scale=4):
                     checkpoint_value = init_params.get('checkpoint') if service_pre_initialized else None
                     checkpoint_dropdown = gr.Dropdown(
                         label=t("service.checkpoint_label"),
                     refresh_btn = gr.Button(t("service.refresh_btn"), size="sm")
             with gr.Row():
                 available_models = dit_handler.get_available_acestep_v15_models()
                 default_model = "acestep-v15-turbo" if "acestep-v15-turbo" in available_models else (available_models[0] if available_models else None)
                 config_path_value = init_params.get('config_path', default_model) if service_pre_initialized else default_model
                 config_path = gr.Dropdown(
                     label=t("service.model_path_label"),
                     value=config_path_value,
                     info=t("service.model_path_info")
                 )
                 device_value = init_params.get('device', 'auto') if service_pre_initialized else 'auto'
                 device = gr.Dropdown(
                     choices=["auto", "cuda", "cpu"],
                 )
             with gr.Row():
                 available_lm_models = llm_handler.get_available_5hz_lm_models()
                 default_lm_model = "acestep-5Hz-lm-0.6B" if "acestep-5Hz-lm-0.6B" in available_lm_models else (available_lm_models[0] if available_lm_models else None)
                 lm_model_path_value = init_params.get('lm_model_path', default_lm_model) if service_pre_initialized else default_lm_model
                 lm_model_path = gr.Dropdown(
                     label=t("service.lm_model_path_label"),
                     value=lm_model_path_value,
                     info=t("service.lm_model_path_info")
                 )
                 backend_value = init_params.get('backend', 'vllm') if service_pre_initialized else 'vllm'
                 backend_dropdown = gr.Dropdown(
                     choices=["vllm", "pt"],
                     info=t("service.backend_info")
                 )
             with gr.Row():
                 init_llm_value = init_params.get('init_llm', True) if service_pre_initialized else True
                 init_llm_checkbox = gr.Checkbox(
                     label=t("service.init_llm_label"),
                     value=init_llm_value,
                     info=t("service.init_llm_info"),
                 )
                 flash_attn_available = dit_handler.is_flash_attention_available()
                 use_flash_attention_value = init_params.get('use_flash_attention', flash_attn_available) if service_pre_initialized else flash_attn_available
                 use_flash_attention_checkbox = gr.Checkbox(
                     label=t("service.flash_attention_label"),
                     interactive=flash_attn_available,
                     info=t("service.flash_attention_info_enabled") if flash_attn_available else t("service.flash_attention_info_disabled")
                 )
                 offload_to_cpu_value = init_params.get('offload_to_cpu', False) if service_pre_initialized else False
                 offload_to_cpu_checkbox = gr.Checkbox(
                     label=t("service.offload_cpu_label"),
                     value=offload_to_cpu_value,
                     info=t("service.offload_cpu_info")
                 )
                 offload_dit_to_cpu_value = init_params.get('offload_dit_to_cpu', False) if service_pre_initialized else False
                 offload_dit_to_cpu_checkbox = gr.Checkbox(
                     label=t("service.offload_dit_cpu_label"),
                 )
             init_btn = gr.Button(t("service.init_btn"), variant="primary", size="lg")
             init_status_value = init_params.get('init_status', '') if service_pre_initialized else ''
             init_status = gr.Textbox(label=t("service.status_label"), interactive=False, lines=3, value=init_status_value)
                     scale=2,
                 )
+        # ==================== Model Selector (Top, only when multiple models) ====================
+        with gr.Row(visible=show_model_selector):
+            dit_model_selector = gr.Dropdown(
+                choices=available_dit_models,
+                value=current_model_value,
+                label="models",
+                scale=1,
+            )
+        # Hidden dropdown when only one model (for event handler compatibility)
+        if not show_model_selector:
+            dit_model_selector = gr.Dropdown(
+                choices=available_dit_models if available_dit_models else [current_model_value],
+                value=current_model_value,
+                visible=False,
+            )
+        # ==================== Generation Mode (4 modes) ====================
+        gr.HTML("<div style='background: #4a5568; color: white; padding: 8px 16px; border-radius: 4px; font-weight: bold;'>Generation Mode</div>")
         with gr.Row():
+            generation_mode = gr.Radio(
+                choices=[
+                    ("Simple", "simple"),
+                    ("Custom", "custom"),
+                    ("Cover", "cover"),
+                    ("Repaint", "repaint"),
+                ],
+                value="custom",
+                label="",
+                show_label=False,
+            )
+        # ==================== Simple Mode Group ====================
+        with gr.Column(visible=False) as simple_mode_group:
+            # Row: Song Description + Vocal Language + Random button
+            with gr.Row(equal_height=True):
+                simple_query_input = gr.Textbox(
+                    label=t("generation.simple_query_label"),
+                    placeholder=t("generation.simple_query_placeholder"),
+                    lines=2,
+                    info=t("generation.simple_query_info"),
+                    scale=10,
+                )
+                simple_vocal_language = gr.Dropdown(
+                    choices=VALID_LANGUAGES,
+                    value="unknown",
+                    allow_custom_value=True,
+                    label=t("generation.simple_vocal_language_label"),
+                    interactive=True,
+                    info="use unknown for instrumental",
+                    scale=2,
+                )
+                with gr.Column(scale=1, min_width=60):
+                    random_desc_btn = gr.Button(
+                        "🎲",
+                        variant="secondary",
+                        size="lg",
                     )
+            # Hidden components (kept for compatibility but not shown)
+            simple_instrumental_checkbox = gr.Checkbox(
+                label=t("generation.instrumental_label"),
+                value=False,
+                visible=False,
+            )
+            create_sample_btn = gr.Button(
+                t("generation.create_sample_btn"),
+                variant="primary",
+                size="lg",
+                visible=False,
+            )
+        # State to track if sample has been created in Simple mode
+        simple_sample_created = gr.State(value=False)
+        # ==================== Source Audio (for Cover/Repaint) ====================
+        # This is shown above the main content for Cover and Repaint modes
+        with gr.Column(visible=False) as src_audio_group:
+            with gr.Row(equal_height=True):
+                # Source Audio - scale=10 to match (refer_audio=2 + prompt/lyrics=8)
+                src_audio = gr.Audio(
+                    label="Source Audio",
+                    type="filepath",
+                    scale=10,
+                )
+                # Process button - scale=1 to align with random button
+                with gr.Column(scale=1, min_width=80):
+                    process_src_btn = gr.Button(
+                        "Analyze",
+                        variant="secondary",
+                        size="lg",
+                    )
+        # Hidden Audio Codes storage (needed internally but not displayed)
+        text2music_audio_code_string = gr.Textbox(
+            label="Audio Codes",
+            visible=False,
+        )
+        # ==================== Custom/Cover/Repaint Mode Content ====================
+        with gr.Column() as custom_mode_content:
+            with gr.Row(equal_height=True):
+                # Left: Reference Audio
+                with gr.Column(scale=2, min_width=200):
+                    reference_audio = gr.Audio(
+                        label="Reference Audio (optional)",
+                        type="filepath",
+                        show_label=True,
                     )
+                # Middle: Prompt + Lyrics + Format button
+                with gr.Column(scale=8):
+                    # Row 1: Prompt and Lyrics
                     with gr.Row(equal_height=True):
                         captions = gr.Textbox(
+                            label="Prompt",
+                            placeholder="Describe the music style, mood, instruments...",
+                            lines=12,
+                            max_lines=12,
                             scale=1,
                         )
+                        lyrics = gr.Textbox(
+                            label="Lyrics",
+                            placeholder="Enter lyrics here... Use [Verse], [Chorus] etc. for structure",
+                            lines=12,
+                            max_lines=12,
                             scale=1,
                         )
+                    # Row 2: Format button (only below Prompt and Lyrics)
+                    format_btn = gr.Button(
+                        "Format",
+                        variant="secondary",
+                    )
+                # Right: Random button
+                with gr.Column(scale=1, min_width=60):
+                    sample_btn = gr.Button(
+                        "🎲",
+                        variant="secondary",
+                        size="lg",
+                    )
+        # Placeholder for removed audio_uploads_accordion (for compatibility)
+        audio_uploads_accordion = gr.Column(visible=False)
+        # Legacy cover_mode_group (hidden, for backward compatibility)
+        cover_mode_group = gr.Column(visible=False)
+        # Legacy convert button (hidden, for backward compatibility)
+        convert_src_to_codes_btn = gr.Button("Convert to Codes", visible=False)
+        # ==================== Repaint Mode: Source + Time Range ====================
+        with gr.Column(visible=False) as repainting_group:
+            with gr.Row():
+                repainting_start = gr.Number(
+                    label="Start (seconds)",
+                    value=0.0,
+                    step=0.1,
+                    scale=1,
+                )
+                repainting_end = gr.Number(
+                    label="End (seconds, -1 for end)",
+                    value=-1,
+                    minimum=-1,
+                    step=0.1,
+                    scale=1,
+                )
+        # ==================== Optional Parameters ====================
+        with gr.Accordion("⚙️ Optional Parameters", open=False, visible=False) as optional_params_accordion:
+            pass
+        # ==================== Advanced Settings ====================
+        with gr.Accordion("🔧 Advanced Settings", open=False) as advanced_options_accordion:
+            with gr.Row():
+                bpm = gr.Number(
+                    label="BPM (optional)",
+                    value=0,
+                    step=1,
+                    info="leave empty for N/A",
+                    scale=1,
+                )
+                key_scale = gr.Textbox(
+                    label="Key Signature (optional)",
+                    placeholder="Leave empty for N/A",
+                    value="",
+                    info="A-G, #/♭, major/minor",
+                    scale=1,
+                )
+                time_signature = gr.Dropdown(
+                    choices=["", "2", "3", "4"],
+                    value="",
+                    label="Time Signature (optional)",
+                    allow_custom_value=True,
+                    info="2/4, 3/4, 4/4...",
+                    scale=1,
+                )
+                audio_duration = gr.Number(
+                    label="Audio Duration (seconds)",
+                    value=-1,
+                    minimum=-1,
+                    maximum=600.0,
+                    step=1,
+                    info="Use -1 for random",
+                    scale=1,
+                )
+                vocal_language = gr.Dropdown(
+                    choices=VALID_LANGUAGES,
+                    value="unknown",
+                    label="Vocal Language",
+                    allow_custom_value=True,
+                    info="use `unknown` for instrumental",
+                    scale=1,
+                )
+                batch_size_input = gr.Number(
+                    label="batch size",
+                    info="max 8",
+                    value=2,
+                    minimum=1,
+                    maximum=8,
+                    step=1,
+                    scale=1,
+                )
+            # Row 1: DiT Inference Steps, Seed, Audio Format
             with gr.Row():
                 inference_steps = gr.Slider(
                     minimum=1,
                     maximum=20,
                     value=8,
                     step=1,
+                    label="DiT Inference Steps",
+                    info="Turbo: max 8, Base: max 200",
+                )
+                seed = gr.Textbox(
+                    label="Seed",
+                    value="-1",
+                    info="Use comma-separated values for batches",
                 )
                 audio_format = gr.Dropdown(
                     choices=["mp3", "flac"],
                     value="mp3",
+                    label="Audio Format",
+                    info="Audio format for saved files",
                 )
+            # Row 2: Shift, Random Seed, Inference Method
             with gr.Row():
                 shift = gr.Slider(
                     minimum=1.0,
                     maximum=5.0,
                     value=3.0,
                     step=0.1,
+                    label="Shift",
+                    info="Timestep shift factor for base models (range 1.0-5.0, default 3.0). Not effective for turbo models.",
+                )
+                random_seed_checkbox = gr.Checkbox(
+                    label="Random Seed",
+                    value=True,
+                    info="Enable to auto-generate seeds",
                 )
                 infer_method = gr.Dropdown(
                     choices=["ode", "sde"],
                     value="ode",
+                    label="Inference Method",
+                    info="Diffusion inference method. ODE (Euler) is faster, SDE (stochastic) may produce different results.",
                 )
+            # Row 3: Custom Timesteps (full width)
+            custom_timesteps = gr.Textbox(
+                label="Custom Timesteps",
+                placeholder="0.97,0.76,0.615,0.5,0.395,0.28,0.18,0.085,0",
+                value="",
+                info="Optional: comma-separated values from 1.0 to 0.0 (e.g., '0.97,0.76,0.615,0.5,0.395,0.28,0.18,0.085,0'). Overrides inference steps and shift.",
+            )
+            # Section: LM Generation Parameters
+            gr.HTML("<h4>🎵 LM Generation Parameters</h4>")
+            # Row 4: LM Temperature, LM CFG Scale, LM Top-K, LM Top-P
             with gr.Row():
                 lm_temperature = gr.Slider(
                     minimum=0.0,
                     maximum=2.0,
                     value=0.85,
+                    step=0.05,
+                    label="LM Temperature",
+                    info="5Hz LM temperature (higher = more random)",
                 )
                 lm_cfg_scale = gr.Slider(
                     minimum=1.0,
                     maximum=3.0,
                     value=2.0,
                     step=0.1,
+                    label="LM CFG Scale",
+                    info="5Hz LM CFG (1.0 = no CFG)",
                 )
                 lm_top_k = gr.Slider(
                     minimum=0,
                     maximum=100,
                     value=0,
                     step=1,
+                    label="LM Top-K",
+                    info="Top-k (0 = disabled)",
                 )
                 lm_top_p = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     value=0.9,
                     step=0.01,
+                    label="LM Top-P",
+                    info="Top-p (1.0 = disabled)",
                 )
+            # Row 5: LM Negative Prompt (full width)
+            lm_negative_prompt = gr.Textbox(
+                label="LM Negative Prompt",
+                value="NO USER INPUT",
+                placeholder="Things to avoid in generation...",
+                lines=2,
+                info="Negative prompt (use when LM CFG Scale > 1.0)",
+            )
+            # audio_cover_strength remains hidden for now
+            audio_cover_strength = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, visible=False)
+        # Note: audio_duration, bpm, key_scale, time_signature are now visible in Advanced Settings
+        # ==================== Generate Button Row ====================
+        generate_btn_interactive = init_params.get('enable_generate', False) if service_pre_initialized else False
+        with gr.Row(equal_height=True):
+            # Left: Thinking and Instrumental checkboxes
+            with gr.Column(scale=1, min_width=120):
+                think_checkbox = gr.Checkbox(
+                    label="Thinking",
                     value=True,
                 )
+                instrumental_checkbox = gr.Checkbox(
+                    label="Instrumental",
                     value=False,
                 )
+            # Center: Generate button
+            with gr.Column(scale=4):
+                generate_btn = gr.Button(
+                    "🎵 Generate Music",
+                    variant="primary",
+                    size="lg",
+                    interactive=generate_btn_interactive,
+                )
+            # Right: auto_score, auto_lrc
+            with gr.Column(scale=1, min_width=120):
                 auto_score = gr.Checkbox(
+                    label="Get Scores",
                     value=False,
                 )
                 auto_lrc = gr.Checkbox(
+                    label="Get LRC",
                     value=False,
                 )
+        # ==================== Hidden Components (for internal use) ====================
+        # These are needed for event handlers but not shown in UI
+        # Task type (set automatically based on generation_mode)
+        actual_model = init_params.get('config_path', 'acestep-v15-turbo') if service_pre_initialized else 'acestep-v15-turbo'
+        actual_model_lower = (actual_model or "").lower()
+        if "turbo" in actual_model_lower:
+            initial_task_choices = TASK_TYPES_TURBO
+        else:
+            initial_task_choices = TASK_TYPES_BASE
+        task_type = gr.Dropdown(
+            choices=initial_task_choices,
+            value="text2music",
+            visible=False,
+        )
+        instruction_display_gen = gr.Textbox(
+            value=DEFAULT_DIT_INSTRUCTION,
+            visible=False,
+        )
+        track_name = gr.Dropdown(
+            choices=TRACK_NAMES,
+            value=None,
+            visible=False,
+        )
+        complete_track_classes = gr.CheckboxGroup(
+            choices=TRACK_NAMES,
+            visible=False,
+        )
+        # Note: lyrics and format_btn are now visible in custom_mode_content; vocal_language is in Advanced Settings and instrumental_checkbox is in the generate button row
+        # Hidden advanced settings (keep defaults)
+        # Note: Most parameters are now visible in Advanced Settings section above
+        guidance_scale = gr.Slider(value=7.0, visible=False)
+        use_adg = gr.Checkbox(value=False, visible=False)
+        cfg_interval_start = gr.Slider(value=0.0, visible=False)
+        cfg_interval_end = gr.Slider(value=1.0, visible=False)
+        # LM parameters (remaining hidden ones)
+        use_cot_metas = gr.Checkbox(value=True, visible=False)
+        use_cot_caption = gr.Checkbox(value=True, visible=False)
+        use_cot_language = gr.Checkbox(value=True, visible=False)
+        constrained_decoding_debug = gr.Checkbox(value=False, visible=False)
+        allow_lm_batch = gr.Checkbox(value=True, visible=False)
+        lm_batch_chunk_size = gr.Number(value=8, visible=False)
+        score_scale = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, visible=False)
+        autogen_checkbox = gr.Checkbox(value=False, visible=False)
+        # Transcribe button (hidden)
+        transcribe_btn = gr.Button(value="Transcribe", visible=False)
+        text2music_audio_codes_group = gr.Group(visible=False)
+        # Note: format_btn is now visible in custom_mode_content
+        # Load file button (hidden for now)
+        load_file = gr.UploadButton(
+            label="Load",
+            file_types=[".json"],
+            file_count="single",
+            visible=False,
+        )
+        # Caption/Lyrics accordions (not used in new UI but needed for compatibility)
+        caption_accordion = gr.Accordion("Caption", visible=False)
+        lyrics_accordion = gr.Accordion("Lyrics", visible=False)
+        # Note: optional_params_accordion is created above as an empty, hidden placeholder (kept for compatibility)
     return {
         "service_config_accordion": service_config_accordion,
         "unload_lora_btn": unload_lora_btn,
         "use_lora_checkbox": use_lora_checkbox,
         "lora_status": lora_status,
+        # DiT model selector
+        "dit_model_selector": dit_model_selector,
         "task_type": task_type,
         "instruction_display_gen": instruction_display_gen,
         "track_name": track_name,
         "repainting_start": repainting_start,
         "repainting_end": repainting_end,
         "audio_cover_strength": audio_cover_strength,
+        # Generation mode components
         "generation_mode": generation_mode,
         "simple_mode_group": simple_mode_group,
         "simple_query_input": simple_query_input,
         "caption_accordion": caption_accordion,
         "lyrics_accordion": lyrics_accordion,
         "optional_params_accordion": optional_params_accordion,
+        # Custom mode components
+        "custom_mode_content": custom_mode_content,
+        "cover_mode_group": cover_mode_group,
+        # Source audio group for Cover/Repaint
+        "src_audio_group": src_audio_group,
+        "process_src_btn": process_src_btn,
+        "advanced_options_accordion": advanced_options_accordion,
         # Existing components
         "captions": captions,
         "sample_btn": sample_btn,
         "auto_lrc": auto_lrc,
         "lm_batch_chunk_size": lm_batch_chunk_size,
     }

acestep/gradio_ui/interfaces/result.py CHANGED Viewed

@@ -32,8 +32,14 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_1 = gr.Button(
-                        t("results.send_to_src_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
@@ -48,23 +54,17 @@ def create_results_section(dit_handler) -> dict:
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                     lrc_btn_1 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_1:
-                    codes_display_1 = gr.Textbox(
-                        label=t("results.codes_label", n=1),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_1 = gr.Textbox(
                         label=t("results.quality_score_label", n=1),
                         interactive=False,
@@ -81,6 +81,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
             with gr.Column(visible=True) as audio_col_2:
                 generated_audio_2 = gr.Audio(
                     label=t("results.generated_music", n=2),
@@ -89,8 +97,14 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_2 = gr.Button(
-                        t("results.send_to_src_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
@@ -105,23 +119,17 @@ def create_results_section(dit_handler) -> dict:
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                     lrc_btn_2 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_2:
-                    codes_display_2 = gr.Textbox(
-                        label=t("results.codes_label", n=2),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_2 = gr.Textbox(
                         label=t("results.quality_score_label", n=2),
                         interactive=False,
@@ -138,6 +146,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
             with gr.Column(visible=False) as audio_col_3:
                 generated_audio_3 = gr.Audio(
                     label=t("results.generated_music", n=3),
@@ -146,8 +162,14 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_3 = gr.Button(
-                        t("results.send_to_src_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
@@ -162,23 +184,17 @@ def create_results_section(dit_handler) -> dict:
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                     lrc_btn_3 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_3:
-                    codes_display_3 = gr.Textbox(
-                        label=t("results.codes_label", n=3),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_3 = gr.Textbox(
                         label=t("results.quality_score_label", n=3),
                         interactive=False,
@@ -195,6 +211,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
             with gr.Column(visible=False) as audio_col_4:
                 generated_audio_4 = gr.Audio(
                     label=t("results.generated_music", n=4),
@@ -203,8 +227,14 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_4 = gr.Button(
-                        t("results.send_to_src_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
@@ -219,23 +249,17 @@ def create_results_section(dit_handler) -> dict:
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                     lrc_btn_4 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
-                        scale=1
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_4:
-                    codes_display_4 = gr.Textbox(
-                        label=t("results.codes_label", n=4),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_4 = gr.Textbox(
                         label=t("results.quality_score_label", n=4),
                         interactive=False,
@@ -252,6 +276,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
         # Second row for batch size 5-8 (initially hidden)
         with gr.Row(visible=False) as audio_row_5_8:
@@ -263,19 +295,12 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_5 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
-                    score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
-                    lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_5:
-                    codes_display_5 = gr.Textbox(
-                        label=t("results.codes_label", n=5),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_5 = gr.Textbox(
                         label=t("results.quality_score_label", n=5),
                         interactive=False,
@@ -292,6 +317,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
             with gr.Column() as audio_col_6:
                 generated_audio_6 = gr.Audio(
                     label=t("results.generated_music", n=6),
@@ -300,19 +333,12 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_6 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
-                    score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
-                    lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_6:
-                    codes_display_6 = gr.Textbox(
-                        label=t("results.codes_label", n=6),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_6 = gr.Textbox(
                         label=t("results.quality_score_label", n=6),
                         interactive=False,
@@ -329,6 +355,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
             with gr.Column() as audio_col_7:
                 generated_audio_7 = gr.Audio(
                     label=t("results.generated_music", n=7),
@@ -337,19 +371,12 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_7 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
-                    score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
-                    lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_7:
-                    codes_display_7 = gr.Textbox(
-                        label=t("results.codes_label", n=7),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_7 = gr.Textbox(
                         label=t("results.quality_score_label", n=7),
                         interactive=False,
@@ -366,6 +393,14 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
             with gr.Column() as audio_col_8:
                 generated_audio_8 = gr.Audio(
                     label=t("results.generated_music", n=8),
@@ -374,19 +409,12 @@ def create_results_section(dit_handler) -> dict:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
-                    send_to_src_btn_8 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
-                    score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
-                    lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_8:
-                    codes_display_8 = gr.Textbox(
-                        label=t("results.codes_label", n=8),
-                        interactive=False,
-                        buttons=["copy"],
-                        lines=4,
-                        max_lines=4,
-                        visible=True
-                    )
                     score_display_8 = gr.Textbox(
                         label=t("results.quality_score_label", n=8),
                         interactive=False,
@@ -403,11 +431,19 @@ def create_results_section(dit_handler) -> dict:
                         max_lines=8,
                         visible=True
                     )
         status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)
-        # Batch navigation controls
-        with gr.Row(equal_height=True):
             prev_batch_btn = gr.Button(
                 t("results.prev_btn"),
                 variant="secondary",
@@ -435,12 +471,13 @@ def create_results_section(dit_handler) -> dict:
                 size="sm"
             )
-        # One-click restore parameters button
         restore_params_btn = gr.Button(
             t("results.restore_params_btn"),
             variant="secondary",
-            interactive=False,  # Initially disabled, enabled after generation
-            size="sm"
         )
         with gr.Accordion(t("results.batch_results_title"), open=False):
@@ -482,14 +519,22 @@ def create_results_section(dit_handler) -> dict:
         "audio_col_6": audio_col_6,
         "audio_col_7": audio_col_7,
         "audio_col_8": audio_col_8,
-        "send_to_src_btn_1": send_to_src_btn_1,
-        "send_to_src_btn_2": send_to_src_btn_2,
-        "send_to_src_btn_3": send_to_src_btn_3,
-        "send_to_src_btn_4": send_to_src_btn_4,
-        "send_to_src_btn_5": send_to_src_btn_5,
-        "send_to_src_btn_6": send_to_src_btn_6,
-        "send_to_src_btn_7": send_to_src_btn_7,
-        "send_to_src_btn_8": send_to_src_btn_8,
         "save_btn_1": save_btn_1,
         "save_btn_2": save_btn_2,
         "save_btn_3": save_btn_3,

                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_1 = gr.Button(
+                        t("results.send_to_cover_btn"),
+                        variant="secondary",
+                        size="sm",
+                        scale=1
+                    )
+                    send_to_repaint_btn_1 = gr.Button(
+                        t("results.send_to_repaint_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                     lrc_btn_1 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_1:
                     score_display_1 = gr.Textbox(
                         label=t("results.quality_score_label", n=1),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_1 = gr.Textbox(
+                        label=t("results.codes_label", n=1),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
             with gr.Column(visible=True) as audio_col_2:
                 generated_audio_2 = gr.Audio(
                     label=t("results.generated_music", n=2),
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_2 = gr.Button(
+                        t("results.send_to_cover_btn"),
+                        variant="secondary",
+                        size="sm",
+                        scale=1
+                    )
+                    send_to_repaint_btn_2 = gr.Button(
+                        t("results.send_to_repaint_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                     lrc_btn_2 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_2:
                     score_display_2 = gr.Textbox(
                         label=t("results.quality_score_label", n=2),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_2 = gr.Textbox(
+                        label=t("results.codes_label", n=2),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
             with gr.Column(visible=False) as audio_col_3:
                 generated_audio_3 = gr.Audio(
                     label=t("results.generated_music", n=3),
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_3 = gr.Button(
+                        t("results.send_to_cover_btn"),
+                        variant="secondary",
+                        size="sm",
+                        scale=1
+                    )
+                    send_to_repaint_btn_3 = gr.Button(
+                        t("results.send_to_repaint_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                     lrc_btn_3 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_3:
                     score_display_3 = gr.Textbox(
                         label=t("results.quality_score_label", n=3),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_3 = gr.Textbox(
+                        label=t("results.codes_label", n=3),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
             with gr.Column(visible=False) as audio_col_4:
                 generated_audio_4 = gr.Audio(
                     label=t("results.generated_music", n=4),
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_4 = gr.Button(
+                        t("results.send_to_cover_btn"),
+                        variant="secondary",
+                        size="sm",
+                        scale=1
+                    )
+                    send_to_repaint_btn_4 = gr.Button(
+                        t("results.send_to_repaint_btn"),
                         variant="secondary",
                         size="sm",
                         scale=1
                         t("results.score_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                     lrc_btn_4 = gr.Button(
                         t("results.lrc_btn"),
                         variant="secondary",
                         size="sm",
+                        scale=1,
+                        visible=False
                     )
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_4:
                     score_display_4 = gr.Textbox(
                         label=t("results.quality_score_label", n=4),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_4 = gr.Textbox(
+                        label=t("results.codes_label", n=4),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
         # Second row for batch size 5-8 (initially hidden)
         with gr.Row(visible=False) as audio_row_5_8:
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_5 = gr.Button(t("results.send_to_cover_btn"), variant="secondary", size="sm", scale=1)
+                    send_to_repaint_btn_5 = gr.Button(t("results.send_to_repaint_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
+                    score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1, visible=False)
+                    lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1, visible=False)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_5:
                     score_display_5 = gr.Textbox(
                         label=t("results.quality_score_label", n=5),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_5 = gr.Textbox(
+                        label=t("results.codes_label", n=5),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
             with gr.Column() as audio_col_6:
                 generated_audio_6 = gr.Audio(
                     label=t("results.generated_music", n=6),
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_6 = gr.Button(t("results.send_to_cover_btn"), variant="secondary", size="sm", scale=1)
+                    send_to_repaint_btn_6 = gr.Button(t("results.send_to_repaint_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
+                    score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1, visible=False)
+                    lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1, visible=False)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_6:
                     score_display_6 = gr.Textbox(
                         label=t("results.quality_score_label", n=6),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_6 = gr.Textbox(
+                        label=t("results.codes_label", n=6),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
             with gr.Column() as audio_col_7:
                 generated_audio_7 = gr.Audio(
                     label=t("results.generated_music", n=7),
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_7 = gr.Button(t("results.send_to_cover_btn"), variant="secondary", size="sm", scale=1)
+                    send_to_repaint_btn_7 = gr.Button(t("results.send_to_repaint_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
+                    score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1, visible=False)
+                    lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1, visible=False)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_7:
                     score_display_7 = gr.Textbox(
                         label=t("results.quality_score_label", n=7),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_7 = gr.Textbox(
+                        label=t("results.codes_label", n=7),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
             with gr.Column() as audio_col_8:
                 generated_audio_8 = gr.Audio(
                     label=t("results.generated_music", n=8),
                     buttons=[]
                 )
                 with gr.Row(equal_height=True):
+                    send_to_cover_btn_8 = gr.Button(t("results.send_to_cover_btn"), variant="secondary", size="sm", scale=1)
+                    send_to_repaint_btn_8 = gr.Button(t("results.send_to_repaint_btn"), variant="secondary", size="sm", scale=1)
                     save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
+                    score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1, visible=False)
+                    lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1, visible=False)
                 with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_8:
                     score_display_8 = gr.Textbox(
                         label=t("results.quality_score_label", n=8),
                         interactive=False,
                         max_lines=8,
                         visible=True
                     )
+                    codes_display_8 = gr.Textbox(
+                        label=t("results.codes_label", n=8),
+                        interactive=False,
+                        buttons=["copy"],
+                        lines=4,
+                        max_lines=4,
+                        visible=True
+                    )
         status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)
+        # Batch navigation controls (hidden for simplified UI)
+        with gr.Row(equal_height=True, visible=False):
             prev_batch_btn = gr.Button(
                 t("results.prev_btn"),
                 variant="secondary",
                 size="sm"
             )
+        # One-click restore parameters button (hidden for simplified UI)
         restore_params_btn = gr.Button(
             t("results.restore_params_btn"),
             variant="secondary",
+            interactive=False,
+            size="sm",
+            visible=False
         )
         with gr.Accordion(t("results.batch_results_title"), open=False):
         "audio_col_6": audio_col_6,
         "audio_col_7": audio_col_7,
         "audio_col_8": audio_col_8,
+        "send_to_cover_btn_1": send_to_cover_btn_1,
+        "send_to_cover_btn_2": send_to_cover_btn_2,
+        "send_to_cover_btn_3": send_to_cover_btn_3,
+        "send_to_cover_btn_4": send_to_cover_btn_4,
+        "send_to_cover_btn_5": send_to_cover_btn_5,
+        "send_to_cover_btn_6": send_to_cover_btn_6,
+        "send_to_cover_btn_7": send_to_cover_btn_7,
+        "send_to_cover_btn_8": send_to_cover_btn_8,
+        "send_to_repaint_btn_1": send_to_repaint_btn_1,
+        "send_to_repaint_btn_2": send_to_repaint_btn_2,
+        "send_to_repaint_btn_3": send_to_repaint_btn_3,
+        "send_to_repaint_btn_4": send_to_repaint_btn_4,
+        "send_to_repaint_btn_5": send_to_repaint_btn_5,
+        "send_to_repaint_btn_6": send_to_repaint_btn_6,
+        "send_to_repaint_btn_7": send_to_repaint_btn_7,
+        "send_to_repaint_btn_8": send_to_repaint_btn_8,
         "save_btn_1": save_btn_1,
         "save_btn_2": save_btn_2,
         "save_btn_3": save_btn_3,

acestep/handler.py CHANGED Viewed

@@ -315,6 +315,11 @@ class AceStepHandler:
         offload_to_cpu: bool = False,
         offload_dit_to_cpu: bool = False,
         quantization: Optional[str] = None,
     ) -> Tuple[str, bool]:
         """
         Initialize DiT model service
@@ -327,6 +332,10 @@ class AceStepHandler:
             compile_model: Whether to use torch.compile to optimize the model
             offload_to_cpu: Whether to offload models to CPU when not in use
             offload_dit_to_cpu: Whether to offload DiT model to CPU when not in use (only effective if offload_to_cpu is True)
         Returns:
             (status_message, enable_generate_button)
@@ -440,54 +449,77 @@ class AceStepHandler:
                         logger.info(f"[initialize_service] DiT quantized with: {self.quantization}")
-                silence_latent_path = os.path.join(acestep_v15_checkpoint_path, "silence_latent.pt")
-                if os.path.exists(silence_latent_path):
-                    self.silence_latent = torch.load(silence_latent_path).transpose(1, 2)
-                    # Always keep silence_latent on GPU - it's used in many places outside model context
-                    # and is small enough that it won't significantly impact VRAM
-                    self.silence_latent = self.silence_latent.to(device).to(self.dtype)
                 else:
-                    raise FileNotFoundError(f"Silence latent not found at {silence_latent_path}")
             else:
                 raise FileNotFoundError(f"ACE-Step V1.5 checkpoint not found at {acestep_v15_checkpoint_path}")
-            # 2. Load VAE
-            vae_checkpoint_path = os.path.join(checkpoint_dir, "vae")
-            if os.path.exists(vae_checkpoint_path):
-                self.vae = AutoencoderOobleck.from_pretrained(vae_checkpoint_path)
-                # Use bfloat16 for VAE on GPU, otherwise use self.dtype (float32 on CPU)
-                vae_dtype = self._get_vae_dtype(device)
-                if not self.offload_to_cpu:
-                    self.vae = self.vae.to(device).to(vae_dtype)
-                else:
-                    self.vae = self.vae.to("cpu").to(vae_dtype)
-                self.vae.eval()
             else:
-                raise FileNotFoundError(f"VAE checkpoint not found at {vae_checkpoint_path}")
-            if compile_model:
-                self.vae = torch.compile(self.vae)
-            # 3. Load text encoder and tokenizer
-            text_encoder_path = os.path.join(checkpoint_dir, "Qwen3-Embedding-0.6B")
-            if os.path.exists(text_encoder_path):
-                self.text_tokenizer = AutoTokenizer.from_pretrained(text_encoder_path)
-                self.text_encoder = AutoModel.from_pretrained(text_encoder_path)
-                if not self.offload_to_cpu:
-                    self.text_encoder = self.text_encoder.to(device).to(self.dtype)
                 else:
-                    self.text_encoder = self.text_encoder.to("cpu").to(self.dtype)
-                self.text_encoder.eval()
             else:
-                raise FileNotFoundError(f"Text encoder not found at {text_encoder_path}")
             # Determine actual attention implementation used
             actual_attn = getattr(self.config, "_attn_implementation", "eager")
             status_msg = f"✅ Model initialized successfully on {device}\n"
             status_msg += f"Main model: {acestep_v15_checkpoint_path}\n"
-            status_msg += f"VAE: {vae_checkpoint_path}\n"
-            status_msg += f"Text encoder: {text_encoder_path}\n"
             status_msg += f"Dtype: {self.dtype}\n"
             status_msg += f"Attention: {actual_attn}\n"
             status_msg += f"Compiled: {compile_model}\n"

         offload_to_cpu: bool = False,
         offload_dit_to_cpu: bool = False,
         quantization: Optional[str] = None,
+        # Shared components (for multi-model setup to save memory)
+        shared_vae = None,
+        shared_text_encoder = None,
+        shared_text_tokenizer = None,
+        shared_silence_latent = None,
     ) -> Tuple[str, bool]:
         """
         Initialize DiT model service
             compile_model: Whether to use torch.compile to optimize the model
             offload_to_cpu: Whether to offload models to CPU when not in use
             offload_dit_to_cpu: Whether to offload DiT model to CPU when not in use (only effective if offload_to_cpu is True)
+            shared_vae: Optional shared VAE instance (for multi-model setup)
+            shared_text_encoder: Optional shared text encoder instance (for multi-model setup)
+            shared_text_tokenizer: Optional shared text tokenizer instance (for multi-model setup)
+            shared_silence_latent: Optional shared silence latent tensor (for multi-model setup)
         Returns:
             (status_message, enable_generate_button)
                         logger.info(f"[initialize_service] DiT quantized with: {self.quantization}")
+                # Load or use shared silence_latent
+                if shared_silence_latent is not None:
+                    self.silence_latent = shared_silence_latent
+                    logger.info("[initialize_service] Using shared silence_latent")
                 else:
+                    silence_latent_path = os.path.join(acestep_v15_checkpoint_path, "silence_latent.pt")
+                    if os.path.exists(silence_latent_path):
+                        self.silence_latent = torch.load(silence_latent_path).transpose(1, 2)
+                        # Always keep silence_latent on GPU - it's used in many places outside model context
+                        # and is small enough that it won't significantly impact VRAM
+                        self.silence_latent = self.silence_latent.to(device).to(self.dtype)
+                    else:
+                        raise FileNotFoundError(f"Silence latent not found at {silence_latent_path}")
             else:
                 raise FileNotFoundError(f"ACE-Step V1.5 checkpoint not found at {acestep_v15_checkpoint_path}")
+            # 2. Load or use shared VAE
+            vae_checkpoint_path = os.path.join(checkpoint_dir, "vae")  # Define for status message
+            if shared_vae is not None:
+                self.vae = shared_vae
+                logger.info("[initialize_service] Using shared VAE")
             else:
+                if os.path.exists(vae_checkpoint_path):
+                    self.vae = AutoencoderOobleck.from_pretrained(vae_checkpoint_path)
+                    # Use bfloat16 for VAE on GPU, otherwise use self.dtype (float32 on CPU)
+                    vae_dtype = self._get_vae_dtype(device)
+                    if not self.offload_to_cpu:
+                        self.vae = self.vae.to(device).to(vae_dtype)
+                    else:
+                        self.vae = self.vae.to("cpu").to(vae_dtype)
+                    self.vae.eval()
                 else:
+                    raise FileNotFoundError(f"VAE checkpoint not found at {vae_checkpoint_path}")
+                if compile_model:
+                    self.vae = torch.compile(self.vae)
+            # 3. Load or use shared text encoder and tokenizer
+            text_encoder_path = os.path.join(checkpoint_dir, "Qwen3-Embedding-0.6B")  # Define for status message
+            if shared_text_encoder is not None and shared_text_tokenizer is not None:
+                self.text_encoder = shared_text_encoder
+                self.text_tokenizer = shared_text_tokenizer
+                logger.info("[initialize_service] Using shared text encoder and tokenizer")
             else:
+                if os.path.exists(text_encoder_path):
+                    self.text_tokenizer = AutoTokenizer.from_pretrained(text_encoder_path)
+                    self.text_encoder = AutoModel.from_pretrained(text_encoder_path)
+                    if not self.offload_to_cpu:
+                        self.text_encoder = self.text_encoder.to(device).to(self.dtype)
+                    else:
+                        self.text_encoder = self.text_encoder.to("cpu").to(self.dtype)
+                    self.text_encoder.eval()
+                else:
+                    raise FileNotFoundError(f"Text encoder not found at {text_encoder_path}")
             # Determine actual attention implementation used
             actual_attn = getattr(self.config, "_attn_implementation", "eager")
+            # Determine if using shared components
+            using_shared = shared_vae is not None or shared_text_encoder is not None
             status_msg = f"✅ Model initialized successfully on {device}\n"
             status_msg += f"Main model: {acestep_v15_checkpoint_path}\n"
+            if shared_vae is None:
+                status_msg += f"VAE: {vae_checkpoint_path}\n"
+            else:
+                status_msg += f"VAE: shared\n"
+            if shared_text_encoder is None:
+                status_msg += f"Text encoder: {text_encoder_path}\n"
+            else:
+                status_msg += f"Text encoder: shared\n"
             status_msg += f"Dtype: {self.dtype}\n"
             status_msg += f"Attention: {actual_attn}\n"
             status_msg += f"Compiled: {compile_model}\n"

app.py CHANGED Viewed

@@ -53,7 +53,24 @@ def get_persistent_storage_path():
     1. Must be enabled in Space settings
     2. Path is typically /data for Docker SDK
     3. Falls back to app directory if /data is not writable
     """
     # Try HuggingFace Space persistent storage first
     hf_data_path = "/data"
@@ -80,6 +97,14 @@ def get_persistent_storage_path():
 def main():
     """Main entry point for HuggingFace Space"""
     # Get persistent storage path (auto-detect)
     persistent_storage_path = get_persistent_storage_path()
@@ -87,14 +112,15 @@ def main():
     gpu_memory_gb = get_gpu_memory_gb()
     auto_offload = gpu_memory_gb > 0 and gpu_memory_gb < 16
-    if auto_offload:
-        print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
-        print("Auto-enabling CPU offload to reduce GPU memory usage")
-    elif gpu_memory_gb > 0:
-        print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
-        print("CPU offload disabled by default")
-    else:
-        print("No GPU detected, running on CPU")
     # Create handler instances
     print("Creating handlers...")
@@ -107,6 +133,9 @@ def main():
         "SERVICE_MODE_DIT_MODEL",
         "acestep-v15-turbo"
     )
     lm_model_path = os.environ.get(
         "SERVICE_MODE_LM_MODEL",
         "acestep-5Hz-lm-1.7B"
@@ -115,50 +144,97 @@ def main():
     device = "auto"
     print(f"Service mode configuration:")
-    print(f"  DiT model: {config_path}")
     print(f"  LM model: {lm_model_path}")
     print(f"  Backend: {backend}")
     print(f"  Offload to CPU: {auto_offload}")
     # Determine flash attention availability
     use_flash_attention = dit_handler.is_flash_attention_available()
     print(f"  Flash Attention: {use_flash_attention}")
-    # Initialize DiT model
-    print(f"Initializing DiT model: {config_path}...")
-    init_status, enable_generate = dit_handler.initialize_service(
-        project_root=current_dir,
-        config_path=config_path,
-        device=device,
-        use_flash_attention=use_flash_attention,
-        compile_model=False,
-        offload_to_cpu=auto_offload,
-        offload_dit_to_cpu=False
-    )
-    if not enable_generate:
-        print(f"Warning: DiT model initialization issue: {init_status}", file=sys.stderr)
     else:
-        print("DiT model initialized successfully")
-    # Initialize LM model
-    checkpoint_dir = dit_handler._get_checkpoint_dir()
-    print(f"Initializing 5Hz LM: {lm_model_path}...")
-    lm_status, lm_success = llm_handler.initialize(
-        checkpoint_dir=checkpoint_dir,
-        lm_model_path=lm_model_path,
-        backend=backend,
-        device=device,
-        offload_to_cpu=auto_offload,
-        dtype=dit_handler.dtype
-    )
-    if lm_success:
-        print("5Hz LM initialized successfully")
-        init_status += f"\n{lm_status}"
-    else:
-        print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
-        init_status += f"\n{lm_status}"
     # Prepare initialization parameters for UI
     init_params = {
@@ -166,6 +242,7 @@ def main():
         'service_mode': True,
         'checkpoint': None,
         'config_path': config_path,
         'device': device,
         'init_llm': True,
         'lm_model_path': lm_model_path,
@@ -176,9 +253,12 @@ def main():
         'init_status': init_status,
         'enable_generate': enable_generate,
         'dit_handler': dit_handler,
         'llm_handler': llm_handler,
         'language': 'en',
         'persistent_storage_path': persistent_storage_path,
     }
     print("Service initialization completed!")

     1. Must be enabled in Space settings
     2. Path is typically /data for Docker SDK
     3. Falls back to app directory if /data is not writable
+    Local development:
+    - Set CHECKPOINT_DIR environment variable to use local checkpoints
+      Example: CHECKPOINT_DIR=/path/to/checkpoints python app.py
+      The path should be the parent directory of the 'checkpoints' folder
+      (a path ending in '/checkpoints' is also accepted; its parent is used)
     """
+    # Check for local checkpoint directory override (for development)
+    checkpoint_dir_override = os.environ.get("CHECKPOINT_DIR")
+    if checkpoint_dir_override:
+        # If user specifies the checkpoints folder directly, use its parent
+        if checkpoint_dir_override.endswith("/checkpoints") or checkpoint_dir_override.endswith("\\checkpoints"):
+            checkpoint_dir_override = os.path.dirname(checkpoint_dir_override)
+        if os.path.exists(checkpoint_dir_override):
+            print(f"Using local checkpoint directory (CHECKPOINT_DIR): {checkpoint_dir_override}")
+            return checkpoint_dir_override
+        else:
+            print(f"Warning: CHECKPOINT_DIR path does not exist: {checkpoint_dir_override}")
     # Try HuggingFace Space persistent storage first
     hf_data_path = "/data"
 def main():
     """Main entry point for HuggingFace Space"""
+    # Check for DEBUG_UI mode (skip model initialization for UI development)
+    debug_ui = os.environ.get("DEBUG_UI", "").lower() in ("1", "true", "yes")
+    if debug_ui:
+        print("=" * 60)
+        print("DEBUG_UI mode enabled - skipping model initialization")
+        print("UI will be fully functional but generation is disabled")
+        print("=" * 60)
     # Get persistent storage path (auto-detect)
     persistent_storage_path = get_persistent_storage_path()
     gpu_memory_gb = get_gpu_memory_gb()
     auto_offload = gpu_memory_gb > 0 and gpu_memory_gb < 16
+    if not debug_ui:
+        if auto_offload:
+            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (< 16GB)")
+            print("Auto-enabling CPU offload to reduce GPU memory usage")
+        elif gpu_memory_gb > 0:
+            print(f"Detected GPU memory: {gpu_memory_gb:.2f} GB (>= 16GB)")
+            print("CPU offload disabled by default")
+        else:
+            print("No GPU detected, running on CPU")
     # Create handler instances
     print("Creating handlers...")
         "SERVICE_MODE_DIT_MODEL",
         "acestep-v15-turbo"
     )
+    # Second DiT model - default to turbo-shift3 for two-model setup
+    config_path_2 = os.environ.get("SERVICE_MODE_DIT_MODEL_2", "acestep-v15-turbo-shift3").strip()
     lm_model_path = os.environ.get(
         "SERVICE_MODE_LM_MODEL",
         "acestep-5Hz-lm-1.7B"
     device = "auto"
     print(f"Service mode configuration:")
+    print(f"  DiT model 1: {config_path}")
+    if config_path_2:
+        print(f"  DiT model 2: {config_path_2}")
     print(f"  LM model: {lm_model_path}")
     print(f"  Backend: {backend}")
     print(f"  Offload to CPU: {auto_offload}")
+    print(f"  DEBUG_UI: {debug_ui}")
     # Determine flash attention availability
     use_flash_attention = dit_handler.is_flash_attention_available()
     print(f"  Flash Attention: {use_flash_attention}")
+    # Initialize models (skip in DEBUG_UI mode)
+    init_status = ""
+    enable_generate = False
+    dit_handler_2 = None
+    if debug_ui:
+        # In DEBUG_UI mode, skip all model initialization
+        init_status = "⚠️ DEBUG_UI mode - models not loaded\nUI is functional but generation is disabled"
+        enable_generate = False
+        print("Skipping model initialization (DEBUG_UI mode)")
     else:
+        # Initialize primary DiT model
+        print(f"Initializing DiT model 1: {config_path}...")
+        init_status, enable_generate = dit_handler.initialize_service(
+            project_root=current_dir,
+            config_path=config_path,
+            device=device,
+            use_flash_attention=use_flash_attention,
+            compile_model=False,
+            offload_to_cpu=auto_offload,
+            offload_dit_to_cpu=False
+        )
+        if not enable_generate:
+            print(f"Warning: DiT model 1 initialization issue: {init_status}", file=sys.stderr)
+        else:
+            print("DiT model 1 initialized successfully")
+        # Initialize second DiT model if configured
+        if config_path_2:
+            print(f"Initializing DiT model 2: {config_path_2}...")
+            dit_handler_2 = AceStepHandler(persistent_storage_path=persistent_storage_path)
+            # Share VAE, text encoder, text tokenizer, and silence_latent from the first handler to save memory
+            init_status_2, enable_generate_2 = dit_handler_2.initialize_service(
+                project_root=current_dir,
+                config_path=config_path_2,
+                device=device,
+                use_flash_attention=use_flash_attention,
+                compile_model=False,
+                offload_to_cpu=auto_offload,
+                offload_dit_to_cpu=False,
+                # Share components from first handler
+                shared_vae=dit_handler.vae,
+                shared_text_encoder=dit_handler.text_encoder,
+                shared_text_tokenizer=dit_handler.text_tokenizer,
+                shared_silence_latent=dit_handler.silence_latent,
+            )
+            if not enable_generate_2:
+                print(f"Warning: DiT model 2 initialization issue: {init_status_2}", file=sys.stderr)
+                init_status += f"\n⚠️ DiT model 2 failed: {init_status_2}"
+            else:
+                print("DiT model 2 initialized successfully")
+                init_status += f"\n✅ DiT model 2: {config_path_2}"
+        # Initialize LM model
+        checkpoint_dir = dit_handler._get_checkpoint_dir()
+        print(f"Initializing 5Hz LM: {lm_model_path}...")
+        lm_status, lm_success = llm_handler.initialize(
+            checkpoint_dir=checkpoint_dir,
+            lm_model_path=lm_model_path,
+            backend=backend,
+            device=device,
+            offload_to_cpu=auto_offload,
+            dtype=dit_handler.dtype
+        )
+        if lm_success:
+            print("5Hz LM initialized successfully")
+            init_status += f"\n{lm_status}"
+        else:
+            print(f"Warning: 5Hz LM initialization failed: {lm_status}", file=sys.stderr)
+            init_status += f"\n{lm_status}"
+    # Build available models list for UI
+    available_dit_models = [config_path]
+    if config_path_2 and dit_handler_2 is not None:
+        available_dit_models.append(config_path_2)
     # Prepare initialization parameters for UI
     init_params = {
         'service_mode': True,
         'checkpoint': None,
         'config_path': config_path,
+        'config_path_2': config_path_2 if config_path_2 else None,
         'device': device,
         'init_llm': True,
         'lm_model_path': lm_model_path,
         'init_status': init_status,
         'enable_generate': enable_generate,
         'dit_handler': dit_handler,
+        'dit_handler_2': dit_handler_2,
+        'available_dit_models': available_dit_models,
         'llm_handler': llm_handler,
         'language': 'en',
         'persistent_storage_path': persistent_storage_path,
+        'debug_ui': debug_ui,
     }
     print("Service initialization completed!")