ChuxiJ committed
Commit c9570f3 · 1 Parent(s): 748bd62

fix lrc bugs

acestep/gradio_ui/events/__init__.py CHANGED
@@ -92,31 +92,6 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  ]
  )

- # Update codes hints visibility
- for trigger in [generation_section["src_audio"], generation_section["allow_lm_batch"], generation_section["batch_size_input"]]:
- trigger.change(
- fn=gen_h.update_codes_hints_visibility,
- inputs=[
- generation_section["src_audio"],
- generation_section["allow_lm_batch"],
- generation_section["batch_size_input"]
- ],
- outputs=[
- generation_section["codes_single_row"],
- generation_section["codes_batch_row"],
- generation_section["codes_batch_row_2"],
- generation_section["codes_col_1"],
- generation_section["codes_col_2"],
- generation_section["codes_col_3"],
- generation_section["codes_col_4"],
- generation_section["codes_col_5"],
- generation_section["codes_col_6"],
- generation_section["codes_col_7"],
- generation_section["codes_col_8"],
- generation_section["transcribe_btn"],
- ]
- )
-
  # ========== Audio Conversion ==========
  generation_section["convert_src_to_codes_btn"].click(
  fn=lambda src: gen_h.convert_src_audio_to_codes_wrapper(dit_handler, src),
@@ -397,7 +372,9 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  ],
  outputs=[
  results_section[f"lrc_display_{btn_idx}"],
- results_section[f"details_accordion_{btn_idx}"]
+ results_section[f"details_accordion_{btn_idx}"],
+ # Audio subtitles now auto-updated via lrc_display.change()
+ results_section["batch_queue"]
  ]
  )

@@ -445,6 +422,7 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  generation_section["constrained_decoding_debug"],
  generation_section["allow_lm_batch"],
  generation_section["auto_score"],
+ generation_section["auto_lrc"],
  generation_section["score_scale"],
  generation_section["lm_batch_chunk_size"],
  generation_section["track_name"],
@@ -476,15 +454,30 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  results_section["score_display_6"],
  results_section["score_display_7"],
  results_section["score_display_8"],
- generation_section["text2music_audio_code_string"],
- generation_section["text2music_audio_code_string_1"],
- generation_section["text2music_audio_code_string_2"],
- generation_section["text2music_audio_code_string_3"],
- generation_section["text2music_audio_code_string_4"],
- generation_section["text2music_audio_code_string_5"],
- generation_section["text2music_audio_code_string_6"],
- generation_section["text2music_audio_code_string_7"],
- generation_section["text2music_audio_code_string_8"],
+ results_section["codes_display_1"],
+ results_section["codes_display_2"],
+ results_section["codes_display_3"],
+ results_section["codes_display_4"],
+ results_section["codes_display_5"],
+ results_section["codes_display_6"],
+ results_section["codes_display_7"],
+ results_section["codes_display_8"],
+ results_section["details_accordion_1"],
+ results_section["details_accordion_2"],
+ results_section["details_accordion_3"],
+ results_section["details_accordion_4"],
+ results_section["details_accordion_5"],
+ results_section["details_accordion_6"],
+ results_section["details_accordion_7"],
+ results_section["details_accordion_8"],
+ results_section["lrc_display_1"],
+ results_section["lrc_display_2"],
+ results_section["lrc_display_3"],
+ results_section["lrc_display_4"],
+ results_section["lrc_display_5"],
+ results_section["lrc_display_6"],
+ results_section["lrc_display_7"],
+ results_section["lrc_display_8"],
  results_section["lm_metadata_state"],
  results_section["is_format_caption_state"],
  results_section["current_batch_index"],
@@ -546,6 +539,30 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  results_section["score_display_6"],
  results_section["score_display_7"],
  results_section["score_display_8"],
+ results_section["codes_display_1"],
+ results_section["codes_display_2"],
+ results_section["codes_display_3"],
+ results_section["codes_display_4"],
+ results_section["codes_display_5"],
+ results_section["codes_display_6"],
+ results_section["codes_display_7"],
+ results_section["codes_display_8"],
+ results_section["lrc_display_1"],
+ results_section["lrc_display_2"],
+ results_section["lrc_display_3"],
+ results_section["lrc_display_4"],
+ results_section["lrc_display_5"],
+ results_section["lrc_display_6"],
+ results_section["lrc_display_7"],
+ results_section["lrc_display_8"],
+ results_section["details_accordion_1"],
+ results_section["details_accordion_2"],
+ results_section["details_accordion_3"],
+ results_section["details_accordion_4"],
+ results_section["details_accordion_5"],
+ results_section["details_accordion_6"],
+ results_section["details_accordion_7"],
+ results_section["details_accordion_8"],
  results_section["restore_params_btn"],
  ]
  )
@@ -590,6 +607,7 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  generation_section["constrained_decoding_debug"],
  generation_section["allow_lm_batch"],
  generation_section["auto_score"],
+ generation_section["auto_lrc"],
  generation_section["score_scale"],
  generation_section["lm_batch_chunk_size"],
  generation_section["track_name"],
@@ -629,6 +647,30 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  results_section["score_display_6"],
  results_section["score_display_7"],
  results_section["score_display_8"],
+ results_section["codes_display_1"],
+ results_section["codes_display_2"],
+ results_section["codes_display_3"],
+ results_section["codes_display_4"],
+ results_section["codes_display_5"],
+ results_section["codes_display_6"],
+ results_section["codes_display_7"],
+ results_section["codes_display_8"],
+ results_section["lrc_display_1"],
+ results_section["lrc_display_2"],
+ results_section["lrc_display_3"],
+ results_section["lrc_display_4"],
+ results_section["lrc_display_5"],
+ results_section["lrc_display_6"],
+ results_section["lrc_display_7"],
+ results_section["lrc_display_8"],
+ results_section["details_accordion_1"],
+ results_section["details_accordion_2"],
+ results_section["details_accordion_3"],
+ results_section["details_accordion_4"],
+ results_section["details_accordion_5"],
+ results_section["details_accordion_6"],
+ results_section["details_accordion_7"],
+ results_section["details_accordion_8"],
  results_section["restore_params_btn"],
  ]
  ).then(
@@ -658,14 +700,6 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  ],
  outputs=[
  generation_section["text2music_audio_code_string"],
- generation_section["text2music_audio_code_string_1"],
- generation_section["text2music_audio_code_string_2"],
- generation_section["text2music_audio_code_string_3"],
- generation_section["text2music_audio_code_string_4"],
- generation_section["text2music_audio_code_string_5"],
- generation_section["text2music_audio_code_string_6"],
- generation_section["text2music_audio_code_string_7"],
- generation_section["text2music_audio_code_string_8"],
  generation_section["captions"],
  generation_section["lyrics"],
  generation_section["bpm"],
@@ -687,3 +721,16 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
  generation_section["complete_track_classes"],
  ]
  )
+
+ # ========== LRC Display Change Handlers ==========
+ # When lrc_display textbox changes, update the corresponding audio component's subtitles
+ for i in range(1, 9):
+ results_section[f"lrc_display_{i}"].change(
+ fn=res_h.update_audio_subtitles_from_lrc,
+ inputs=[
+ results_section[f"lrc_display_{i}"],
+ results_section[f"generated_audio_{i}"],
+ generation_section["audio_duration"],
+ ],
+ outputs=[results_section[f"generated_audio_{i}"]]
+ )
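For reference, a minimal sketch (not part of the commit) of what the per-sample wiring above does, assuming it runs inside the repo environment so that the handlers module imports cleanly; the module path and the res_h alias are taken from this diff, and the audio path is a made-up example. When an lrc_display textbox changes, update_audio_subtitles_from_lrc re-emits the audio value together with subtitles parsed from the LRC text.

from acestep.gradio_ui.events import results_handlers as res_h

lrc = "[00:10.00]Hello\n[00:12.50]World"
update = res_h.update_audio_subtitles_from_lrc(lrc, "/tmp/sample_1.mp3", audio_duration=15.0)
# update is a gr.update(...) payload carrying value="/tmp/sample_1.mp3" and
# subtitles=[{'text': 'Hello', 'timestamp': [10.0, 12.5]},
#            {'text': 'World', 'timestamp': [12.5, 15.0]}]
print(update)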
acestep/gradio_ui/events/generation_handlers.py CHANGED
@@ -568,60 +568,3 @@ def update_audio_components_visibility(batch_size):
  return updates_row1 + updates_row2


- def update_codes_hints_visibility(src_audio, allow_lm_batch, batch_size):
- """Switch between single/batch codes input based on src_audio presence
-
- When src_audio is present:
- - Show single mode with transcribe button
- - Clear codes (will be filled by transcription)
-
- When src_audio is absent:
- - Hide transcribe button
- - Show batch mode if allow_lm_batch=True and batch_size>=2
- - Show single mode otherwise
-
- Row 1: Codes 1-4
- Row 2: Codes 5-8 (batch_size >= 5)
- """
- batch_size = min(max(int(batch_size), 1), 8)
- has_src_audio = src_audio is not None
-
- if has_src_audio:
- # Has src_audio: show single mode with transcribe button
- return (
- gr.update(visible=True), # codes_single_row
- gr.update(visible=False), # codes_batch_row
- gr.update(visible=False), # codes_batch_row_2
- *[gr.update(visible=False)] * 8, # Hide all batch columns
- gr.update(visible=True), # transcribe_btn: show when src_audio present
- )
- else:
- # No src_audio: decide between single/batch mode based on settings
- if allow_lm_batch and batch_size >= 2:
- # Batch mode: hide single, show batch codes with dynamic columns
- show_row_2 = batch_size >= 5
- return (
- gr.update(visible=False), # codes_single_row
- gr.update(visible=True), # codes_batch_row (row 1)
- gr.update(visible=show_row_2), # codes_batch_row_2 (row 2)
- # Row 1 columns (1-4)
- gr.update(visible=True), # codes_col_1: always visible in batch mode
- gr.update(visible=batch_size >= 2), # codes_col_2
- gr.update(visible=batch_size >= 3), # codes_col_3
- gr.update(visible=batch_size >= 4), # codes_col_4
- # Row 2 columns (5-8)
- gr.update(visible=batch_size >= 5), # codes_col_5
- gr.update(visible=batch_size >= 6), # codes_col_6
- gr.update(visible=batch_size >= 7), # codes_col_7
- gr.update(visible=batch_size >= 8), # codes_col_8
- gr.update(visible=False), # transcribe_btn: hide when no src_audio
- )
- else:
- # Single mode: show single, hide batch
- return (
- gr.update(visible=True), # codes_single_row
- gr.update(visible=False), # codes_batch_row
- gr.update(visible=False), # codes_batch_row_2
- *[gr.update(visible=False)] * 8, # Hide all batch columns
- gr.update(visible=False), # transcribe_btn: hide when no src_audio
- )
acestep/gradio_ui/events/results_handlers.py CHANGED
@@ -6,11 +6,12 @@ import os
6
  import json
7
  import datetime
8
  import math
 
9
  import tempfile
10
  import shutil
11
  import zipfile
12
  import time as time_module
13
- from typing import Dict, Any, Optional
14
  import gradio as gr
15
  from loguru import logger
16
  from acestep.gradio_ui.i18n import t
@@ -18,6 +19,88 @@ from acestep.inference import generate_music, GenerationParams, GenerationConfig
18
  from acestep.audio_utils import save_audio
19
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def _build_generation_info(
22
  lm_metadata: Optional[Dict[str, Any]],
23
  time_costs: Dict[str, float],
@@ -99,13 +182,16 @@ def _build_generation_info(
99
  # Post-processing time costs
100
  audio_conversion_time = time_costs.get('audio_conversion_time', 0.0)
101
  auto_score_time = time_costs.get('auto_score_time', 0.0)
 
102
 
103
- if audio_conversion_time > 0 or auto_score_time > 0:
104
  time_lines.append("\n**🔧 Post-processing Time:**")
105
  if audio_conversion_time > 0:
106
  time_lines.append(f" - Audio Conversion: {audio_conversion_time:.2f}s")
107
  if auto_score_time > 0:
108
  time_lines.append(f" - Auto Score: {auto_score_time:.2f}s")
 
 
109
 
110
  # Pipeline total
111
  pipeline_total = time_costs.get('pipeline_total_time', 0.0)
@@ -276,6 +362,7 @@ def generate_with_progress(
276
  constrained_decoding_debug,
277
  allow_lm_batch,
278
  auto_score,
 
279
  score_scale,
280
  lm_batch_chunk_size,
281
  progress=gr.Progress(track_tqdm=True),
@@ -357,6 +444,11 @@ def generate_with_progress(
357
  # Initialize post-processing timing
358
  audio_conversion_start_time = time_module.time()
359
  total_auto_score_time = 0.0
 
 
 
 
 
360
 
361
  updated_audio_codes = text2music_audio_code_string if not think_checkbox else ""
362
 
@@ -370,11 +462,52 @@ def generate_with_progress(
370
  )
371
 
372
  if not result.success:
373
- yield (None,) * 8 + (None, generation_info, result.status_message) + (gr.skip(),) * 20 + (None,) # +1 for extra_outputs
 
 
 
 
 
 
 
 
 
374
  return
375
 
376
  audios = result.audios
377
  progress(0.99, "Converting audio to mp3...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  for i in range(8):
379
  if i < len(audios):
380
  key = audios[i]["key"]
@@ -395,7 +528,7 @@ def generate_with_progress(
395
  code_str = audio_params.get("audio_codes", "")
396
  final_codes_list[i] = code_str
397
 
398
- scores_ui_updates = [gr.skip()] * 8
399
  score_str = "Done!"
400
  if auto_score:
401
  auto_score_start = time_module.time()
@@ -405,12 +538,82 @@ def generate_with_progress(
405
  scores_ui_updates[i] = score_str
406
  final_scores_list[i] = score_str
407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
409
- current_audio_updates = [gr.skip()] * 8
 
410
  current_audio_updates[i] = audio_path
411
 
412
- audio_codes_ui_updates = [gr.skip()] * 8
413
- audio_codes_ui_updates[i] = code_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  yield (
415
  current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
416
  current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
@@ -420,13 +623,19 @@ def generate_with_progress(
420
  seed_value_for_ui,
421
  # Scores
422
  scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
423
- updated_audio_codes,
424
- # Codes
425
- audio_codes_ui_updates[0], audio_codes_ui_updates[1], audio_codes_ui_updates[2], audio_codes_ui_updates[3],
426
- audio_codes_ui_updates[4], audio_codes_ui_updates[5], audio_codes_ui_updates[6], audio_codes_ui_updates[7],
 
 
 
 
 
427
  lm_generated_metadata,
428
  is_format_caption,
429
  None, # Placeholder for extra_outputs (only filled in final yield)
 
430
  )
431
  else:
432
  # If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
@@ -442,10 +651,12 @@ def generate_with_progress(
442
  time_costs['audio_conversion_time'] = audio_conversion_time
443
  if total_auto_score_time > 0:
444
  time_costs['auto_score_time'] = total_auto_score_time
 
 
445
 
446
  # Update pipeline total time to include post-processing
447
  if 'pipeline_total_time' in time_costs:
448
- time_costs['pipeline_total_time'] += audio_conversion_time + total_auto_score_time
449
 
450
  # Rebuild generation_info with complete timing information
451
  generation_info = _build_generation_info(
@@ -456,6 +667,23 @@ def generate_with_progress(
456
  num_audios=len(result.audios),
457
  )
458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  yield (
460
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 1-4: SKIP
461
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 5-8: SKIP
@@ -465,12 +693,23 @@ def generate_with_progress(
465
  seed_value_for_ui,
466
  final_scores_list[0], final_scores_list[1], final_scores_list[2], final_scores_list[3],
467
  final_scores_list[4], final_scores_list[5], final_scores_list[6], final_scores_list[7],
468
- updated_audio_codes,
469
- final_codes_list[0], final_codes_list[1], final_codes_list[2], final_codes_list[3],
470
- final_codes_list[4], final_codes_list[5], final_codes_list[6], final_codes_list[7],
 
 
 
 
 
 
471
  lm_generated_metadata,
472
  is_format_caption,
473
- result.extra_outputs, # extra_outputs for LRC generation
 
 
 
 
 
474
  )
475
 
476
 
@@ -652,6 +891,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
652
 
653
  This function retrieves cached generation data from batch_queue and calls
654
  the handler's get_lyric_timestamp method to generate LRC format lyrics.
 
655
 
656
  Args:
657
  dit_handler: DiT handler instance with get_lyric_timestamp method
@@ -662,19 +902,19 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
662
  inference_steps: Number of inference steps used in generation
663
 
664
  Returns:
665
- LRC formatted string or error message
666
  """
667
  import torch
668
 
669
  if current_batch_index not in batch_queue:
670
- return gr.skip(), gr.skip()
671
 
672
  batch_data = batch_queue[current_batch_index]
673
  extra_outputs = batch_data.get("extra_outputs", {})
674
 
675
  # Check if required data is available
676
  if not extra_outputs:
677
- return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.update(visible=True)
678
 
679
  pred_latents = extra_outputs.get("pred_latents")
680
  encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
@@ -683,7 +923,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
683
  lyric_token_idss = extra_outputs.get("lyric_token_idss")
684
 
685
  if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
686
- return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.update(visible=True)
687
 
688
  # Adjust sample_idx to 0-based
689
  sample_idx_0based = sample_idx - 1
@@ -691,7 +931,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
691
  # Check if sample exists in batch
692
  batch_size = pred_latents.shape[0]
693
  if sample_idx_0based >= batch_size:
694
- return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.update(visible=True)
695
 
696
  # Extract the specific sample's data
697
  try:
@@ -729,15 +969,72 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
729
  if result.get("success"):
730
  lrc_text = result.get("lrc_text", "")
731
  if not lrc_text:
732
- return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.update(visible=True)
733
- return gr.update(value=lrc_text, visible=True), gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  else:
735
  error_msg = result.get("error", "Unknown error")
736
- return gr.update(value=f"❌ {error_msg}", visible=True), gr.update(visible=True)
737
 
738
  except Exception as e:
739
  logger.exception("[generate_lrc_handler] Error generating LRC")
740
- return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
 
743
  def capture_current_params(
@@ -749,7 +1046,7 @@ def capture_current_params(
749
  use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format, lm_temperature,
750
  think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
751
  use_cot_metas, use_cot_caption, use_cot_language,
752
- constrained_decoding_debug, allow_lm_batch, auto_score, score_scale, lm_batch_chunk_size,
753
  track_name, complete_track_classes
754
  ):
755
  """Capture current UI parameters for next batch generation
@@ -796,6 +1093,7 @@ def capture_current_params(
796
  "constrained_decoding_debug": constrained_decoding_debug,
797
  "allow_lm_batch": allow_lm_batch,
798
  "auto_score": auto_score,
 
799
  "score_scale": score_scale,
800
  "lm_batch_chunk_size": lm_batch_chunk_size,
801
  "track_name": track_name,
@@ -816,6 +1114,7 @@ def generate_with_batch_management(
816
  constrained_decoding_debug,
817
  allow_lm_batch,
818
  auto_score,
 
819
  score_scale,
820
  lm_batch_chunk_size,
821
  track_name,
@@ -844,6 +1143,7 @@ def generate_with_batch_management(
844
  constrained_decoding_debug,
845
  allow_lm_batch,
846
  auto_score,
 
847
  score_scale,
848
  lm_batch_chunk_size,
849
  progress
@@ -853,8 +1153,8 @@ def generate_with_batch_management(
853
  final_result_from_inner = partial_result
854
  # current_batch_index, total_batches, batch_queue, next_params,
855
  # batch_indicator_text, prev_btn, next_btn, next_status, restore_btn
856
- # Slice off extra_outputs (last item) before re-yielding to UI
857
- ui_result = partial_result[:-1] if len(partial_result) > 31 else partial_result
858
  yield ui_result + (
859
  gr.skip(), gr.skip(), gr.skip(), gr.skip(),
860
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
@@ -863,8 +1163,8 @@ def generate_with_batch_management(
863
  all_audio_paths = result[8]
864
 
865
  if all_audio_paths is None:
866
- # Slice off extra_outputs before yielding to UI
867
- ui_result = result[:-1] if len(result) > 31 else result
868
  yield ui_result + (
869
  gr.skip(), gr.skip(), gr.skip(), gr.skip(),
870
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
@@ -872,14 +1172,18 @@ def generate_with_batch_management(
872
  return
873
 
874
  # Extract results from generation (使用 result 下标访问)
875
- # New indices after removing 6 align_* items (was 12-17, now shifted down by 6)
 
 
 
876
  generation_info = result[9]
877
  seed_value_for_ui = result[11]
878
- lm_generated_metadata = result[29] # was 35, now 29
879
 
880
- # Extract codes
881
- generated_codes_single = result[20] # was 26, now 20
882
- generated_codes_batch = [result[21], result[22], result[23], result[24], result[25], result[26], result[27], result[28]] # was 27-34, now 21-28
 
883
 
884
  # Determine which codes to store based on mode
885
  if allow_lm_batch and batch_size_input >= 2:
@@ -926,6 +1230,7 @@ def generate_with_batch_management(
926
  "constrained_decoding_debug": constrained_decoding_debug,
927
  "allow_lm_batch": allow_lm_batch,
928
  "auto_score": auto_score,
 
929
  "score_scale": score_scale,
930
  "lm_batch_chunk_size": lm_batch_chunk_size,
931
  "track_name": track_name,
@@ -938,8 +1243,9 @@ def generate_with_batch_management(
938
  next_params["text2music_audio_code_string"] = ""
939
  next_params["random_seed_checkbox"] = True
940
 
941
- # Extract extra_outputs from result tuple (index 31)
942
- extra_outputs_from_result = result[31] if len(result) > 31 else {}
 
943
 
944
  # Store current batch in queue
945
  batch_queue = store_batch_in_queue(
@@ -957,6 +1263,13 @@ def generate_with_batch_management(
957
  status="completed"
958
  )
959
 
 
 
 
 
 
 
 
960
  # Update batch counters
961
  total_batches = max(total_batches, current_batch_index + 1)
962
 
@@ -973,8 +1286,14 @@ def generate_with_batch_management(
973
 
974
  # 4. Yield final result (includes Batch UI updates)
975
  # The result here is already a tuple structure
976
- # Slice off extra_outputs (last item) before yielding to UI - it's already stored in batch_queue
977
- ui_result = result[:-1] if len(result) > 31 else result
 
 
 
 
 
 
978
  yield ui_result + (
979
  current_batch_index,
980
  total_batches,
@@ -1086,6 +1405,7 @@ def generate_next_batch_background(
1086
  params.setdefault("constrained_decoding_debug", False)
1087
  params.setdefault("allow_lm_batch", True)
1088
  params.setdefault("auto_score", False)
 
1089
  params.setdefault("score_scale", 0.5)
1090
  params.setdefault("lm_batch_chunk_size", 8)
1091
  params.setdefault("track_name", None)
@@ -1134,6 +1454,7 @@ def generate_next_batch_background(
1134
  constrained_decoding_debug=params.get("constrained_decoding_debug"),
1135
  allow_lm_batch=params.get("allow_lm_batch"),
1136
  auto_score=params.get("auto_score"),
 
1137
  score_scale=params.get("score_scale"),
1138
  lm_batch_chunk_size=params.get("lm_batch_chunk_size"),
1139
  progress=progress
@@ -1145,15 +1466,22 @@ def generate_next_batch_background(
1145
  final_result = partial_result
1146
 
1147
  # Extract results from final_result
1148
- # Indices shifted by -6 after removing align_* items
 
 
 
1149
  all_audio_paths = final_result[8] # generated_audio_batch
1150
  generation_info = final_result[9]
1151
  seed_value_for_ui = final_result[11]
1152
- lm_generated_metadata = final_result[29] # was 35, now 29
 
 
 
 
 
1153
 
1154
- # Extract codes
1155
- generated_codes_single = final_result[20] # was 26, now 20
1156
- generated_codes_batch = [final_result[21], final_result[22], final_result[23], final_result[24], final_result[25], final_result[26], final_result[27], final_result[28]] # was 27-34, now 21-28
1157
 
1158
  # Determine which codes to store
1159
  batch_size = params.get("batch_size_input", 2)
@@ -1168,6 +1496,7 @@ def generate_next_batch_background(
1168
  logger.info(f" - allow_lm_batch: {allow_lm_batch}")
1169
  logger.info(f" - batch_size: {batch_size}")
1170
  logger.info(f" - generated_codes_single exists: {bool(generated_codes_single)}")
 
1171
  if isinstance(codes_to_store, list):
1172
  logger.info(f" - codes_to_store: LIST with {len(codes_to_store)} items")
1173
  for idx, code in enumerate(codes_to_store):
@@ -1176,7 +1505,6 @@ def generate_next_batch_background(
1176
  logger.info(f" - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
1177
 
1178
  # Store next batch in queue with codes, batch settings, and ALL generation params
1179
- # Note: extra_outputs not available for background batches (LRC not supported for auto-gen batches)
1180
  batch_queue = store_batch_in_queue(
1181
  batch_queue,
1182
  next_batch_idx,
@@ -1188,7 +1516,7 @@ def generate_next_batch_background(
1188
  batch_size=int(batch_size),
1189
  generation_params=params,
1190
  lm_generated_metadata=lm_generated_metadata,
1191
- extra_outputs=None, # Not available for background batches
1192
  status="completed"
1193
  )
1194
 
@@ -1229,7 +1557,7 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1229
  """Navigate to previous batch (Result View Only - Never touches Input UI)"""
1230
  if current_batch_index <= 0:
1231
  gr.Warning(t("messages.at_first_batch"))
1232
- return [gr.update()] * 24
1233
 
1234
  # Move to previous batch
1235
  new_batch_index = current_batch_index - 1
@@ -1237,17 +1565,25 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1237
  # Load batch data from queue
1238
  if new_batch_index not in batch_queue:
1239
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1240
- return [gr.update()] * 24
1241
 
1242
  batch_data = batch_queue[new_batch_index]
1243
  audio_paths = batch_data.get("audio_paths", [])
1244
  generation_info_text = batch_data.get("generation_info", "")
1245
 
1246
- # Prepare audio outputs (up to 8)
1247
- audio_outputs = [None] * 8
1248
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
1249
- for idx in range(min(len(real_audio_paths), 8)):
1250
- audio_outputs[idx] = real_audio_paths[idx]
 
 
 
 
 
 
 
 
 
1251
 
1252
  # Update batch indicator
1253
  total_batches = len(batch_queue)
@@ -1260,14 +1596,52 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1260
  stored_scores = batch_data.get("scores", [""] * 8)
1261
  score_displays = stored_scores if stored_scores else [""] * 8
1262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
  return (
1264
- audio_outputs[0], audio_outputs[1], audio_outputs[2], audio_outputs[3],
1265
- audio_outputs[4], audio_outputs[5], audio_outputs[6], audio_outputs[7],
1266
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
1267
  gr.update(interactive=can_go_previous), gr.update(interactive=can_go_next),
1268
  t("messages.viewing_batch", n=new_batch_index + 1),
1269
  score_displays[0], score_displays[1], score_displays[2], score_displays[3],
1270
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
 
 
 
 
 
 
1271
  gr.update(interactive=True),
1272
  )
1273
 
@@ -1276,7 +1650,7 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1276
  """Navigate to next batch (Result View Only - Never touches Input UI)"""
1277
  if current_batch_index >= total_batches - 1:
1278
  gr.Warning(t("messages.at_last_batch"))
1279
- return [gr.update()] * 25
1280
 
1281
  # Move to next batch
1282
  new_batch_index = current_batch_index + 1
@@ -1284,17 +1658,25 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1284
  # Load batch data from queue
1285
  if new_batch_index not in batch_queue:
1286
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1287
- return [gr.update()] * 25
1288
 
1289
  batch_data = batch_queue[new_batch_index]
1290
  audio_paths = batch_data.get("audio_paths", [])
1291
  generation_info_text = batch_data.get("generation_info", "")
1292
 
1293
- # Prepare audio outputs (up to 8)
1294
- audio_outputs = [None] * 8
1295
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
1296
- for idx in range(min(len(real_audio_paths), 8)):
1297
- audio_outputs[idx] = real_audio_paths[idx]
 
 
 
 
 
 
 
 
 
1298
 
1299
  # Update batch indicator
1300
  batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
@@ -1312,14 +1694,52 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1312
  stored_scores = batch_data.get("scores", [""] * 8)
1313
  score_displays = stored_scores if stored_scores else [""] * 8
1314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1315
  return (
1316
- audio_outputs[0], audio_outputs[1], audio_outputs[2], audio_outputs[3],
1317
- audio_outputs[4], audio_outputs[5], audio_outputs[6], audio_outputs[7],
1318
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
1319
  gr.update(interactive=can_go_previous), gr.update(interactive=can_go_next),
1320
  t("messages.viewing_batch", n=new_batch_index + 1), next_batch_status_text,
1321
  score_displays[0], score_displays[1], score_displays[2], score_displays[3],
1322
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
 
 
 
 
 
 
1323
  gr.update(interactive=True),
1324
  )
1325
 
@@ -1331,7 +1751,7 @@ def restore_batch_parameters(current_batch_index, batch_queue):
1331
  """
1332
  if current_batch_index not in batch_queue:
1333
  gr.Warning(t("messages.no_batch_data"))
1334
- return [gr.update()] * 29
1335
 
1336
  batch_data = batch_queue[current_batch_index]
1337
  params = batch_data.get("generation_params", {})
@@ -1357,27 +1777,22 @@ def restore_batch_parameters(current_batch_index, batch_queue):
1357
  track_name = params.get("track_name", None)
1358
  complete_track_classes = params.get("complete_track_classes", [])
1359
 
1360
- # Extract and process codes
1361
  stored_codes = batch_data.get("codes", "")
1362
- stored_allow_lm_batch = params.get("allow_lm_batch", False)
1363
-
1364
- codes_outputs = [""] * 9 # [Main, 1-8]
1365
  if stored_codes:
1366
- if stored_allow_lm_batch and isinstance(stored_codes, list):
1367
- # Batch mode: populate codes 1-8, main shows first
1368
- codes_outputs[0] = stored_codes[0] if stored_codes else ""
1369
- for idx in range(min(len(stored_codes), 8)):
1370
- codes_outputs[idx + 1] = stored_codes[idx]
1371
  else:
1372
- # Single mode: populate main, clear 1-8
1373
- codes_outputs[0] = stored_codes if isinstance(stored_codes, str) else (stored_codes[0] if stored_codes else "")
 
 
1374
 
1375
  gr.Info(t("messages.params_restored", n=current_batch_index + 1))
1376
 
1377
  return (
1378
- codes_outputs[0], codes_outputs[1], codes_outputs[2], codes_outputs[3],
1379
- codes_outputs[4], codes_outputs[5], codes_outputs[6], codes_outputs[7],
1380
- codes_outputs[8], captions, lyrics, bpm, key_scale, time_signature,
1381
  vocal_language, audio_duration, batch_size_input, inference_steps,
1382
  lm_temperature, lm_cfg_scale, lm_top_k, lm_top_p, think_checkbox,
1383
  use_cot_caption, use_cot_language, allow_lm_batch,
 
6
  import json
7
  import datetime
8
  import math
9
+ import re
10
  import tempfile
11
  import shutil
12
  import zipfile
13
  import time as time_module
14
+ from typing import Dict, Any, Optional, List
15
  import gradio as gr
16
  from loguru import logger
17
  from acestep.gradio_ui.i18n import t
 
19
  from acestep.audio_utils import save_audio
20
 
21
 
22
+ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None) -> List[Dict[str, Any]]:
23
+ """
24
+ Parse LRC lyrics text to Gradio subtitles format.
25
+
26
+ LRC format: [MM:SS.ss]Lyric text or [MM:SS.ss][MM:SS.ss]Lyric text (with end time)
27
+ Gradio subtitles format: [{"text": str, "timestamp": [start, end]}]
28
+
29
+ Args:
30
+ lrc_text: LRC format lyrics string
31
+ total_duration: Total audio duration in seconds (used for last line's end time)
32
+
33
+ Returns:
34
+ List of subtitle dictionaries for Gradio Audio component
35
+ """
36
+ if not lrc_text or not lrc_text.strip():
37
+ return []
38
+
39
+ subtitles = []
40
+ lines = lrc_text.strip().split('\n')
41
+
42
+ # Regex patterns for LRC timestamps
43
+ # Pattern 1: [MM:SS.ss] - standard LRC with start time only
44
+ # Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
45
+ timestamp_pattern = r'\[(\d{2}):(\d{2})\.(\d{2})\]'
46
+
47
+ parsed_lines = []
48
+
49
+ for line in lines:
50
+ line = line.strip()
51
+ if not line:
52
+ continue
53
+
54
+ # Find all timestamps in the line
55
+ timestamps = re.findall(timestamp_pattern, line)
56
+ if not timestamps:
57
+ continue
58
+
59
+ # Remove timestamps from text to get the lyric content
60
+ text = re.sub(timestamp_pattern, '', line).strip()
61
+ if not text:
62
+ continue
63
+
64
+ # Parse first timestamp as start time
65
+ start_minutes, start_seconds, start_centiseconds = timestamps[0]
66
+ start_time = int(start_minutes) * 60 + int(start_seconds) + int(start_centiseconds) / 100.0
67
+
68
+ # If there's a second timestamp, use it as end time
69
+ end_time = None
70
+ if len(timestamps) >= 2:
71
+ end_minutes, end_seconds, end_centiseconds = timestamps[1]
72
+ end_time = int(end_minutes) * 60 + int(end_seconds) + int(end_centiseconds) / 100.0
73
+
74
+ parsed_lines.append({
75
+ 'start': start_time,
76
+ 'end': end_time,
77
+ 'text': text
78
+ })
79
+
80
+ # Sort by start time
81
+ parsed_lines.sort(key=lambda x: x['start'])
82
+
83
+ # Fill in missing end times using next line's start time
84
+ for i, line_data in enumerate(parsed_lines):
85
+ if line_data['end'] is None:
86
+ if i + 1 < len(parsed_lines):
87
+ # Use next line's start time as end time
88
+ line_data['end'] = parsed_lines[i + 1]['start']
89
+ elif total_duration is not None:
90
+ # Use total duration for last line
91
+ line_data['end'] = total_duration
92
+ else:
93
+ # Default: add 5 seconds if no duration info
94
+ line_data['end'] = line_data['start'] + 5.0
95
+
96
+ subtitles.append({
97
+ 'text': line_data['text'],
98
+ 'timestamp': [line_data['start'], line_data['end']]
99
+ })
100
+
101
+ return subtitles
102
+
103
+
104
  def _build_generation_info(
105
  lm_metadata: Optional[Dict[str, Any]],
106
  time_costs: Dict[str, float],
 
182
  # Post-processing time costs
183
  audio_conversion_time = time_costs.get('audio_conversion_time', 0.0)
184
  auto_score_time = time_costs.get('auto_score_time', 0.0)
185
+ auto_lrc_time = time_costs.get('auto_lrc_time', 0.0)
186
 
187
+ if audio_conversion_time > 0 or auto_score_time > 0 or auto_lrc_time > 0:
188
  time_lines.append("\n**🔧 Post-processing Time:**")
189
  if audio_conversion_time > 0:
190
  time_lines.append(f" - Audio Conversion: {audio_conversion_time:.2f}s")
191
  if auto_score_time > 0:
192
  time_lines.append(f" - Auto Score: {auto_score_time:.2f}s")
193
+ if auto_lrc_time > 0:
194
+ time_lines.append(f" - Auto LRC: {auto_lrc_time:.2f}s")
195
 
196
  # Pipeline total
197
  pipeline_total = time_costs.get('pipeline_total_time', 0.0)
 
362
  constrained_decoding_debug,
363
  allow_lm_batch,
364
  auto_score,
365
+ auto_lrc,
366
  score_scale,
367
  lm_batch_chunk_size,
368
  progress=gr.Progress(track_tqdm=True),
 
444
  # Initialize post-processing timing
445
  audio_conversion_start_time = time_module.time()
446
  total_auto_score_time = 0.0
447
+ total_auto_lrc_time = 0.0
448
+
449
+ # Initialize LRC storage for auto_lrc
450
+ final_lrcs_list = [""] * 8
451
+ final_subtitles_list = [None] * 8
452
 
453
  updated_audio_codes = text2music_audio_code_string if not think_checkbox else ""
454
 
 
462
  )
463
 
464
  if not result.success:
465
+ # Structure: 8 audio + batch_files + gen_info + status + seed + 8 scores + 8 codes_display + 8 accordions + 8 lrc_display + lm_meta + is_format + extra_outputs + raw_codes
466
+ yield (
467
+ (None,) * 8 + # audio outputs
468
+ (None, generation_info, result.status_message, gr.skip()) + # batch_files, gen_info, status, seed
469
+ (gr.skip(),) * 8 + # scores
470
+ (gr.skip(),) * 8 + # codes_display
471
+ (gr.skip(),) * 8 + # details_accordion
472
+ (gr.skip(),) * 8 + # lrc_display
473
+ (None, is_format_caption, None, None) # lm_meta, is_format, extra_outputs, raw_codes
474
+ )
475
  return
476
 
477
  audios = result.audios
478
  progress(0.99, "Converting audio to mp3...")
479
+
480
+ # Clear all scores, codes, and lrc displays at the start of generation
481
+ # Note: Create independent gr.update objects (not references to the same object)
482
+ clear_scores = [gr.update(value="", visible=False) for _ in range(8)]
483
+ clear_codes = [gr.update(value="", visible=False) for _ in range(8)]
484
+ clear_lrcs = [gr.update(value="", visible=False) for _ in range(8)]
485
+ clear_accordions = [gr.update(visible=False) for _ in range(8)]
486
+ yield (
487
+ # Audio outputs (keep as skip, will be updated in loop)
488
+ gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
489
+ None, # all_audio_paths (clear batch files)
490
+ generation_info,
491
+ "Clearing previous results...",
492
+ gr.skip(), # seed
493
+ # Clear scores
494
+ clear_scores[0], clear_scores[1], clear_scores[2], clear_scores[3],
495
+ clear_scores[4], clear_scores[5], clear_scores[6], clear_scores[7],
496
+ # Clear codes display
497
+ clear_codes[0], clear_codes[1], clear_codes[2], clear_codes[3],
498
+ clear_codes[4], clear_codes[5], clear_codes[6], clear_codes[7],
499
+ # Clear accordions
500
+ clear_accordions[0], clear_accordions[1], clear_accordions[2], clear_accordions[3],
501
+ clear_accordions[4], clear_accordions[5], clear_accordions[6], clear_accordions[7],
502
+ # Clear lrc displays
503
+ clear_lrcs[0], clear_lrcs[1], clear_lrcs[2], clear_lrcs[3],
504
+ clear_lrcs[4], clear_lrcs[5], clear_lrcs[6], clear_lrcs[7],
505
+ lm_generated_metadata,
506
+ is_format_caption,
507
+ None, # extra_outputs placeholder
508
+ None, # raw_codes placeholder
509
+ )
510
+
511
  for i in range(8):
512
  if i < len(audios):
513
  key = audios[i]["key"]
 
528
  code_str = audio_params.get("audio_codes", "")
529
  final_codes_list[i] = code_str
530
 
531
+ scores_ui_updates = [gr.skip() for _ in range(8)]
532
  score_str = "Done!"
533
  if auto_score:
534
  auto_score_start = time_module.time()
 
538
  scores_ui_updates[i] = score_str
539
  final_scores_list[i] = score_str
540
 
541
+ # Auto LRC generation
542
+ if auto_lrc:
543
+ auto_lrc_start = time_module.time()
544
+ logger.info(f"[auto_lrc] Starting LRC generation for sample {i + 1}")
545
+ try:
546
+ # Get extra_outputs for this sample
547
+ pred_latents = result.extra_outputs.get("pred_latents")
548
+ encoder_hidden_states = result.extra_outputs.get("encoder_hidden_states")
549
+ encoder_attention_mask = result.extra_outputs.get("encoder_attention_mask")
550
+ context_latents = result.extra_outputs.get("context_latents")
551
+ lyric_token_idss = result.extra_outputs.get("lyric_token_idss")
552
+
553
+ logger.info(f"[auto_lrc] pred_latents: {pred_latents is not None}, encoder_hidden_states: {encoder_hidden_states is not None}, encoder_attention_mask: {encoder_attention_mask is not None}, context_latents: {context_latents is not None}, lyric_token_idss: {lyric_token_idss is not None}")
554
+
555
+ if all(x is not None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
556
+ # Extract single sample tensors
557
+ sample_pred_latent = pred_latents[i:i+1]
558
+ sample_encoder_hidden_states = encoder_hidden_states[i:i+1]
559
+ sample_encoder_attention_mask = encoder_attention_mask[i:i+1]
560
+ sample_context_latents = context_latents[i:i+1]
561
+ sample_lyric_token_ids = lyric_token_idss[i:i+1]
562
+
563
+ # Calculate actual duration
564
+ actual_duration = audio_duration
565
+ if actual_duration is None or actual_duration <= 0:
566
+ latent_length = pred_latents.shape[1]
567
+ actual_duration = latent_length / 25.0 # 25 Hz latent rate
568
+
569
+ lrc_result = dit_handler.get_lyric_timestamp(
570
+ pred_latent=sample_pred_latent,
571
+ encoder_hidden_states=sample_encoder_hidden_states,
572
+ encoder_attention_mask=sample_encoder_attention_mask,
573
+ context_latents=sample_context_latents,
574
+ lyric_token_ids=sample_lyric_token_ids,
575
+ total_duration_seconds=float(actual_duration),
576
+ vocal_language=vocal_language or "en",
577
+ inference_steps=int(inference_steps),
578
+ seed=42,
579
+ )
580
+
581
+ logger.info(f"[auto_lrc] LRC result for sample {i + 1}: success={lrc_result.get('success')}")
582
+ if lrc_result.get("success"):
583
+ lrc_text = lrc_result.get("lrc_text", "")
584
+ final_lrcs_list[i] = lrc_text
585
+ logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
586
+ # Parse LRC to subtitles format
587
+ subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=float(actual_duration))
588
+ final_subtitles_list[i] = subtitles_data
589
+ else:
590
+ logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
591
+ except Exception as e:
592
+ logger.warning(f"[auto_lrc] Failed to generate LRC for sample {i + 1}: {e}")
593
+ auto_lrc_end = time_module.time()
594
+ total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
595
+
596
  status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
597
+ current_audio_updates = [gr.skip() for _ in range(8)]
598
+ # Always set audio path first, subtitles will be applied via Audio component's subtitles parameter
599
  current_audio_updates[i] = audio_path
600
 
601
+ # Codes display updates (for results section)
602
+ codes_display_updates = [gr.skip() for _ in range(8)]
603
+ codes_display_updates[i] = gr.update(value=code_str, visible=bool(code_str))
604
+
605
+ # LRC display updates
606
+ lrc_display_updates = [gr.skip() for _ in range(8)]
607
+ has_lrc = bool(final_lrcs_list[i])
608
+ if auto_lrc and has_lrc:
609
+ lrc_display_updates[i] = gr.update(value=final_lrcs_list[i], visible=True)
610
+
611
+ # Details accordion updates (show if code OR lrc OR score exists)
612
+ details_accordion_updates = [gr.skip() for _ in range(8)]
613
+ has_score = bool(score_str) and score_str != "Done!"
614
+ has_content = bool(code_str) or has_lrc or has_score
615
+ details_accordion_updates[i] = gr.update(visible=has_content)
616
+
617
  yield (
618
  current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
619
  current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
 
623
  seed_value_for_ui,
624
  # Scores
625
  scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
626
+ # Codes display in results section
627
+ codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
628
+ codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
629
+ # Details accordion visibility
630
+ details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
631
+ details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
632
+ # LRC display
633
+ lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
634
+ lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
635
  lm_generated_metadata,
636
  is_format_caption,
637
  None, # Placeholder for extra_outputs (only filled in final yield)
638
+ None, # Placeholder for raw_codes_list (only filled in final yield)
639
  )
640
  else:
641
  # If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
 
651
  time_costs['audio_conversion_time'] = audio_conversion_time
652
  if total_auto_score_time > 0:
653
  time_costs['auto_score_time'] = total_auto_score_time
654
+ if total_auto_lrc_time > 0:
655
+ time_costs['auto_lrc_time'] = total_auto_lrc_time
656
 
657
  # Update pipeline total time to include post-processing
658
  if 'pipeline_total_time' in time_costs:
659
+ time_costs['pipeline_total_time'] += audio_conversion_time + total_auto_score_time + total_auto_lrc_time
660
 
661
  # Rebuild generation_info with complete timing information
662
  generation_info = _build_generation_info(
 
667
  num_audios=len(result.audios),
668
  )
669
 
670
+ # Build final codes display, LRC display, and accordion visibility updates
671
+ final_codes_display_updates = []
672
+ final_lrc_display_updates = []
673
+ final_accordion_updates = []
674
+ for i in range(8):
675
+ code_str = final_codes_list[i]
676
+ lrc_text = final_lrcs_list[i]
677
+ score_str = final_scores_list[i]
678
+ has_code = bool(code_str)
679
+ has_lrc = bool(lrc_text)
680
+ has_score = bool(score_str) and score_str != "Done!"
681
+ # Show accordion if code OR LRC OR score exists
682
+ has_content = has_code or has_lrc or has_score
683
+ final_codes_display_updates.append(gr.update(value=code_str, visible=has_code))
684
+ final_lrc_display_updates.append(gr.update(value=lrc_text, visible=has_lrc))
685
+ final_accordion_updates.append(gr.update(visible=has_content))
686
+
687
  yield (
688
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 1-4: SKIP
689
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 5-8: SKIP
 
693
  seed_value_for_ui,
694
  final_scores_list[0], final_scores_list[1], final_scores_list[2], final_scores_list[3],
695
  final_scores_list[4], final_scores_list[5], final_scores_list[6], final_scores_list[7],
696
+ # Codes display in results section
697
+ final_codes_display_updates[0], final_codes_display_updates[1], final_codes_display_updates[2], final_codes_display_updates[3],
698
+ final_codes_display_updates[4], final_codes_display_updates[5], final_codes_display_updates[6], final_codes_display_updates[7],
699
+ # Details accordion visibility
700
+ final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
701
+ final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
702
+ # LRC display
703
+ final_lrc_display_updates[0], final_lrc_display_updates[1], final_lrc_display_updates[2], final_lrc_display_updates[3],
704
+ final_lrc_display_updates[4], final_lrc_display_updates[5], final_lrc_display_updates[6], final_lrc_display_updates[7],
705
  lm_generated_metadata,
706
  is_format_caption,
707
+ {
708
+ **result.extra_outputs,
709
+ "lrcs": final_lrcs_list,
710
+ "subtitles": final_subtitles_list,
711
+ }, # extra_outputs for LRC generation (with auto_lrc results)
712
+ final_codes_list, # Raw codes list for batch storage (index 47)
713
  )
714
 
715
 
 
891
 
892
  This function retrieves cached generation data from batch_queue and calls
893
  the handler's get_lyric_timestamp method to generate LRC format lyrics.
894
+ Audio subtitles are automatically updated via lrc_display.change() event.
895
 
896
  Args:
897
  dit_handler: DiT handler instance with get_lyric_timestamp method
 
902
  inference_steps: Number of inference steps used in generation
903
 
904
  Returns:
905
+ Tuple of (lrc_display_update, details_accordion_update, batch_queue)
906
  """
907
  import torch
908
 
909
  if current_batch_index not in batch_queue:
910
+ return gr.skip(), gr.skip(), batch_queue
911
 
912
  batch_data = batch_queue[current_batch_index]
913
  extra_outputs = batch_data.get("extra_outputs", {})
914
 
915
  # Check if required data is available
916
  if not extra_outputs:
917
+ return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.update(visible=True), batch_queue
918
 
919
  pred_latents = extra_outputs.get("pred_latents")
920
  encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
 
923
  lyric_token_idss = extra_outputs.get("lyric_token_idss")
924
 
925
  if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
926
+ return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.update(visible=True), batch_queue
927
 
928
  # Adjust sample_idx to 0-based
929
  sample_idx_0based = sample_idx - 1
 
931
  # Check if sample exists in batch
932
  batch_size = pred_latents.shape[0]
933
  if sample_idx_0based >= batch_size:
934
+ return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.update(visible=True), batch_queue
935
 
936
  # Extract the specific sample's data
937
  try:
 
969
  if result.get("success"):
970
  lrc_text = result.get("lrc_text", "")
971
  if not lrc_text:
972
+ return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.update(visible=True), batch_queue
973
+
974
+ # Store LRC in batch_queue for later retrieval when switching batches
975
+ if "lrcs" not in batch_queue[current_batch_index]:
976
+ batch_queue[current_batch_index]["lrcs"] = [""] * 8
977
+ batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
978
+
979
+ # Parse LRC to subtitles format for storage (audio subtitles will be updated via lrc_display.change())
980
+ subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=float(audio_duration))
981
+
982
+ # Store subtitles in batch_queue for batch navigation
983
+ if "subtitles" not in batch_queue[current_batch_index]:
984
+ batch_queue[current_batch_index]["subtitles"] = [None] * 8
985
+ batch_queue[current_batch_index]["subtitles"][sample_idx_0based] = subtitles_data
986
+
987
+ # Return: lrc_display, details_accordion, batch_queue
988
+ # Audio subtitles are automatically updated via lrc_display.change() event
989
+ return (
990
+ gr.update(value=lrc_text, visible=True),
991
+ gr.update(visible=True),
992
+ batch_queue
993
+ )
994
  else:
995
  error_msg = result.get("error", "Unknown error")
996
+ return gr.update(value=f"❌ {error_msg}", visible=True), gr.update(visible=True), batch_queue
997
 
998
  except Exception as e:
999
  logger.exception("[generate_lrc_handler] Error generating LRC")
1000
+ return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.update(visible=True), batch_queue
1001
+
1002
+
1003
+ def update_audio_subtitles_from_lrc(lrc_text: str, audio_component_value, audio_duration: float = None):
1004
+ """
1005
+ Update Audio component's subtitles based on LRC text content.
1006
+
1007
+ This function is triggered when lrc_display textbox changes.
1008
+ It parses the LRC text and updates the corresponding Audio component's subtitles.
1009
+
1010
+ Args:
1011
+ lrc_text: LRC format lyrics string from lrc_display textbox
1012
+ audio_component_value: Current value of the audio component (path or dict)
1013
+ audio_duration: Optional audio duration for calculating last line's end time
1014
+
1015
+ Returns:
1016
+ gr.update for the Audio component with subtitles
1017
+ """
1018
+ # If no LRC text, skip update (don't clear subtitles to avoid flickering)
1019
+ if not lrc_text or not lrc_text.strip():
1020
+ return gr.skip()
1021
+
1022
+ # Get audio path from component value
1023
+ audio_path = None
1024
+ if audio_component_value:
1025
+ if isinstance(audio_component_value, dict):
1026
+ audio_path = audio_component_value.get("path") or audio_component_value.get("value")
1027
+ else:
1028
+ audio_path = audio_component_value
1029
+
1030
+ if not audio_path:
1031
+ return gr.skip()
1032
+
1033
+ # Parse LRC to subtitles format
1034
+ subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=audio_duration)
1035
+
1036
+ # Return updated audio with subtitles
1037
+ return gr.update(value=audio_path, subtitles=subtitles_data if subtitles_data else None)
1038
 
1039
 
1040
  def capture_current_params(
 
1046
  use_adg, cfg_interval_start, cfg_interval_end, shift, audio_format, lm_temperature,
1047
  think_checkbox, lm_cfg_scale, lm_top_k, lm_top_p, lm_negative_prompt,
1048
  use_cot_metas, use_cot_caption, use_cot_language,
1049
+ constrained_decoding_debug, allow_lm_batch, auto_score, auto_lrc, score_scale, lm_batch_chunk_size,
1050
  track_name, complete_track_classes
1051
  ):
1052
  """Capture current UI parameters for next batch generation
 
1093
  "constrained_decoding_debug": constrained_decoding_debug,
1094
  "allow_lm_batch": allow_lm_batch,
1095
  "auto_score": auto_score,
1096
+ "auto_lrc": auto_lrc,
1097
  "score_scale": score_scale,
1098
  "lm_batch_chunk_size": lm_batch_chunk_size,
1099
  "track_name": track_name,
 
1114
  constrained_decoding_debug,
1115
  allow_lm_batch,
1116
  auto_score,
1117
+ auto_lrc,
1118
  score_scale,
1119
  lm_batch_chunk_size,
1120
  track_name,
 
1143
  constrained_decoding_debug,
1144
  allow_lm_batch,
1145
  auto_score,
1146
+ auto_lrc,
1147
  score_scale,
1148
  lm_batch_chunk_size,
1149
  progress
 
1153
  final_result_from_inner = partial_result
1154
  # current_batch_index, total_batches, batch_queue, next_params,
1155
  # batch_indicator_text, prev_btn, next_btn, next_status, restore_btn
1156
+ # Slice off extra_outputs and raw_codes_list (last 2 items) before re-yielding to UI
1157
+ ui_result = partial_result[:-2] if len(partial_result) > 47 else (partial_result[:-1] if len(partial_result) > 46 else partial_result)
1158
  yield ui_result + (
1159
  gr.skip(), gr.skip(), gr.skip(), gr.skip(),
1160
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
 
1163
  all_audio_paths = result[8]
1164
 
1165
  if all_audio_paths is None:
1166
+ # Slice off extra_outputs and raw_codes_list before yielding to UI
1167
+ ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
1168
  yield ui_result + (
1169
  gr.skip(), gr.skip(), gr.skip(), gr.skip(),
1170
  gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip()
 
1172
  return
1173
 
1174
  # Extract results from generation (fields are accessed by index into result)
1175
+ # New structure after UI refactor (with lrc_display added):
1176
+ # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
1177
+ # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
1178
+ # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
1179
  generation_info = result[9]
1180
  seed_value_for_ui = result[11]
1181
+ lm_generated_metadata = result[44]
1182
 
1183
+ # Extract raw codes list directly (index 47)
1184
+ raw_codes_list = result[47] if len(result) > 47 else [""] * 8
1185
+ generated_codes_batch = raw_codes_list if isinstance(raw_codes_list, list) else [""] * 8
1186
+ generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
1187
 
1188
  # Determine which codes to store based on mode
1189
  if allow_lm_batch and batch_size_input >= 2:
 
1230
  "constrained_decoding_debug": constrained_decoding_debug,
1231
  "allow_lm_batch": allow_lm_batch,
1232
  "auto_score": auto_score,
1233
+ "auto_lrc": auto_lrc,
1234
  "score_scale": score_scale,
1235
  "lm_batch_chunk_size": lm_batch_chunk_size,
1236
  "track_name": track_name,
 
1243
  next_params["text2music_audio_code_string"] = ""
1244
  next_params["random_seed_checkbox"] = True
1245
 
1246
+ # Extract extra_outputs from result tuple (index 46 after adding lrc_display)
1247
+ # Note: index 47 is raw_codes_list which we already extracted above
1248
+ extra_outputs_from_result = result[46] if len(result) > 46 else {}
1249
 
1250
  # Store current batch in queue
1251
  batch_queue = store_batch_in_queue(
 
1263
  status="completed"
1264
  )
1265
 
1266
+ # Extract auto_lrc results from extra_outputs (generated in generate_with_progress)
1267
+ if auto_lrc and extra_outputs_from_result:
1268
+ lrcs_from_extra = extra_outputs_from_result.get("lrcs", [""] * 8)
1269
+ subtitles_from_extra = extra_outputs_from_result.get("subtitles", [None] * 8)
1270
+ batch_queue[current_batch_index]["lrcs"] = lrcs_from_extra
1271
+ batch_queue[current_batch_index]["subtitles"] = subtitles_from_extra
1272
+
1273
  # Update batch counters
1274
  total_batches = max(total_batches, current_batch_index + 1)
1275
 
 
1286
 
1287
  # 4. Yield final result (includes Batch UI updates)
1288
  # The result here is already a tuple structure
1289
+ # Slice off extra_outputs and raw_codes_list (last 2 items) before yielding to UI - they're already stored in batch_queue
1290
+ # New structure (with lrc_display):
1291
+ # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
1292
+ # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
1293
+ # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
1294
+ # Note: Audio subtitles are already included in the intermediate yields from generate_with_progress
1295
+ ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
1296
+
1297
  yield ui_result + (
1298
  current_batch_index,
1299
  total_batches,
 
1405
  params.setdefault("constrained_decoding_debug", False)
1406
  params.setdefault("allow_lm_batch", True)
1407
  params.setdefault("auto_score", False)
1408
+ params.setdefault("auto_lrc", False)
1409
  params.setdefault("score_scale", 0.5)
1410
  params.setdefault("lm_batch_chunk_size", 8)
1411
  params.setdefault("track_name", None)
 
1454
  constrained_decoding_debug=params.get("constrained_decoding_debug"),
1455
  allow_lm_batch=params.get("allow_lm_batch"),
1456
  auto_score=params.get("auto_score"),
1457
+ auto_lrc=params.get("auto_lrc"),
1458
  score_scale=params.get("score_scale"),
1459
  lm_batch_chunk_size=params.get("lm_batch_chunk_size"),
1460
  progress=progress
 
1466
  final_result = partial_result
1467
 
1468
  # Extract results from final_result
1469
+ # New structure after UI refactor (with lrc_display added):
1470
+ # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
1471
+ # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
1472
+ # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
1473
  all_audio_paths = final_result[8] # generated_audio_batch
1474
  generation_info = final_result[9]
1475
  seed_value_for_ui = final_result[11]
1476
+ lm_generated_metadata = final_result[44]
1477
+
1478
+ # Extract raw codes list directly (index 47)
1479
+ raw_codes_list = final_result[47] if len(final_result) > 47 else [""] * 8
1480
+ generated_codes_batch = raw_codes_list if isinstance(raw_codes_list, list) else [""] * 8
1481
+ generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
1482
 
1483
+ # Extract extra_outputs for LRC generation (index 46)
1484
+ extra_outputs_from_bg = final_result[46] if len(final_result) > 46 else None
 
1485
 
1486
  # Determine which codes to store
1487
  batch_size = params.get("batch_size_input", 2)
 
1496
  logger.info(f" - allow_lm_batch: {allow_lm_batch}")
1497
  logger.info(f" - batch_size: {batch_size}")
1498
  logger.info(f" - generated_codes_single exists: {bool(generated_codes_single)}")
1499
+ logger.info(f" - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
1500
  if isinstance(codes_to_store, list):
1501
  logger.info(f" - codes_to_store: LIST with {len(codes_to_store)} items")
1502
  for idx, code in enumerate(codes_to_store):
 
1505
  logger.info(f" - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
1506
 
1507
  # Store next batch in queue with codes, batch settings, and ALL generation params
 
1508
  batch_queue = store_batch_in_queue(
1509
  batch_queue,
1510
  next_batch_idx,
 
1516
  batch_size=int(batch_size),
1517
  generation_params=params,
1518
  lm_generated_metadata=lm_generated_metadata,
1519
+ extra_outputs=extra_outputs_from_bg, # Now properly extracted from generation result
1520
  status="completed"
1521
  )
1522
 
 
1557
  """Navigate to previous batch (Result View Only - Never touches Input UI)"""
1558
  if current_batch_index <= 0:
1559
  gr.Warning(t("messages.at_first_batch"))
1560
+ return [gr.update()] * 48 # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
1561
 
1562
  # Move to previous batch
1563
  new_batch_index = current_batch_index - 1
 
1565
  # Load batch data from queue
1566
  if new_batch_index not in batch_queue:
1567
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1568
+ return [gr.update()] * 48
1569
 
1570
  batch_data = batch_queue[new_batch_index]
1571
  audio_paths = batch_data.get("audio_paths", [])
1572
  generation_info_text = batch_data.get("generation_info", "")
1573
 
1574
+ # Prepare audio outputs (up to 8) with subtitles
 
1575
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
1576
+ stored_subtitles = batch_data.get("subtitles", [None] * 8)
1577
+
1578
+ audio_updates = []
1579
+ for idx in range(8):
1580
+ if idx < len(real_audio_paths):
1581
+ audio_path = real_audio_paths[idx]
1582
+ subtitles_data = stored_subtitles[idx] if idx < len(stored_subtitles) else None
1583
+ # Use gr.update to set both value and subtitles
1584
+ audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
1585
+ else:
1586
+ audio_updates.append(gr.update(value=None, subtitles=None))
1587
 
1588
  # Update batch indicator
1589
  total_batches = len(batch_queue)
 
1596
  stored_scores = batch_data.get("scores", [""] * 8)
1597
  score_displays = stored_scores if stored_scores else [""] * 8
1598
 
1599
+ # Restore LRC displays from batch queue (clear if not stored)
1600
+ stored_lrcs = batch_data.get("lrcs", [""] * 8)
1601
+ lrc_displays = stored_lrcs if stored_lrcs else [""] * 8
1602
+
1603
+ # Restore codes display from batch queue
1604
+ stored_codes = batch_data.get("codes", "")
1605
+ stored_allow_lm_batch = batch_data.get("allow_lm_batch", False)
1606
+ batch_size = batch_data.get("batch_size", 2)
1607
+
1608
+ codes_display_updates = []
1609
+ lrc_display_updates = []
1610
+ details_accordion_updates = []
1611
+ for i in range(8):
1612
+ if stored_allow_lm_batch and isinstance(stored_codes, list):
1613
+ code_str = stored_codes[i] if i < len(stored_codes) else ""
1614
+ else:
1615
+ code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
1616
+
1617
+ lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
1618
+ score_str = score_displays[i] if i < len(score_displays) else ""
1619
+
1620
+ has_code = bool(code_str) and i < batch_size
1621
+ has_lrc = bool(lrc_str)
1622
+ has_score = bool(score_str)
1623
+
1624
+ # Show accordion if any content exists
1625
+ has_content = has_code or has_lrc or has_score
1626
+
1627
+ codes_display_updates.append(gr.update(value=code_str, visible=has_code))
1628
+ lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
1629
+ details_accordion_updates.append(gr.update(visible=has_content))
1630
+
1631
  return (
1632
+ audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
1633
+ audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
1634
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
1635
  gr.update(interactive=can_go_previous), gr.update(interactive=can_go_next),
1636
  t("messages.viewing_batch", n=new_batch_index + 1),
1637
  score_displays[0], score_displays[1], score_displays[2], score_displays[3],
1638
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
1639
+ codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
1640
+ codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
1641
+ lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
1642
+ lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
1643
+ details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
1644
+ details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
1645
  gr.update(interactive=True),
1646
  )
1647
 
 
1650
  """Navigate to next batch (Result View Only - Never touches Input UI)"""
1651
  if current_batch_index >= total_batches - 1:
1652
  gr.Warning(t("messages.at_last_batch"))
1653
+ return [gr.update()] * 49 # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 1 next_status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
1654
 
1655
  # Move to next batch
1656
  new_batch_index = current_batch_index + 1
 
1658
  # Load batch data from queue
1659
  if new_batch_index not in batch_queue:
1660
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1661
+ return [gr.update()] * 49
1662
 
1663
  batch_data = batch_queue[new_batch_index]
1664
  audio_paths = batch_data.get("audio_paths", [])
1665
  generation_info_text = batch_data.get("generation_info", "")
1666
 
1667
+ # Prepare audio outputs (up to 8) with subtitles
 
1668
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
1669
+ stored_subtitles = batch_data.get("subtitles", [None] * 8)
1670
+
1671
+ audio_updates = []
1672
+ for idx in range(8):
1673
+ if idx < len(real_audio_paths):
1674
+ audio_path = real_audio_paths[idx]
1675
+ subtitles_data = stored_subtitles[idx] if idx < len(stored_subtitles) else None
1676
+ # Use gr.update to set both value and subtitles
1677
+ audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
1678
+ else:
1679
+ audio_updates.append(gr.update(value=None, subtitles=None))
1680
 
1681
  # Update batch indicator
1682
  batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
 
1694
  stored_scores = batch_data.get("scores", [""] * 8)
1695
  score_displays = stored_scores if stored_scores else [""] * 8
1696
 
1697
+ # Restore LRC displays from batch queue (clear if not stored)
1698
+ stored_lrcs = batch_data.get("lrcs", [""] * 8)
1699
+ lrc_displays = stored_lrcs if stored_lrcs else [""] * 8
1700
+
1701
+ # Restore codes display from batch queue
1702
+ stored_codes = batch_data.get("codes", "")
1703
+ stored_allow_lm_batch = batch_data.get("allow_lm_batch", False)
1704
+ batch_size = batch_data.get("batch_size", 2)
1705
+
1706
+ codes_display_updates = []
1707
+ lrc_display_updates = []
1708
+ details_accordion_updates = []
1709
+ for i in range(8):
1710
+ if stored_allow_lm_batch and isinstance(stored_codes, list):
1711
+ code_str = stored_codes[i] if i < len(stored_codes) else ""
1712
+ else:
1713
+ code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
1714
+
1715
+ lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
1716
+ score_str = score_displays[i] if i < len(score_displays) else ""
1717
+
1718
+ has_code = bool(code_str) and i < batch_size
1719
+ has_lrc = bool(lrc_str)
1720
+ has_score = bool(score_str)
1721
+
1722
+ # Show accordion if any content exists
1723
+ has_content = has_code or has_lrc or has_score
1724
+
1725
+ codes_display_updates.append(gr.update(value=code_str, visible=has_code))
1726
+ lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
1727
+ details_accordion_updates.append(gr.update(visible=has_content))
1728
+
1729
  return (
1730
+ audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
1731
+ audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
1732
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
1733
  gr.update(interactive=can_go_previous), gr.update(interactive=can_go_next),
1734
  t("messages.viewing_batch", n=new_batch_index + 1), next_batch_status_text,
1735
  score_displays[0], score_displays[1], score_displays[2], score_displays[3],
1736
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
1737
+ codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
1738
+ codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
1739
+ lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
1740
+ lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
1741
+ details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
1742
+ details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
1743
  gr.update(interactive=True),
1744
  )
1745
 
 
1751
  """
1752
  if current_batch_index not in batch_queue:
1753
  gr.Warning(t("messages.no_batch_data"))
1754
+ return [gr.update()] * 20 # Updated count: 1 codes + 19 other params
1755
 
1756
  batch_data = batch_queue[current_batch_index]
1757
  params = batch_data.get("generation_params", {})
 
1777
  track_name = params.get("track_name", None)
1778
  complete_track_classes = params.get("complete_track_classes", [])
1779
 
1780
+ # Extract codes - only restore to single input
1781
  stored_codes = batch_data.get("codes", "")
 
 
 
1782
  if stored_codes:
1783
+ if isinstance(stored_codes, list):
1784
+ # Batch mode: use first codes for single input
1785
+ codes_main = stored_codes[0] if stored_codes else ""
 
 
1786
  else:
1787
+ # Single mode
1788
+ codes_main = stored_codes
1789
+ else:
1790
+ codes_main = ""
1791
 
1792
  gr.Info(t("messages.params_restored", n=current_batch_index + 1))
1793
 
1794
  return (
1795
+ codes_main, captions, lyrics, bpm, key_scale, time_signature,
 
 
1796
  vocal_language, audio_duration, batch_size_input, inference_steps,
1797
  lm_temperature, lm_cfg_scale, lm_top_k, lm_top_p, think_checkbox,
1798
  use_cot_caption, use_cot_language, allow_lm_batch,
acestep/gradio_ui/i18n/en.json CHANGED
@@ -140,6 +140,8 @@
140
  "constrained_debug_info": "Enable debug logging for constrained decoding (check to see detailed logs)",
141
  "auto_score_label": "Auto Score",
142
  "auto_score_info": "Automatically calculate quality scores for all generated audios",
 
 
143
  "lm_batch_chunk_label": "LM Batch Chunk Size",
144
  "lm_batch_chunk_info": "Max items per LM batch chunk (default: 8, limited by GPU memory)",
145
  "codes_strength_label": "LM Codes Strength",
@@ -163,9 +165,10 @@
163
  "lrc_btn": "🎵 LRC",
164
  "quality_score_label": "Quality Score (Sample {n})",
165
  "quality_score_placeholder": "Click 'Score' to calculate perplexity-based quality score",
 
166
  "lrc_label": "Lyrics Timestamps (Sample {n})",
167
  "lrc_placeholder": "Click 'LRC' to generate timestamps",
168
- "details_accordion": "📊 Score & LRC",
169
  "generation_status": "Generation Status",
170
  "current_batch": "Current Batch",
171
  "batch_indicator": "Batch {current} / {total}",
 
140
  "constrained_debug_info": "Enable debug logging for constrained decoding (check to see detailed logs)",
141
  "auto_score_label": "Auto Score",
142
  "auto_score_info": "Automatically calculate quality scores for all generated audios",
143
+ "auto_lrc_label": "Auto LRC",
144
+ "auto_lrc_info": "Automatically generate LRC lyrics timestamps for all generated audios",
145
  "lm_batch_chunk_label": "LM Batch Chunk Size",
146
  "lm_batch_chunk_info": "Max items per LM batch chunk (default: 8, limited by GPU memory)",
147
  "codes_strength_label": "LM Codes Strength",
 
165
  "lrc_btn": "🎵 LRC",
166
  "quality_score_label": "Quality Score (Sample {n})",
167
  "quality_score_placeholder": "Click 'Score' to calculate perplexity-based quality score",
168
+ "codes_label": "LM Codes (Sample {n})",
169
  "lrc_label": "Lyrics Timestamps (Sample {n})",
170
  "lrc_placeholder": "Click 'LRC' to generate timestamps",
171
+ "details_accordion": "📊 Score & LRC & LM Codes",
172
  "generation_status": "Generation Status",
173
  "current_batch": "Current Batch",
174
  "batch_indicator": "Batch {current} / {total}",
acestep/gradio_ui/i18n/ja.json CHANGED
@@ -140,6 +140,8 @@
140
  "constrained_debug_info": "制約付きデコーディングのデバッグログを有効化(チェックすると詳細ログを表示)",
141
  "auto_score_label": "自動スコアリング",
142
  "auto_score_info": "生成されたすべてのオーディオの品質スコアを自動計算",
 
 
143
  "lm_batch_chunk_label": "LM バッチチャンクサイズ",
144
  "lm_batch_chunk_info": "LMバッチチャンクあたりの最大アイテム数(デフォルト: 8、GPUメモリによる制限)",
145
  "codes_strength_label": "LM コード強度",
@@ -163,9 +165,10 @@
163
  "lrc_btn": "🎵 LRC",
164
  "quality_score_label": "品質スコア(サンプル {n})",
165
  "quality_score_placeholder": "'スコア'をクリックしてパープレキシティベースの品質スコアを計算",
 
166
  "lrc_label": "歌詞タイムスタンプ(サンプル {n})",
167
  "lrc_placeholder": "'LRC'をクリックしてタイムスタンプを生成",
168
- "details_accordion": "📊 スコア & LRC",
169
  "generation_status": "生成ステータス",
170
  "current_batch": "現在のバッチ",
171
  "batch_indicator": "バッチ {current} / {total}",
 
140
  "constrained_debug_info": "制約付きデコーディングのデバッグログを有効化(チェックすると詳細ログを表示)",
141
  "auto_score_label": "自動スコアリング",
142
  "auto_score_info": "生成されたすべてのオーディオの品質スコアを自動計算",
143
+ "auto_lrc_label": "自動 LRC",
144
+ "auto_lrc_info": "生成されたすべてのオーディオのLRC歌詞タイムスタンプを自動生成",
145
  "lm_batch_chunk_label": "LM バッチチャンクサイズ",
146
  "lm_batch_chunk_info": "LMバッチチャンクあたりの最大アイテム数(デフォルト: 8、GPUメモリによる制限)",
147
  "codes_strength_label": "LM コード強度",
 
165
  "lrc_btn": "🎵 LRC",
166
  "quality_score_label": "品質スコア(サンプル {n})",
167
  "quality_score_placeholder": "'スコア'をクリックしてパープレキシティベースの品質スコアを計算",
168
+ "codes_label": "LM コード(サンプル {n})",
169
  "lrc_label": "歌詞タイムスタンプ(サンプル {n})",
170
  "lrc_placeholder": "'LRC'をクリックしてタイムスタンプを生成",
171
+ "details_accordion": "📊 スコア & LRC & LM コード",
172
  "generation_status": "生成ステータス",
173
  "current_batch": "現在のバッチ",
174
  "batch_indicator": "バッチ {current} / {total}",
acestep/gradio_ui/i18n/zh.json CHANGED
@@ -140,6 +140,8 @@
140
  "constrained_debug_info": "启用约束解码的调试日志(勾选以查看详细日志)",
141
  "auto_score_label": "自动评分",
142
  "auto_score_info": "自动计算所有生成音频的质量分数",
 
 
143
  "lm_batch_chunk_label": "LM 批量块大小",
144
  "lm_batch_chunk_info": "每个LM批量块的最大项目数(默认: 8, 受GPU内存限制)",
145
  "codes_strength_label": "LM 代码强度",
@@ -163,9 +165,10 @@
163
  "lrc_btn": "🎵 LRC",
164
  "quality_score_label": "质量分数(样本 {n})",
165
  "quality_score_placeholder": "点击'评分'以计算基于困惑度的质量分数",
 
166
  "lrc_label": "歌词时间戳(样本 {n})",
167
  "lrc_placeholder": "点击'LRC'生成时间戳",
168
- "details_accordion": "📊 评分与LRC",
169
  "generation_status": "生成状态",
170
  "current_batch": "当前批次",
171
  "batch_indicator": "批次 {current} / {total}",
 
140
  "constrained_debug_info": "启用约束解码的调试日志(勾选以查看详细日志)",
141
  "auto_score_label": "自动评分",
142
  "auto_score_info": "自动计算所有生成音频的质量分数",
143
+ "auto_lrc_label": "自动 LRC",
144
+ "auto_lrc_info": "自动为所有生成的音频生成LRC歌词时间戳",
145
  "lm_batch_chunk_label": "LM 批量块大小",
146
  "lm_batch_chunk_info": "每个LM批量块的最大项目数(默认: 8, 受GPU内存限制)",
147
  "codes_strength_label": "LM 代码强度",
 
165
  "lrc_btn": "🎵 LRC",
166
  "quality_score_label": "质量分数(样本 {n})",
167
  "quality_score_placeholder": "点击'评分'以计算基于困惑度的质量分数",
168
+ "codes_label": "LM 代码(样本 {n})",
169
  "lrc_label": "歌词时间戳(样本 {n})",
170
  "lrc_placeholder": "点击'LRC'生成时间戳",
171
+ "details_accordion": "📊 评分与LRC与LM代码",
172
  "generation_status": "生成状态",
173
  "current_batch": "当前批次",
174
  "batch_indicator": "批次 {current} / {total}",
acestep/gradio_ui/interfaces/generation.py CHANGED
@@ -218,10 +218,9 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
218
  size="sm"
219
  )
220
 
221
- # Audio Codes for text2music (dynamic display based on batch size and allow_lm_batch)
222
  with gr.Accordion(t("generation.lm_codes_hints"), open=False, visible=True) as text2music_audio_codes_group:
223
- # Single codes input (default mode)
224
- with gr.Row(equal_height=True, visible=True) as codes_single_row:
225
  text2music_audio_code_string = gr.Textbox(
226
  label=t("generation.lm_codes_label"),
227
  placeholder=t("generation.lm_codes_placeholder"),
@@ -235,68 +234,6 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
235
  size="sm",
236
  scale=1,
237
  )
238
-
239
- # Multiple codes inputs (batch mode when allow_lm_batch is enabled)
240
- with gr.Row(visible=False) as codes_batch_row:
241
- with gr.Column(visible=True) as codes_col_1:
242
- text2music_audio_code_string_1 = gr.Textbox(
243
- label=t("generation.lm_codes_sample", n=1),
244
- placeholder="<|audio_code_...|>",
245
- lines=4,
246
- info=t("generation.lm_codes_sample_info", n=1),
247
- )
248
- with gr.Column(visible=True) as codes_col_2:
249
- text2music_audio_code_string_2 = gr.Textbox(
250
- label=t("generation.lm_codes_sample", n=2),
251
- placeholder="<|audio_code_...|>",
252
- lines=4,
253
- info=t("generation.lm_codes_sample_info", n=2),
254
- )
255
- with gr.Column(visible=False) as codes_col_3:
256
- text2music_audio_code_string_3 = gr.Textbox(
257
- label=t("generation.lm_codes_sample", n=3),
258
- placeholder="<|audio_code_...|>",
259
- lines=4,
260
- info=t("generation.lm_codes_sample_info", n=3),
261
- )
262
- with gr.Column(visible=False) as codes_col_4:
263
- text2music_audio_code_string_4 = gr.Textbox(
264
- label=t("generation.lm_codes_sample", n=4),
265
- placeholder="<|audio_code_...|>",
266
- lines=4,
267
- info=t("generation.lm_codes_sample_info", n=4),
268
- )
269
-
270
- # Additional row for codes 5-8
271
- with gr.Row(visible=False) as codes_batch_row_2:
272
- with gr.Column() as codes_col_5:
273
- text2music_audio_code_string_5 = gr.Textbox(
274
- label=t("generation.lm_codes_sample", n=5),
275
- placeholder="<|audio_code_...|>",
276
- lines=4,
277
- info=t("generation.lm_codes_sample_info", n=5),
278
- )
279
- with gr.Column() as codes_col_6:
280
- text2music_audio_code_string_6 = gr.Textbox(
281
- label=t("generation.lm_codes_sample", n=6),
282
- placeholder="<|audio_code_...|>",
283
- lines=4,
284
- info=t("generation.lm_codes_sample_info", n=6),
285
- )
286
- with gr.Column() as codes_col_7:
287
- text2music_audio_code_string_7 = gr.Textbox(
288
- label=t("generation.lm_codes_sample", n=7),
289
- placeholder="<|audio_code_...|>",
290
- lines=4,
291
- info=t("generation.lm_codes_sample_info", n=7),
292
- )
293
- with gr.Column() as codes_col_8:
294
- text2music_audio_code_string_8 = gr.Textbox(
295
- label=t("generation.lm_codes_sample", n=8),
296
- placeholder="<|audio_code_...|>",
297
- lines=4,
298
- info=t("generation.lm_codes_sample_info", n=8),
299
- )
300
 
301
  # Repainting controls
302
  with gr.Group(visible=False) as repainting_group:
@@ -541,6 +478,12 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
541
  info=t("generation.auto_score_info"),
542
  scale=1,
543
  )
 
 
 
 
 
 
544
  lm_batch_chunk_size = gr.Number(
545
  label=t("generation.lm_batch_chunk_label"),
546
  value=8,
@@ -581,27 +524,30 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
581
  # Set generate_btn to interactive if service is pre-initialized
582
  generate_btn_interactive = init_params.get('enable_generate', False) if service_pre_initialized else False
583
  with gr.Row(equal_height=True):
584
- think_checkbox = gr.Checkbox(
585
- label=t("generation.think_label"),
586
- value=True,
587
- scale=1,
588
- )
589
- allow_lm_batch = gr.Checkbox(
590
- label=t("generation.parallel_thinking_label"),
591
- value=True,
592
- scale=1,
593
- )
594
- generate_btn = gr.Button(t("generation.generate_btn"), variant="primary", size="lg", interactive=generate_btn_interactive, scale=9)
595
- autogen_checkbox = gr.Checkbox(
596
- label=t("generation.autogen_label"),
597
- value=True,
598
- scale=1,
599
- )
600
- use_cot_caption = gr.Checkbox(
601
- label=t("generation.caption_rewrite_label"),
602
- value=True,
603
- scale=1,
604
- )
 
 
 
605
 
606
  return {
607
  "service_config_accordion": service_config_accordion,
@@ -669,25 +615,7 @@ def create_generation_section(dit_handler, llm_handler, init_params=None, langua
669
  "score_scale": score_scale,
670
  "allow_lm_batch": allow_lm_batch,
671
  "auto_score": auto_score,
 
672
  "lm_batch_chunk_size": lm_batch_chunk_size,
673
- "codes_single_row": codes_single_row,
674
- "codes_batch_row": codes_batch_row,
675
- "codes_batch_row_2": codes_batch_row_2,
676
- "text2music_audio_code_string_1": text2music_audio_code_string_1,
677
- "text2music_audio_code_string_2": text2music_audio_code_string_2,
678
- "text2music_audio_code_string_3": text2music_audio_code_string_3,
679
- "text2music_audio_code_string_4": text2music_audio_code_string_4,
680
- "text2music_audio_code_string_5": text2music_audio_code_string_5,
681
- "text2music_audio_code_string_6": text2music_audio_code_string_6,
682
- "text2music_audio_code_string_7": text2music_audio_code_string_7,
683
- "text2music_audio_code_string_8": text2music_audio_code_string_8,
684
- "codes_col_1": codes_col_1,
685
- "codes_col_2": codes_col_2,
686
- "codes_col_3": codes_col_3,
687
- "codes_col_4": codes_col_4,
688
- "codes_col_5": codes_col_5,
689
- "codes_col_6": codes_col_6,
690
- "codes_col_7": codes_col_7,
691
- "codes_col_8": codes_col_8,
692
  }
693
 
 
218
  size="sm"
219
  )
220
 
221
+ # Audio Codes for text2music - single input for transcription or cover task
222
  with gr.Accordion(t("generation.lm_codes_hints"), open=False, visible=True) as text2music_audio_codes_group:
223
+ with gr.Row(equal_height=True):
 
224
  text2music_audio_code_string = gr.Textbox(
225
  label=t("generation.lm_codes_label"),
226
  placeholder=t("generation.lm_codes_placeholder"),
 
234
  size="sm",
235
  scale=1,
236
  )
 
237
 
238
  # Repainting controls
239
  with gr.Group(visible=False) as repainting_group:
 
478
  info=t("generation.auto_score_info"),
479
  scale=1,
480
  )
481
+ auto_lrc = gr.Checkbox(
482
+ label=t("generation.auto_lrc_label"),
483
+ value=False,
484
+ info=t("generation.auto_lrc_info"),
485
+ scale=1,
486
+ )
487
  lm_batch_chunk_size = gr.Number(
488
  label=t("generation.lm_batch_chunk_label"),
489
  value=8,
 
524
  # Set generate_btn to interactive if service is pre-initialized
525
  generate_btn_interactive = init_params.get('enable_generate', False) if service_pre_initialized else False
526
  with gr.Row(equal_height=True):
527
+ with gr.Column(scale=1, variant="compact"):
528
+ think_checkbox = gr.Checkbox(
529
+ label=t("generation.think_label"),
530
+ value=True,
531
+ scale=1,
532
+ )
533
+ allow_lm_batch = gr.Checkbox(
534
+ label=t("generation.parallel_thinking_label"),
535
+ value=True,
536
+ scale=1,
537
+ )
538
+ with gr.Column(scale=18):
539
+ generate_btn = gr.Button(t("generation.generate_btn"), variant="primary", size="lg", interactive=generate_btn_interactive)
540
+ with gr.Column(scale=1, variant="compact"):
541
+ autogen_checkbox = gr.Checkbox(
542
+ label=t("generation.autogen_label"),
543
+ value=True,
544
+ scale=1,
545
+ )
546
+ use_cot_caption = gr.Checkbox(
547
+ label=t("generation.caption_rewrite_label"),
548
+ value=True,
549
+ scale=1,
550
+ )
551
 
552
  return {
553
  "service_config_accordion": service_config_accordion,
 
615
  "score_scale": score_scale,
616
  "allow_lm_batch": allow_lm_batch,
617
  "auto_score": auto_score,
618
+ "auto_lrc": auto_lrc,
619
  "lm_batch_chunk_size": lm_batch_chunk_size,
 
 
620
  }
621
 
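For orientation, the reworked generate row above wraps the four checkboxes in two compact side columns around a wide button column. A minimal standalone sketch of that arrangement, with hardcoded labels standing in for the repo's t(...) i18n lookups and a stock Gradio install assumed:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row(equal_height=True):
        with gr.Column(scale=1, variant="compact"):
            think_checkbox = gr.Checkbox(label="Think", value=True, scale=1)
            allow_lm_batch = gr.Checkbox(label="Parallel Thinking", value=True, scale=1)
        with gr.Column(scale=18):
            generate_btn = gr.Button("Generate", variant="primary", size="lg")
        with gr.Column(scale=1, variant="compact"):
            autogen_checkbox = gr.Checkbox(label="Auto-Gen", value=True, scale=1)
            use_cot_caption = gr.Checkbox(label="Caption Rewrite", value=True, scale=1)

if __name__ == "__main__":
    demo.launch()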
acestep/gradio_ui/interfaces/result.py CHANGED
@@ -29,7 +29,7 @@ def create_results_section(dit_handler) -> dict:
29
  label=t("results.generated_music", n=1),
30
  type="filepath",
31
  interactive=False,
32
- show_download_button=False
33
  )
34
  with gr.Row(equal_height=True):
35
  send_to_src_btn_1 = gr.Button(
@@ -57,15 +57,25 @@ def create_results_section(dit_handler) -> dict:
57
  scale=1
58
  )
59
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_1:
 
 
 
 
 
 
 
60
  score_display_1 = gr.Textbox(
61
  label=t("results.quality_score_label", n=1),
62
  interactive=False,
 
 
63
  visible=False
64
  )
65
  lrc_display_1 = gr.Textbox(
66
  label=t("results.lrc_label", n=1),
67
- interactive=False,
68
- lines=8,
 
69
  visible=False
70
  )
71
  with gr.Column(visible=True) as audio_col_2:
@@ -73,7 +83,7 @@ def create_results_section(dit_handler) -> dict:
73
  label=t("results.generated_music", n=2),
74
  type="filepath",
75
  interactive=False,
76
- show_download_button=False
77
  )
78
  with gr.Row(equal_height=True):
79
  send_to_src_btn_2 = gr.Button(
@@ -101,15 +111,25 @@ def create_results_section(dit_handler) -> dict:
101
  scale=1
102
  )
103
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_2:
 
 
 
 
 
 
 
104
  score_display_2 = gr.Textbox(
105
  label=t("results.quality_score_label", n=2),
106
  interactive=False,
 
 
107
  visible=False
108
  )
109
  lrc_display_2 = gr.Textbox(
110
  label=t("results.lrc_label", n=2),
111
- interactive=False,
112
- lines=8,
 
113
  visible=False
114
  )
115
  with gr.Column(visible=False) as audio_col_3:
@@ -117,7 +137,7 @@ def create_results_section(dit_handler) -> dict:
117
  label=t("results.generated_music", n=3),
118
  type="filepath",
119
  interactive=False,
120
- show_download_button=False
121
  )
122
  with gr.Row(equal_height=True):
123
  send_to_src_btn_3 = gr.Button(
@@ -145,15 +165,25 @@ def create_results_section(dit_handler) -> dict:
145
  scale=1
146
  )
147
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_3:
 
 
 
 
 
 
 
148
  score_display_3 = gr.Textbox(
149
  label=t("results.quality_score_label", n=3),
150
  interactive=False,
 
 
151
  visible=False
152
  )
153
  lrc_display_3 = gr.Textbox(
154
  label=t("results.lrc_label", n=3),
155
- interactive=False,
156
- lines=8,
 
157
  visible=False
158
  )
159
  with gr.Column(visible=False) as audio_col_4:
@@ -161,7 +191,7 @@ def create_results_section(dit_handler) -> dict:
161
  label=t("results.generated_music", n=4),
162
  type="filepath",
163
  interactive=False,
164
- show_download_button=False
165
  )
166
  with gr.Row(equal_height=True):
167
  send_to_src_btn_4 = gr.Button(
@@ -189,15 +219,25 @@ def create_results_section(dit_handler) -> dict:
189
  scale=1
190
  )
191
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_4:
 
 
 
 
 
 
 
192
  score_display_4 = gr.Textbox(
193
  label=t("results.quality_score_label", n=4),
194
  interactive=False,
 
 
195
  visible=False
196
  )
197
  lrc_display_4 = gr.Textbox(
198
  label=t("results.lrc_label", n=4),
199
- interactive=False,
200
- lines=8,
 
201
  visible=False
202
  )
203
 
@@ -208,7 +248,7 @@ def create_results_section(dit_handler) -> dict:
208
  label=t("results.generated_music", n=5),
209
  type="filepath",
210
  interactive=False,
211
- show_download_button=False
212
  )
213
  with gr.Row(equal_height=True):
214
  send_to_src_btn_5 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
@@ -216,15 +256,25 @@ def create_results_section(dit_handler) -> dict:
216
  score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
217
  lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
218
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_5:
 
 
 
 
 
 
 
219
  score_display_5 = gr.Textbox(
220
  label=t("results.quality_score_label", n=5),
221
  interactive=False,
 
 
222
  visible=False
223
  )
224
  lrc_display_5 = gr.Textbox(
225
  label=t("results.lrc_label", n=5),
226
- interactive=False,
227
- lines=8,
 
228
  visible=False
229
  )
230
  with gr.Column() as audio_col_6:
@@ -232,7 +282,7 @@ def create_results_section(dit_handler) -> dict:
232
  label=t("results.generated_music", n=6),
233
  type="filepath",
234
  interactive=False,
235
- show_download_button=False
236
  )
237
  with gr.Row(equal_height=True):
238
  send_to_src_btn_6 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
@@ -240,15 +290,25 @@ def create_results_section(dit_handler) -> dict:
240
  score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
241
  lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
242
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_6:
 
 
 
 
 
 
 
243
  score_display_6 = gr.Textbox(
244
  label=t("results.quality_score_label", n=6),
245
  interactive=False,
 
 
246
  visible=False
247
  )
248
  lrc_display_6 = gr.Textbox(
249
  label=t("results.lrc_label", n=6),
250
- interactive=False,
251
- lines=8,
 
252
  visible=False
253
  )
254
  with gr.Column() as audio_col_7:
@@ -256,7 +316,7 @@ def create_results_section(dit_handler) -> dict:
256
  label=t("results.generated_music", n=7),
257
  type="filepath",
258
  interactive=False,
259
- show_download_button=False
260
  )
261
  with gr.Row(equal_height=True):
262
  send_to_src_btn_7 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
@@ -264,15 +324,25 @@ def create_results_section(dit_handler) -> dict:
264
  score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
265
  lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
266
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_7:
 
 
 
 
 
 
 
267
  score_display_7 = gr.Textbox(
268
  label=t("results.quality_score_label", n=7),
269
  interactive=False,
 
 
270
  visible=False
271
  )
272
  lrc_display_7 = gr.Textbox(
273
  label=t("results.lrc_label", n=7),
274
- interactive=False,
275
- lines=8,
 
276
  visible=False
277
  )
278
  with gr.Column() as audio_col_8:
@@ -280,7 +350,7 @@ def create_results_section(dit_handler) -> dict:
280
  label=t("results.generated_music", n=8),
281
  type="filepath",
282
  interactive=False,
283
- show_download_button=False
284
  )
285
  with gr.Row(equal_height=True):
286
  send_to_src_btn_8 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
@@ -288,15 +358,25 @@ def create_results_section(dit_handler) -> dict:
288
  score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
289
  lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
290
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_8:
 
 
 
 
 
 
 
291
  score_display_8 = gr.Textbox(
292
  label=t("results.quality_score_label", n=8),
293
  interactive=False,
 
 
294
  visible=False
295
  )
296
  lrc_display_8 = gr.Textbox(
297
  label=t("results.lrc_label", n=8),
298
- interactive=False,
299
- lines=8,
 
300
  visible=False
301
  )
302
 
@@ -410,6 +490,14 @@ def create_results_section(dit_handler) -> dict:
410
  "score_display_6": score_display_6,
411
  "score_display_7": score_display_7,
412
  "score_display_8": score_display_8,
 
 
 
 
 
 
 
 
413
  "lrc_btn_1": lrc_btn_1,
414
  "lrc_btn_2": lrc_btn_2,
415
  "lrc_btn_3": lrc_btn_3,
 
29
  label=t("results.generated_music", n=1),
30
  type="filepath",
31
  interactive=False,
32
+ buttons=[]
33
  )
34
  with gr.Row(equal_height=True):
35
  send_to_src_btn_1 = gr.Button(
 
57
  scale=1
58
  )
59
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_1:
60
+ codes_display_1 = gr.Textbox(
61
+ label=t("results.codes_label", n=1),
62
+ interactive=False,
63
+ buttons=["copy"],
64
+ max_lines=4,
65
+ visible=False
66
+ )
67
  score_display_1 = gr.Textbox(
68
  label=t("results.quality_score_label", n=1),
69
  interactive=False,
70
+ buttons=["copy"],
71
+ max_lines=6,
72
  visible=False
73
  )
74
  lrc_display_1 = gr.Textbox(
75
  label=t("results.lrc_label", n=1),
76
+ interactive=True,
77
+ buttons=["copy"],
78
+ max_lines=8,
79
  visible=False
80
  )
81
  with gr.Column(visible=True) as audio_col_2:
 
83
  label=t("results.generated_music", n=2),
84
  type="filepath",
85
  interactive=False,
86
+ buttons=[]
87
  )
88
  with gr.Row(equal_height=True):
89
  send_to_src_btn_2 = gr.Button(
 
111
  scale=1
112
  )
113
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_2:
114
+ codes_display_2 = gr.Textbox(
115
+ label=t("results.codes_label", n=2),
116
+ interactive=False,
117
+ buttons=["copy"],
118
+ max_lines=4,
119
+ visible=False
120
+ )
121
  score_display_2 = gr.Textbox(
122
  label=t("results.quality_score_label", n=2),
123
  interactive=False,
124
+ buttons=["copy"],
125
+ max_lines=6,
126
  visible=False
127
  )
128
  lrc_display_2 = gr.Textbox(
129
  label=t("results.lrc_label", n=2),
130
+ interactive=True,
131
+ buttons=["copy"],
132
+ max_lines=8,
133
  visible=False
134
  )
135
  with gr.Column(visible=False) as audio_col_3:
 
137
  label=t("results.generated_music", n=3),
138
  type="filepath",
139
  interactive=False,
140
+ buttons=[]
141
  )
142
  with gr.Row(equal_height=True):
143
  send_to_src_btn_3 = gr.Button(
 
165
  scale=1
166
  )
167
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_3:
168
+ codes_display_3 = gr.Textbox(
169
+ label=t("results.codes_label", n=3),
170
+ interactive=False,
171
+ buttons=["copy"],
172
+ max_lines=4,
173
+ visible=False
174
+ )
175
  score_display_3 = gr.Textbox(
176
  label=t("results.quality_score_label", n=3),
177
  interactive=False,
178
+ buttons=["copy"],
179
+ max_lines=6,
180
  visible=False
181
  )
182
  lrc_display_3 = gr.Textbox(
183
  label=t("results.lrc_label", n=3),
184
+ interactive=True,
185
+ buttons=["copy"],
186
+ max_lines=8,
187
  visible=False
188
  )
189
  with gr.Column(visible=False) as audio_col_4:
 
191
  label=t("results.generated_music", n=4),
192
  type="filepath",
193
  interactive=False,
194
+ buttons=[]
195
  )
196
  with gr.Row(equal_height=True):
197
  send_to_src_btn_4 = gr.Button(
 
219
  scale=1
220
  )
221
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_4:
222
+ codes_display_4 = gr.Textbox(
223
+ label=t("results.codes_label", n=4),
224
+ interactive=False,
225
+ buttons=["copy"],
226
+ max_lines=4,
227
+ visible=False
228
+ )
229
  score_display_4 = gr.Textbox(
230
  label=t("results.quality_score_label", n=4),
231
  interactive=False,
232
+ buttons=["copy"],
233
+ max_lines=6,
234
  visible=False
235
  )
236
  lrc_display_4 = gr.Textbox(
237
  label=t("results.lrc_label", n=4),
238
+ interactive=True,
239
+ buttons=["copy"],
240
+ max_lines=8,
241
  visible=False
242
  )
243
 
 
248
  label=t("results.generated_music", n=5),
249
  type="filepath",
250
  interactive=False,
251
+ buttons=[]
252
  )
253
  with gr.Row(equal_height=True):
254
  send_to_src_btn_5 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
 
256
  score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
257
  lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
258
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_5:
259
+ codes_display_5 = gr.Textbox(
260
+ label=t("results.codes_label", n=5),
261
+ interactive=False,
262
+ buttons=["copy"],
263
+ max_lines=4,
264
+ visible=False
265
+ )
266
  score_display_5 = gr.Textbox(
267
  label=t("results.quality_score_label", n=5),
268
  interactive=False,
269
+ buttons=["copy"],
270
+ max_lines=6,
271
  visible=False
272
  )
273
  lrc_display_5 = gr.Textbox(
274
  label=t("results.lrc_label", n=5),
275
+ interactive=True,
276
+ buttons=["copy"],
277
+ max_lines=8,
278
  visible=False
279
  )
280
  with gr.Column() as audio_col_6:
 
282
  label=t("results.generated_music", n=6),
283
  type="filepath",
284
  interactive=False,
285
+ buttons=[]
286
  )
287
  with gr.Row(equal_height=True):
288
  send_to_src_btn_6 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
 
290
  score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
291
  lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
292
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_6:
293
+ codes_display_6 = gr.Textbox(
294
+ label=t("results.codes_label", n=6),
295
+ interactive=False,
296
+ buttons=["copy"],
297
+ max_lines=4,
298
+ visible=False
299
+ )
300
  score_display_6 = gr.Textbox(
301
  label=t("results.quality_score_label", n=6),
302
  interactive=False,
303
+ buttons=["copy"],
304
+ max_lines=6,
305
  visible=False
306
  )
307
  lrc_display_6 = gr.Textbox(
308
  label=t("results.lrc_label", n=6),
309
+ interactive=True,
310
+ buttons=["copy"],
311
+ max_lines=8,
312
  visible=False
313
  )
314
  with gr.Column() as audio_col_7:
 
316
  label=t("results.generated_music", n=7),
317
  type="filepath",
318
  interactive=False,
319
+ buttons=[]
320
  )
321
  with gr.Row(equal_height=True):
322
  send_to_src_btn_7 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
 
324
  score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
325
  lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
326
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_7:
327
+ codes_display_7 = gr.Textbox(
328
+ label=t("results.codes_label", n=7),
329
+ interactive=False,
330
+ buttons=["copy"],
331
+ max_lines=4,
332
+ visible=False
333
+ )
334
  score_display_7 = gr.Textbox(
335
  label=t("results.quality_score_label", n=7),
336
  interactive=False,
337
+ buttons=["copy"],
338
+ max_lines=6,
339
  visible=False
340
  )
341
  lrc_display_7 = gr.Textbox(
342
  label=t("results.lrc_label", n=7),
343
+ interactive=True,
344
+ buttons=["copy"],
345
+ max_lines=8,
346
  visible=False
347
  )
348
  with gr.Column() as audio_col_8:
 
350
  label=t("results.generated_music", n=8),
351
  type="filepath",
352
  interactive=False,
353
+ buttons=[]
354
  )
355
  with gr.Row(equal_height=True):
356
  send_to_src_btn_8 = gr.Button(t("results.send_to_src_btn"), variant="secondary", size="sm", scale=1)
 
358
  score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
359
  lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
360
  with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_8:
361
+ codes_display_8 = gr.Textbox(
362
+ label=t("results.codes_label", n=8),
363
+ interactive=False,
364
+ buttons=["copy"],
365
+ max_lines=4,
366
+ visible=False
367
+ )
368
  score_display_8 = gr.Textbox(
369
  label=t("results.quality_score_label", n=8),
370
  interactive=False,
371
+ buttons=["copy"],
372
+ max_lines=6,
373
  visible=False
374
  )
375
  lrc_display_8 = gr.Textbox(
376
  label=t("results.lrc_label", n=8),
377
+ interactive=True,
378
+ buttons=["copy"],
379
+ max_lines=8,
380
  visible=False
381
  )
382
 
 
490
  "score_display_6": score_display_6,
491
  "score_display_7": score_display_7,
492
  "score_display_8": score_display_8,
493
+ "codes_display_1": codes_display_1,
494
+ "codes_display_2": codes_display_2,
495
+ "codes_display_3": codes_display_3,
496
+ "codes_display_4": codes_display_4,
497
+ "codes_display_5": codes_display_5,
498
+ "codes_display_6": codes_display_6,
499
+ "codes_display_7": codes_display_7,
500
+ "codes_display_8": codes_display_8,
501
  "lrc_btn_1": lrc_btn_1,
502
  "lrc_btn_2": lrc_btn_2,
503
  "lrc_btn_3": lrc_btn_3,
acestep/llm_inference.py CHANGED
@@ -773,7 +773,12 @@ class LLMHandler:
773
  cot_items = {}
774
  for key in ['bpm', 'caption', 'duration', 'keyscale', 'language', 'timesignature']:
775
  if key in metadata and metadata[key] is not None:
776
- cot_items[key] = metadata[key]
 
 
 
 
 
777
 
778
  # Format as YAML (sorted keys, unicode support)
779
  if len(cot_items) > 0:
 
773
  cot_items = {}
774
  for key in ['bpm', 'caption', 'duration', 'keyscale', 'language', 'timesignature']:
775
  if key in metadata and metadata[key] is not None:
776
+ value = metadata[key]
777
+ if key == "timesignature" and isinstance(value, str) and value.endswith("/4"):
778
+ value = value.split("/")[0]
779
+ if isinstance(value, str) and value.isdigit():
780
+ value = int(value)
781
+ cot_items[key] = value
782
 
783
  # Format as YAML (sorted keys, unicode support)
784
  if len(cot_items) > 0:
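
A quick, self-contained illustration of what the new CoT metadata normalization does; the sample metadata values below are invented for the example, and in the repo this loop runs inside LLMHandler before the YAML formatting step.

metadata = {"bpm": "120", "timesignature": "4/4", "keyscale": "C major", "caption": None}

cot_items = {}
for key in ["bpm", "caption", "duration", "keyscale", "language", "timesignature"]:
    if key in metadata and metadata[key] is not None:
        value = metadata[key]
        if key == "timesignature" and isinstance(value, str) and value.endswith("/4"):
            value = value.split("/")[0]   # "4/4" -> "4"; non-/4 signatures pass through unchanged
        if isinstance(value, str) and value.isdigit():
            value = int(value)            # "120" -> 120, "4" -> 4
        cot_items[key] = value

print(cot_items)  # {'bpm': 120, 'keyscale': 'C major', 'timesignature': 4}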