Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on A100

App Files Files Community

ChuxiJ commited on Dec 26, 2025

Commit

ae5026d

1 Parent(s): 3092911

feat: api support lm-dit

Browse files

Files changed (1) hide show

acestep/api_server.py +372 -10

acestep/api_server.py CHANGED Viewed

@@ -14,6 +14,7 @@ from __future__ import annotations
 import asyncio
 import json
 import os
 import sys
 import time
 import traceback
@@ -33,6 +34,7 @@ from pydantic import BaseModel, Field
 from starlette.datastructures import UploadFile as StarletteUploadFile
 from .handler import AceStepHandler
 JobStatus = Literal["queued", "running", "succeeded", "failed"]
@@ -42,6 +44,9 @@ class GenerateMusicRequest(BaseModel):
     caption: str = Field(default="", description="Text caption describing the music")
     lyrics: str = Field(default="", description="Lyric text")
     bpm: Optional[int] = None
     key_scale: str = ""
     time_signature: str = ""
@@ -72,6 +77,36 @@ class GenerateMusicRequest(BaseModel):
     audio_format: str = "mp3"
     use_tiled_decode: bool = True
 class CreateJobResponse(BaseModel):
     job_id: str
@@ -88,6 +123,15 @@ class JobResult(BaseModel):
     status_message: str = ""
     seed_value: str = ""
 class JobResponse(BaseModel):
     job_id: str
@@ -240,12 +284,46 @@ def create_app() -> FastAPI:
         for proxy_var in ["http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY"]:
             os.environ.pop(proxy_var, None)
         handler = AceStepHandler()
         init_lock = asyncio.Lock()
         app.state._initialized = False
         app.state._init_error = None
         app.state._init_lock = init_lock
         max_workers = int(os.getenv("ACESTEP_API_WORKERS", "1"))
         executor = ThreadPoolExecutor(max_workers=max_workers)
@@ -316,33 +394,305 @@ def create_app() -> FastAPI:
         async def _run_one_job(job_id: str, req: GenerateMusicRequest) -> None:
             job_store: _JobStore = app.state.job_store
             h: AceStepHandler = app.state.handler
             executor: ThreadPoolExecutor = app.state.executor
             await _ensure_initialized()
             job_store.mark_running(job_id)
             def _blocking_generate() -> Dict[str, Any]:
                 first, second, paths, gen_info, status_msg, seed_value, *_ = h.generate_music(
                     captions=req.caption,
                     lyrics=req.lyrics,
-                    bpm=req.bpm,
-                    key_scale=req.key_scale,
-                    time_signature=req.time_signature,
                     vocal_language=req.vocal_language,
                     inference_steps=req.inference_steps,
                     guidance_scale=req.guidance_scale,
                     use_random_seed=req.use_random_seed,
-                    seed=req.seed,
                     reference_audio=req.reference_audio_path,
-                    audio_duration=req.audio_duration,
                     batch_size=req.batch_size,
                     src_audio=req.src_audio_path,
-                    audio_code_string=req.audio_code_string,
                     repainting_start=req.repainting_start,
                     repainting_end=req.repainting_end,
-                    instruction=req.instruction,
-                    audio_cover_strength=req.audio_cover_strength,
-                    task_type=req.task_type,
                     use_adg=req.use_adg,
                     cfg_interval_start=req.cfg_interval_start,
                     cfg_interval_end=req.cfg_interval_end,
@@ -357,6 +707,7 @@ def create_app() -> FastAPI:
                     "generation_info": gen_info,
                     "status_message": status_msg,
                     "seed_value": seed_value,
                 }
             t0 = time.time()
@@ -428,6 +779,7 @@ def create_app() -> FastAPI:
             return GenerateMusicRequest(
                 caption=str(get("caption", "") or ""),
                 lyrics=str(get("lyrics", "") or ""),
                 bpm=_to_int(get("bpm"), None),
                 key_scale=str(get("key_scale", "") or ""),
                 time_signature=str(get("time_signature", "") or ""),
@@ -443,7 +795,7 @@ def create_app() -> FastAPI:
                 audio_code_string=str(get("audio_code_string", "") or ""),
                 repainting_start=_to_float(get("repainting_start"), 0.0) or 0.0,
                 repainting_end=_to_float(get("repainting_end"), None),
-                instruction=str(get("instruction", "Fill the audio semantic mask based on the given conditions:") or ""),
                 audio_cover_strength=_to_float(get("audio_cover_strength"), 1.0) or 1.0,
                 task_type=str(get("task_type", "text2music") or "text2music"),
                 use_adg=_to_bool(get("use_adg"), False),
@@ -451,6 +803,16 @@ def create_app() -> FastAPI:
                 cfg_interval_end=_to_float(get("cfg_interval_end"), 1.0) or 1.0,
                 audio_format=str(get("audio_format", "mp3") or "mp3"),
                 use_tiled_decode=_to_bool(get("use_tiled_decode"), True),
             )
         def _first_value(v: Any) -> Any:

 import asyncio
 import json
 import os
+import re
 import sys
 import time
 import traceback
 from starlette.datastructures import UploadFile as StarletteUploadFile
 from .handler import AceStepHandler
+from .llm_inference import LLMHandler
 JobStatus = Literal["queued", "running", "succeeded", "failed"]
     caption: str = Field(default="", description="Text caption describing the music")
     lyrics: str = Field(default="", description="Lyric text")
+    # Match feishu bot semantics: `dit` (metas only) vs `llm_dit` (metas + audio codes)
+    infer_type: Optional[Literal["dit", "llm_dit"]] = None
     bpm: Optional[int] = None
     key_scale: str = ""
     time_signature: str = ""
     audio_format: str = "mp3"
     use_tiled_decode: bool = True
+    # 5Hz LM generation (server-side, like gradio's generate_lm_hints_wrapper)
+    use_5hz_lm: bool = False
+    lm_model_path: Optional[str] = None  # e.g. "acestep-5Hz-lm-0.6B"
+    lm_backend: Literal["vllm", "pt"] = "vllm"
+    # Align defaults with `acestep/gradio_ui.py` and `feishu_bot/config.py`
+    # to improve lyric adherence in lm-dit mode.
+    lm_temperature: float = 0.85
+    lm_cfg_scale: float = 2.0
+    lm_top_k: Optional[int] = None
+    lm_top_p: Optional[float] = 0.9
+    lm_repetition_penalty: float = 1.0
+    lm_negative_prompt: str = "NO USER INPUT"
+_LM_DEFAULT_TEMPERATURE = 0.85
+_LM_DEFAULT_CFG_SCALE = 2.0
+_LM_DEFAULT_TOP_P = 0.9
+_DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
+_DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
+def _normalize_infer_type(v: Any) -> Optional[str]:
+    s = str(v or "").strip().lower()
+    if not s:
+        return None
+    if s in {"dit", "llm_dit"}:
+        return s
+    return None
 class CreateJobResponse(BaseModel):
     job_id: str
     status_message: str = ""
     seed_value: str = ""
+    # 5Hz LM metadata (present when `use_5hz_lm=true` and server generates codes)
+    # Keep a raw-ish dict for clients that expect a `metas` object.
+    metas: Dict[str, Any] = Field(default_factory=dict)
+    bpm: Optional[int] = None
+    duration: Optional[float] = None
+    genres: Optional[str] = None
+    keyscale: Optional[str] = None
+    timesignature: Optional[str] = None
 class JobResponse(BaseModel):
     job_id: str
         for proxy_var in ["http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY"]:
             os.environ.pop(proxy_var, None)
+        # Ensure compilation/temp caches do not fill up small default /tmp.
+        # Triton/Inductor (and the system compiler) can create large temporary files.
+        project_root = _get_project_root()
+        cache_root = os.path.join(project_root, ".cache", "acestep")
+        tmp_root = (os.getenv("ACESTEP_TMPDIR") or os.path.join(cache_root, "tmp")).strip()
+        triton_cache_root = (os.getenv("TRITON_CACHE_DIR") or os.path.join(cache_root, "triton")).strip()
+        inductor_cache_root = (os.getenv("TORCHINDUCTOR_CACHE_DIR") or os.path.join(cache_root, "torchinductor")).strip()
+        for p in [cache_root, tmp_root, triton_cache_root, inductor_cache_root]:
+            try:
+                os.makedirs(p, exist_ok=True)
+            except Exception:
+                # Best-effort: do not block startup if directory creation fails.
+                pass
+        # Respect explicit user overrides; if ACESTEP_TMPDIR is set, it should win.
+        if os.getenv("ACESTEP_TMPDIR"):
+            os.environ["TMPDIR"] = tmp_root
+            os.environ["TEMP"] = tmp_root
+            os.environ["TMP"] = tmp_root
+        else:
+            os.environ.setdefault("TMPDIR", tmp_root)
+            os.environ.setdefault("TEMP", tmp_root)
+            os.environ.setdefault("TMP", tmp_root)
+        os.environ.setdefault("TRITON_CACHE_DIR", triton_cache_root)
+        os.environ.setdefault("TORCHINDUCTOR_CACHE_DIR", inductor_cache_root)
         handler = AceStepHandler()
+        llm_handler = LLMHandler()
         init_lock = asyncio.Lock()
         app.state._initialized = False
         app.state._init_error = None
         app.state._init_lock = init_lock
+        app.state.llm_handler = llm_handler
+        app.state._llm_initialized = False
+        app.state._llm_init_error = None
+        app.state._llm_init_lock = Lock()
         max_workers = int(os.getenv("ACESTEP_API_WORKERS", "1"))
         executor = ThreadPoolExecutor(max_workers=max_workers)
         async def _run_one_job(job_id: str, req: GenerateMusicRequest) -> None:
             job_store: _JobStore = app.state.job_store
             h: AceStepHandler = app.state.handler
+            llm: LLMHandler = app.state.llm_handler
             executor: ThreadPoolExecutor = app.state.executor
             await _ensure_initialized()
             job_store.mark_running(job_id)
             def _blocking_generate() -> Dict[str, Any]:
+                def _normalize_optional_int(v: Any) -> Optional[int]:
+                    if v is None:
+                        return None
+                    try:
+                        iv = int(v)
+                    except Exception:
+                        return None
+                    return None if iv == 0 else iv
+                def _normalize_optional_float(v: Any) -> Optional[float]:
+                    if v is None:
+                        return None
+                    try:
+                        fv = float(v)
+                    except Exception:
+                        return None
+                    # gradio treats 1.0 as disabled for top_p
+                    return None if fv >= 1.0 else fv
+                def _maybe_fill_from_metadata(current: GenerateMusicRequest, meta: Dict[str, Any]) -> tuple[Optional[int], str, str, Optional[float]]:
+                    # Fill only when user did not provide values
+                    bpm_val = current.bpm
+                    if bpm_val is None:
+                        try:
+                            m = meta.get("bpm")
+                            if m not in (None, "", "N/A"):
+                                bpm_val = int(float(m))
+                        except Exception:
+                            bpm_val = current.bpm
+                    key_scale_val = current.key_scale
+                    if not key_scale_val:
+                        m = meta.get("keyscale", meta.get("key_scale", ""))
+                        if m not in (None, "", "N/A"):
+                            key_scale_val = str(m)
+                    time_sig_val = current.time_signature
+                    if not time_sig_val:
+                        m = meta.get("timesignature", meta.get("time_signature", ""))
+                        if m not in (None, "", "N/A"):
+                            time_sig_val = str(m)
+                    dur_val = current.audio_duration
+                    if dur_val is None:
+                        m = meta.get("duration", meta.get("audio_duration"))
+                        try:
+                            if m not in (None, "", "N/A"):
+                                dur_val = float(m)
+                                if dur_val <= 0:
+                                    dur_val = None
+                                # Avoid truncating lyrical songs when LM predicts a very short duration.
+                                # (Users can still force a short duration by explicitly setting `audio_duration`.)
+                                if dur_val is not None and (current.lyrics or "").strip():
+                                    min_dur = float(os.getenv("ACESTEP_LM_MIN_DURATION_SECONDS", "30"))
+                                    if dur_val < min_dur:
+                                        dur_val = None
+                        except Exception:
+                            dur_val = current.audio_duration
+                    return bpm_val, key_scale_val, time_sig_val, dur_val
+                def _estimate_duration_from_lyrics(lyrics: str) -> Optional[float]:
+                    lyrics = (lyrics or "").strip()
+                    if not lyrics:
+                        return None
+                    # Best-effort heuristic: singing rate ~ 2.2 words/sec for English-like lyrics.
+                    # For languages without spaces, fall back to non-space char count.
+                    words = re.findall(r"[A-Za-z0-9']+", lyrics)
+                    if len(words) >= 8:
+                        words_per_sec = float(os.getenv("ACESTEP_LYRICS_WORDS_PER_SEC", "2.2"))
+                        est = len(words) / max(0.5, words_per_sec)
+                    else:
+                        non_space = len(re.sub(r"\s+", "", lyrics))
+                        chars_per_sec = float(os.getenv("ACESTEP_LYRICS_CHARS_PER_SEC", "12"))
+                        est = non_space / max(4.0, chars_per_sec)
+                    min_dur = float(os.getenv("ACESTEP_LYRICS_MIN_DURATION_SECONDS", "45"))
+                    max_dur = float(os.getenv("ACESTEP_LYRICS_MAX_DURATION_SECONDS", "180"))
+                    return float(min(max(est, min_dur), max_dur))
+                def _extract_lm_fields(meta: Dict[str, Any]) -> Dict[str, Any]:
+                    def _none_if_na(v: Any) -> Any:
+                        if v is None:
+                            return None
+                        if isinstance(v, str) and v.strip() in {"", "N/A"}:
+                            return None
+                        return v
+                    out: Dict[str, Any] = {}
+                    bpm_raw = _none_if_na(meta.get("bpm"))
+                    try:
+                        out["bpm"] = int(float(bpm_raw)) if bpm_raw is not None else None
+                    except Exception:
+                        out["bpm"] = None
+                    dur_raw = _none_if_na(meta.get("duration"))
+                    try:
+                        out["duration"] = float(dur_raw) if dur_raw is not None else None
+                    except Exception:
+                        out["duration"] = None
+                    genres_raw = _none_if_na(meta.get("genres"))
+                    out["genres"] = str(genres_raw) if genres_raw is not None else None
+                    keyscale_raw = _none_if_na(meta.get("keyscale", meta.get("key_scale")))
+                    out["keyscale"] = str(keyscale_raw) if keyscale_raw is not None else None
+                    ts_raw = _none_if_na(meta.get("timesignature", meta.get("time_signature")))
+                    out["timesignature"] = str(ts_raw) if ts_raw is not None else None
+                    return out
+                def _normalize_metas(meta: Dict[str, Any]) -> Dict[str, Any]:
+                    """Ensure a stable `metas` dict (keys always present)."""
+                    meta = meta or {}
+                    out: Dict[str, Any] = dict(meta)
+                    # Normalize key aliases
+                    if "keyscale" not in out and "key_scale" in out:
+                        out["keyscale"] = out.get("key_scale")
+                    if "timesignature" not in out and "time_signature" in out:
+                        out["timesignature"] = out.get("time_signature")
+                    # Ensure required keys exist
+                    for k in ["bpm", "duration", "genres", "keyscale", "timesignature"]:
+                        if out.get(k) in (None, ""):
+                            out[k] = "N/A"
+                    return out
+                # Optional: generate 5Hz LM codes server-side
+                audio_code_string = req.audio_code_string
+                bpm_val = req.bpm
+                key_scale_val = req.key_scale
+                time_sig_val = req.time_signature
+                audio_duration_val = req.audio_duration
+                # Infer type semantics: `dit` => metas only, `llm_dit` => metas + audio codes.
+                # Default to llm_dit only when we actually have (or will generate) codes.
+                explicit_infer = (req.infer_type or "").strip().lower() in {"dit", "llm_dit"}
+                infer_type = (req.infer_type or "").strip().lower()
+                if infer_type not in {"dit", "llm_dit"}:
+                    has_codes = bool(audio_code_string and str(audio_code_string).strip())
+                    infer_type = "llm_dit" if (req.use_5hz_lm or has_codes) else "dit"
+                # If LM-generated code hints are used, a too-strong cover strength can suppress lyric/vocal conditioning.
+                # We keep backward compatibility: only auto-adjust when user didn't override (still at default 1.0).
+                audio_cover_strength_val = float(req.audio_cover_strength)
+                lm_fields: Dict[str, Any] = {}
+                # Determine effective batch size (used for per-sample LM code diversity)
+                effective_batch_size = req.batch_size
+                if effective_batch_size is None:
+                    try:
+                        effective_batch_size = int(getattr(h, "batch_size", 1))
+                    except Exception:
+                        effective_batch_size = 1
+                effective_batch_size = max(1, int(effective_batch_size))
+                if req.use_5hz_lm and not (audio_code_string and str(audio_code_string).strip()):
+                    # Lazy init 5Hz LM once
+                    with app.state._llm_init_lock:
+                        if getattr(app.state, "_llm_initialized", False) is False and getattr(app.state, "_llm_init_error", None) is None:
+                            project_root = _get_project_root()
+                            checkpoint_dir = os.path.join(project_root, "checkpoints")
+                            lm_model_path = (req.lm_model_path or os.getenv("ACESTEP_LM_MODEL_PATH") or "acestep-5Hz-lm-0.6B").strip()
+                            backend = (req.lm_backend or os.getenv("ACESTEP_LM_BACKEND") or "vllm").strip().lower()
+                            if backend not in {"vllm", "pt"}:
+                                backend = "vllm"
+                            lm_device = os.getenv("ACESTEP_LM_DEVICE", os.getenv("ACESTEP_DEVICE", "auto"))
+                            lm_offload = _env_bool("ACESTEP_LM_OFFLOAD_TO_CPU", False)
+                            status, ok = llm.initialize(
+                                checkpoint_dir=checkpoint_dir,
+                                lm_model_path=lm_model_path,
+                                backend=backend,
+                                device=lm_device,
+                                offload_to_cpu=lm_offload,
+                                dtype=h.dtype,
+                            )
+                            if not ok:
+                                app.state._llm_init_error = status
+                            else:
+                                app.state._llm_initialized = True
+                    if getattr(app.state, "_llm_init_error", None):
+                        raise RuntimeError(f"5Hz LM init failed: {app.state._llm_init_error}")
+                    def _lm_call() -> tuple[Dict[str, Any], str, str]:
+                        return llm.generate_with_stop_condition(
+                            caption=req.caption,
+                            lyrics=req.lyrics,
+                            infer_type=infer_type,
+                            temperature=float(req.lm_temperature),
+                            cfg_scale=max(1.0, float(req.lm_cfg_scale)),
+                            negative_prompt=str(req.lm_negative_prompt or "NO USER INPUT"),
+                            top_k=_normalize_optional_int(req.lm_top_k),
+                            top_p=_normalize_optional_float(req.lm_top_p),
+                            repetition_penalty=float(req.lm_repetition_penalty),
+                        )
+                    meta, codes, status = _lm_call()
+                    if infer_type == "llm_dit":
+                        if not codes:
+                            raise RuntimeError(f"5Hz LM generation failed: {status}")
+                        # LM once per job; rely on DiT seeds for batch diversity.
+                        # For convenience, replicate the same codes across the batch.
+                        if effective_batch_size > 1:
+                            # use the same codes for all in the batch
+                            audio_code_string = [codes] * effective_batch_size
+                            # If needed in future: call LM multiple times for more diverse codes.
+                            # codes_list: list[str] = [codes]
+                            # for _ in range(effective_batch_size - 1):
+                            #     _m2, _c2, _s2 = _lm_call()
+                            #     if not _c2:
+                            #         raise RuntimeError(f"5Hz LM generation failed: {_s2}")
+                            #     codes_list.append(_c2)
+                            # audio_code_string = codes_list
+                        else:
+                            audio_code_string = codes
+                    lm_fields = {
+                        "metas": _normalize_metas(meta),
+                        **_extract_lm_fields(meta),
+                    }
+                    bpm_val, key_scale_val, time_sig_val, audio_duration_val = _maybe_fill_from_metadata(req, meta)
+                    # If user provided long lyrics but LM didn't provide a usable duration, estimate a longer duration.
+                    if infer_type == "llm_dit" and audio_duration_val is None and (req.audio_duration is None):
+                        est = _estimate_duration_from_lyrics(req.lyrics)
+                        if est is not None:
+                            audio_duration_val = est
+                    # Optional: auto-tune LM cover strength (opt-in) to avoid suppressing lyric/vocal conditioning.
+                    if infer_type == "llm_dit" and audio_cover_strength_val >= 0.999 and (req.lyrics or "").strip():
+                        tuned = os.getenv("ACESTEP_LM_COVER_STRENGTH")
+                        if tuned is not None and tuned.strip() != "":
+                            audio_cover_strength_val = float(tuned)
+                # Align behavior with feishu bot:
+                # - dit: metas only (ignore audio codes), keep text2music.
+                # - llm_dit: metas + audio codes, run in cover mode with LM instruction.
+                instruction_val = req.instruction
+                task_type_val = (req.task_type or "").strip() or "text2music"
+                if infer_type == "dit":
+                    audio_code_string = ""
+                    if task_type_val == "cover":
+                        task_type_val = "text2music"
+                    if (instruction_val or "").strip() in {"", _DEFAULT_LM_INSTRUCTION}:
+                        instruction_val = _DEFAULT_DIT_INSTRUCTION
+                if infer_type == "llm_dit":
+                    task_type_val = "cover"
+                    if (instruction_val or "").strip() in {"", _DEFAULT_DIT_INSTRUCTION}:
+                        instruction_val = _DEFAULT_LM_INSTRUCTION
+                    if not (audio_code_string and str(audio_code_string).strip()):
+                        if explicit_infer or req.use_5hz_lm:
+                            raise RuntimeError("llm_dit requires non-empty audio codes: provide 'audio_code_string' or set 'use_5hz_lm=true'.")
+                        # If not explicitly requested, fall back to dit semantics.
+                        infer_type = "dit"
+                        task_type_val = "text2music"
+                        instruction_val = _DEFAULT_DIT_INSTRUCTION
                 first, second, paths, gen_info, status_msg, seed_value, *_ = h.generate_music(
                     captions=req.caption,
                     lyrics=req.lyrics,
+                    bpm=bpm_val,
+                    key_scale=key_scale_val,
+                    time_signature=time_sig_val,
                     vocal_language=req.vocal_language,
                     inference_steps=req.inference_steps,
                     guidance_scale=req.guidance_scale,
                     use_random_seed=req.use_random_seed,
+                    seed=("-1" if (req.use_random_seed and int(req.seed) < 0) else str(req.seed)),
                     reference_audio=req.reference_audio_path,
+                    audio_duration=audio_duration_val,
                     batch_size=req.batch_size,
                     src_audio=req.src_audio_path,
+                    audio_code_string=audio_code_string,
                     repainting_start=req.repainting_start,
                     repainting_end=req.repainting_end,
+                    instruction=instruction_val,
+                    audio_cover_strength=audio_cover_strength_val,
+                    task_type=task_type_val,
                     use_adg=req.use_adg,
                     cfg_interval_start=req.cfg_interval_start,
                     cfg_interval_end=req.cfg_interval_end,
                     "generation_info": gen_info,
                     "status_message": status_msg,
                     "seed_value": seed_value,
+                    **lm_fields,
                 }
             t0 = time.time()
             return GenerateMusicRequest(
                 caption=str(get("caption", "") or ""),
                 lyrics=str(get("lyrics", "") or ""),
+                infer_type=_normalize_infer_type(get("infer_type")),
                 bpm=_to_int(get("bpm"), None),
                 key_scale=str(get("key_scale", "") or ""),
                 time_signature=str(get("time_signature", "") or ""),
                 audio_code_string=str(get("audio_code_string", "") or ""),
                 repainting_start=_to_float(get("repainting_start"), 0.0) or 0.0,
                 repainting_end=_to_float(get("repainting_end"), None),
+                instruction=str(get("instruction", _DEFAULT_DIT_INSTRUCTION) or ""),
                 audio_cover_strength=_to_float(get("audio_cover_strength"), 1.0) or 1.0,
                 task_type=str(get("task_type", "text2music") or "text2music"),
                 use_adg=_to_bool(get("use_adg"), False),
                 cfg_interval_end=_to_float(get("cfg_interval_end"), 1.0) or 1.0,
                 audio_format=str(get("audio_format", "mp3") or "mp3"),
                 use_tiled_decode=_to_bool(get("use_tiled_decode"), True),
+                use_5hz_lm=_to_bool(get("use_5hz_lm"), False),
+                lm_model_path=str(get("lm_model_path") or "").strip() or None,
+                lm_backend=str(get("lm_backend", "vllm") or "vllm"),
+                lm_temperature=_to_float(get("lm_temperature"), _LM_DEFAULT_TEMPERATURE) or _LM_DEFAULT_TEMPERATURE,
+                lm_cfg_scale=_to_float(get("lm_cfg_scale"), _LM_DEFAULT_CFG_SCALE) or _LM_DEFAULT_CFG_SCALE,
+                lm_top_k=_to_int(get("lm_top_k"), None),
+                lm_top_p=_to_float(get("lm_top_p"), _LM_DEFAULT_TOP_P),
+                lm_repetition_penalty=_to_float(get("lm_repetition_penalty"), 1.0) or 1.0,
+                lm_negative_prompt=str(get("lm_negative_prompt", "NO USER INPUT") or "NO USER INPUT"),
             )
         def _first_value(v: Any) -> Any: