Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on A100

App Files Community

Sayoyo committed 23 days ago

Commit

59ce525

1 Parent(s): 8e83122

feat: huggingface_space support

Browse files

Files changed (4) hide show

acestep/acestep_v15_pipeline.py +8 -3
acestep/handler.py +79 -24
acestep/inference.py +19 -1
acestep/llm_inference.py +26 -12

acestep/acestep_v15_pipeline.py CHANGED Viewed

@@ -64,14 +64,19 @@ def create_demo(init_params=None, language='en'):
     Returns:
         Gradio Blocks instance
     """
     # Use pre-initialized handlers if available, otherwise create new ones
     if init_params and init_params.get('pre_initialized') and 'dit_handler' in init_params:
         dit_handler = init_params['dit_handler']
         llm_handler = init_params['llm_handler']
     else:
-        dit_handler = AceStepHandler()  # DiT handler
-        llm_handler = LLMHandler()      # LM handler
     dataset_handler = DatasetHandler()  # Dataset handler
     # Create Gradio interface with all handlers and initialization parameters

     Returns:
         Gradio Blocks instance
     """
+    # Get persistent storage path from init_params (for HuggingFace Space)
+    persistent_storage_path = None
+    if init_params:
+        persistent_storage_path = init_params.get('persistent_storage_path')
     # Use pre-initialized handlers if available, otherwise create new ones
     if init_params and init_params.get('pre_initialized') and 'dit_handler' in init_params:
         dit_handler = init_params['dit_handler']
         llm_handler = init_params['llm_handler']
     else:
+        dit_handler = AceStepHandler(persistent_storage_path=persistent_storage_path)
+        llm_handler = LLMHandler(persistent_storage_path=persistent_storage_path)
     dataset_handler = DatasetHandler()  # Dataset handler
     # Create Gradio interface with all handlers and initialization parameters

acestep/handler.py CHANGED Viewed

@@ -43,72 +43,121 @@ warnings.filterwarnings("ignore")
 class AceStepHandler:
     """ACE-Step Business Logic Handler"""
-    def __init__(self):
         self.model = None
         self.config = None
         self.device = "cpu"
         self.dtype = torch.float32  # Will be set based on device in initialize_service
         # VAE for audio encoding/decoding
         self.vae = None
         # Text encoder and tokenizer
         self.text_encoder = None
         self.text_tokenizer = None
         # Silence latent for initialization
         self.silence_latent = None
         # Sample rate
         self.sample_rate = 48000
         # Reward model (temporarily disabled)
         self.reward_model = None
         # Batch size
         self.batch_size = 2
         # Custom layers config
         self.custom_layers_config = {2: [6], 3: [10, 11], 4: [3], 5: [8, 9], 6: [8]}
         self.offload_to_cpu = False
         self.offload_dit_to_cpu = False
         self.current_offload_cost = 0.0
         # LoRA state
         self.lora_loaded = False
         self.use_lora = False
         self._base_decoder = None  # Backup of original decoder
     def get_available_checkpoints(self) -> str:
         """Return project root directory path"""
-        # Get project root (handler.py is in acestep/, so go up two levels to project root)
-        project_root = self._get_project_root()
-        # default checkpoints
-        checkpoint_dir = os.path.join(project_root, "checkpoints")
         if os.path.exists(checkpoint_dir):
             return [checkpoint_dir]
         else:
             return []
     def get_available_acestep_v15_models(self) -> List[str]:
         """Scan and return all model directory names starting with 'acestep-v15-'"""
-        # Get project root
-        project_root = self._get_project_root()
-        checkpoint_dir = os.path.join(project_root, "checkpoints")
         models = []
         if os.path.exists(checkpoint_dir):
-            # Scan all directories starting with 'acestep-v15-' in checkpoints folder
             for item in os.listdir(checkpoint_dir):
                 item_path = os.path.join(checkpoint_dir, item)
                 if os.path.isdir(item_path) and item.startswith("acestep-v15-"):
                     models.append(item)
-        # Sort by name
         models.sort()
         return models
     def is_flash_attention_available(self) -> bool:
         """Check if flash attention is available on the system"""
         try:
@@ -309,11 +358,17 @@ class AceStepHandler:
             # Auto-detect project root (independent of passed project_root parameter)
             actual_project_root = self._get_project_root()
-            checkpoint_dir = os.path.join(actual_project_root, "checkpoints")
             # 1. Load main model
             # config_path is relative path (e.g., "acestep-v15-turbo"), concatenate to checkpoints directory
             acestep_v15_checkpoint_path = os.path.join(checkpoint_dir, config_path)
             if os.path.exists(acestep_v15_checkpoint_path):
                 # Determine attention implementation
                 if use_flash_attention and self.is_flash_attention_available():

 class AceStepHandler:
     """ACE-Step Business Logic Handler"""
+    # HuggingFace Space environment detection
+    IS_HUGGINGFACE_SPACE = os.environ.get("SPACE_ID") is not None
+    def __init__(self, persistent_storage_path: Optional[str] = None):
         self.model = None
         self.config = None
         self.device = "cpu"
         self.dtype = torch.float32  # Will be set based on device in initialize_service
+        # HuggingFace Space persistent storage support
+        if persistent_storage_path is None and self.IS_HUGGINGFACE_SPACE:
+            persistent_storage_path = "/data"
+        self.persistent_storage_path = persistent_storage_path
         # VAE for audio encoding/decoding
         self.vae = None
         # Text encoder and tokenizer
         self.text_encoder = None
         self.text_tokenizer = None
         # Silence latent for initialization
         self.silence_latent = None
         # Sample rate
         self.sample_rate = 48000
         # Reward model (temporarily disabled)
         self.reward_model = None
         # Batch size
         self.batch_size = 2
         # Custom layers config
         self.custom_layers_config = {2: [6], 3: [10, 11], 4: [3], 5: [8, 9], 6: [8]}
         self.offload_to_cpu = False
         self.offload_dit_to_cpu = False
         self.current_offload_cost = 0.0
         # LoRA state
         self.lora_loaded = False
         self.use_lora = False
         self._base_decoder = None  # Backup of original decoder
+    def _get_checkpoint_dir(self) -> str:
+        """Get checkpoint directory, prioritizing persistent storage if available.
+
+        Returns:
+            `<persistent_storage_path>/checkpoints` when
+            self.persistent_storage_path is set to a non-empty value; otherwise
+            `<project root>/checkpoints`, with the project root taken from
+            self._get_project_root(). The path is only computed here: the
+            directory is neither created nor checked for existence.
+        """
+        if self.persistent_storage_path:
+            return os.path.join(self.persistent_storage_path, "checkpoints")
+        project_root = self._get_project_root()
+        return os.path.join(project_root, "checkpoints")
     def get_available_checkpoints(self) -> str:
         """Return project root directory path"""
+        checkpoint_dir = self._get_checkpoint_dir()
         if os.path.exists(checkpoint_dir):
             return [checkpoint_dir]
         else:
             return []
     def get_available_acestep_v15_models(self) -> List[str]:
         """Scan and return all model directory names starting with 'acestep-v15-'"""
+        checkpoint_dir = self._get_checkpoint_dir()
         models = []
         if os.path.exists(checkpoint_dir):
             for item in os.listdir(checkpoint_dir):
                 item_path = os.path.join(checkpoint_dir, item)
                 if os.path.isdir(item_path) and item.startswith("acestep-v15-"):
                     models.append(item)
         models.sort()
         return models
+    def _ensure_model_downloaded(self, model_name: str, checkpoint_dir: str) -> str:
+        """
+        Download a model snapshot from HuggingFace Hub into the checkpoint directory.
+        Used for HuggingFace Space auto-download support.
+        This method does not itself check whether the model is already present;
+        it calls snapshot_download every time it is invoked.
+        Args:
+            model_name: Model directory name (e.g., "acestep-v15-turbo"). Looked up
+                in MODEL_REPO_MAP; names not in the map fall back to the repo ID
+                `ACE-Step/<model_name>`.
+            checkpoint_dir: Target checkpoint directory; the snapshot is written to
+                os.path.join(checkpoint_dir, model_name).
+        Returns:
+            Path to the downloaded model (checkpoint_dir joined with model_name)
+        Raises:
+            Exception: Any error raised by snapshot_download is logged and re-raised.
+        """
+        from huggingface_hub import snapshot_download
+        # Model name to HuggingFace repo ID mapping
+        MODEL_REPO_MAP = {
+            "acestep-v15-turbo": "ACE-Step/ACE-Step-v1-3.5B-turbo",
+            "acestep-v15-base": "ACE-Step/ACE-Step-v1-3.5B",
+        }
+        repo_id = MODEL_REPO_MAP.get(model_name)
+        if repo_id is None:
+            # Try using model_name as repo_id directly
+            repo_id = f"ACE-Step/{model_name}"
+        model_path = os.path.join(checkpoint_dir, model_name)
+        logger.info(f"Downloading model {repo_id} to {model_path}...")
+        try:
+            snapshot_download(
+                repo_id=repo_id,
+                local_dir=model_path,
+                local_dir_use_symlinks=False,
+            )
+            logger.info(f"Model {repo_id} downloaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to download model {repo_id}: {e}")
+            raise
+        return model_path
     def is_flash_attention_available(self) -> bool:
         """Check if flash attention is available on the system"""
         try:
             # Auto-detect project root (independent of passed project_root parameter)
             actual_project_root = self._get_project_root()
+            checkpoint_dir = self._get_checkpoint_dir()
+            os.makedirs(checkpoint_dir, exist_ok=True)
             # 1. Load main model
             # config_path is relative path (e.g., "acestep-v15-turbo"), concatenate to checkpoints directory
             acestep_v15_checkpoint_path = os.path.join(checkpoint_dir, config_path)
+            # Auto-download model if not exists (HuggingFace Space support)
+            if not os.path.exists(acestep_v15_checkpoint_path):
+                acestep_v15_checkpoint_path = self._ensure_model_downloaded(config_path, checkpoint_dir)
             if os.path.exists(acestep_v15_checkpoint_path):
                 # Determine attention implementation
                 if use_flash_attention and self.is_flash_attention_available():

acestep/inference.py CHANGED Viewed

@@ -2,7 +2,7 @@
 ACE-Step Inference API Module
 This module provides a standardized inference interface for music generation,
-designed for third-party integration. It offers both a simplified API and
 backward-compatible Gradio UI support.
 """
@@ -15,6 +15,23 @@ from loguru import logger
 from acestep.audio_utils import AudioSaver, generate_uuid_from_params
 @dataclass
 class GenerationParams:
@@ -272,6 +289,7 @@ def _update_metadata_from_lm(
     return bpm, key_scale, time_signature, audio_duration, vocal_language, caption, lyrics
 def generate_music(
     dit_handler,
     llm_handler,

 ACE-Step Inference API Module
 This module provides a standardized inference interface for music generation,
+designed for third-party integration. It offers both a simplified API and
 backward-compatible Gradio UI support.
 """
 from acestep.audio_utils import AudioSaver, generate_uuid_from_params
+# HuggingFace Space environment detection
+IS_HUGGINGFACE_SPACE = os.environ.get("SPACE_ID") is not None
+def _get_spaces_gpu_decorator(duration=180):
+    """
+    Get the @spaces.GPU decorator if running in HuggingFace Space environment.
+    Returns identity decorator if not in Space environment.
+    Args:
+        duration: Forwarded unchanged as spaces.GPU(duration=duration).
+    Returns:
+        spaces.GPU(duration=duration) when IS_HUGGINGFACE_SPACE is true and the
+        spaces package can be imported. Otherwise a pass-through decorator that
+        returns the decorated function unchanged; if the import fails inside a
+        Space, a warning is logged first.
+    """
+    if IS_HUGGINGFACE_SPACE:
+        try:
+            import spaces
+            return spaces.GPU(duration=duration)
+        except ImportError:
+            logger.warning("spaces package not found, GPU decorator disabled")
+            return lambda func: func
+    return lambda func: func
 @dataclass
 class GenerationParams:
     return bpm, key_scale, time_signature, audio_duration, vocal_language, caption, lyrics
+@_get_spaces_gpu_decorator(duration=180)
 def generate_music(
     dit_handler,
     llm_handler,

acestep/llm_inference.py CHANGED Viewed

@@ -26,8 +26,11 @@ class LLMHandler:
     """5Hz LM Handler for audio code generation"""
     STOP_REASONING_TAG = "</think>"
-    def __init__(self):
         """Initialize LLMHandler with default values"""
         self.llm = None
         self.llm_tokenizer = None
@@ -37,26 +40,37 @@ class LLMHandler:
         self.device = "cpu"
         self.dtype = torch.float32
         self.offload_to_cpu = False
-        # Shared constrained decoding processor (initialized once when LLM is loaded)
         self.constrained_processor: Optional[MetadataConstrainedLogitsProcessor] = None
-        # Shared HuggingFace model for perplexity calculation (when using vllm backend)
         self._hf_model_for_scoring = None
-    def get_available_5hz_lm_models(self) -> List[str]:
-        """Scan and return all model directory names starting with 'acestep-5Hz-lm-'"""
         current_file = os.path.abspath(__file__)
         project_root = os.path.dirname(os.path.dirname(current_file))
-        checkpoint_dir = os.path.join(project_root, "checkpoints")
         models = []
         if os.path.exists(checkpoint_dir):
             for item in os.listdir(checkpoint_dir):
                 item_path = os.path.join(checkpoint_dir, item)
                 if os.path.isdir(item_path) and item.startswith("acestep-5Hz-lm-"):
                     models.append(item)
         models.sort()
         return models

     """5Hz LM Handler for audio code generation"""
     STOP_REASONING_TAG = "</think>"
+    # HuggingFace Space environment detection
+    IS_HUGGINGFACE_SPACE = os.environ.get("SPACE_ID") is not None
+    def __init__(self, persistent_storage_path: Optional[str] = None):
         """Initialize LLMHandler with default values"""
         self.llm = None
         self.llm_tokenizer = None
         self.device = "cpu"
         self.dtype = torch.float32
         self.offload_to_cpu = False
+        # HuggingFace Space persistent storage support
+        if persistent_storage_path is None and self.IS_HUGGINGFACE_SPACE:
+            persistent_storage_path = "/data"
+        self.persistent_storage_path = persistent_storage_path
+        # Shared constrained decoding processor
         self.constrained_processor: Optional[MetadataConstrainedLogitsProcessor] = None
+        # Shared HuggingFace model for perplexity calculation
         self._hf_model_for_scoring = None
+    def _get_checkpoint_dir(self) -> str:
+        """Get checkpoint directory, prioritizing persistent storage.
+
+        Returns:
+            `<persistent_storage_path>/checkpoints` when
+            self.persistent_storage_path is set to a non-empty value; otherwise
+            the `checkpoints` directory located two directory levels above
+            this source file. The path is only computed here: the directory is
+            neither created nor checked for existence.
+        """
+        if self.persistent_storage_path:
+            return os.path.join(self.persistent_storage_path, "checkpoints")
         current_file = os.path.abspath(__file__)
         project_root = os.path.dirname(os.path.dirname(current_file))
+        return os.path.join(project_root, "checkpoints")
+    def get_available_5hz_lm_models(self) -> List[str]:
+        """Scan and return all model directory names starting with 'acestep-5Hz-lm-'.
+
+        Only the immediate children of the directory returned by
+        self._get_checkpoint_dir() are examined, and only entries that are
+        directories are kept.
+
+        Returns:
+            Directory names (not full paths), sorted in ascending order.
+            An empty list when the checkpoint directory does not exist.
+        """
+        checkpoint_dir = self._get_checkpoint_dir()
         models = []
         if os.path.exists(checkpoint_dir):
             for item in os.listdir(checkpoint_dir):
                 item_path = os.path.join(checkpoint_dir, item)
                 if os.path.isdir(item_path) and item.startswith("acestep-5Hz-lm-"):
                     models.append(item)
         models.sort()
         return models