ChuxiJ committed on
Commit
288b94f
·
1 Parent(s): 96b5f27

load audio fallback

Browse files
Files changed (2) hide show
  1. acestep/audio_utils.py +26 -7
  2. acestep/handler.py +25 -7
acestep/audio_utils.py CHANGED
@@ -135,11 +135,9 @@ class AudioSaver:
135
 
136
  def _load_audio_file(self, audio_file: Union[str, Path]) -> Tuple[torch.Tensor, int]:
137
  """
138
- Load audio file using torchaudio.
139
 
140
- Note: TORCHAUDIO_USE_TORCHCODEC=0 is set at module level to disable
141
- torchcodec backend and avoid CUDA dependency issues on HuggingFace Space.
142
- This makes torchaudio use ffmpeg backend by default.
143
 
144
  Args:
145
  audio_file: Path to the audio file
@@ -149,6 +147,7 @@ class AudioSaver:
149
 
150
  Raises:
151
  FileNotFoundError: If the audio file doesn't exist
 
152
  """
153
  audio_file = str(audio_file)
154
 
@@ -156,9 +155,29 @@ class AudioSaver:
156
  if not Path(audio_file).exists():
157
  raise FileNotFoundError(f"Audio file not found: {audio_file}")
158
 
159
- # Load audio using default backend (ffmpeg, since torchcodec is disabled)
160
- audio, sr = torchaudio.load(audio_file)
161
- return audio, sr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  def convert_audio(
164
  self,
 
135
 
136
  def _load_audio_file(self, audio_file: Union[str, Path]) -> Tuple[torch.Tensor, int]:
137
  """
138
+ Load audio file with ffmpeg backend, fallback to soundfile if failed.
139
 
140
+ This handles CUDA dependency issues with torchcodec on HuggingFace Space.
 
 
141
 
142
  Args:
143
  audio_file: Path to the audio file
 
147
 
148
  Raises:
149
  FileNotFoundError: If the audio file doesn't exist
150
+ Exception: If all methods fail to load the audio
151
  """
152
  audio_file = str(audio_file)
153
 
 
155
  if not Path(audio_file).exists():
156
  raise FileNotFoundError(f"Audio file not found: {audio_file}")
157
 
158
+ # Try torchaudio with explicit ffmpeg backend first
159
+ try:
160
+ audio, sr = torchaudio.load(audio_file, backend="ffmpeg")
161
+ return audio, sr
162
+ except Exception as e:
163
+ logger.debug(f"[AudioSaver._load_audio_file] ffmpeg backend failed: {e}, trying soundfile fallback")
164
+
165
+ # Fallback: use soundfile directly (most compatible)
166
+ try:
167
+ import soundfile as sf
168
+ audio_np, sr = sf.read(audio_file)
169
+ # soundfile returns [samples, channels] or [samples], convert to [channels, samples]
170
+ audio = torch.from_numpy(audio_np).float()
171
+ if audio.dim() == 1:
172
+ # Mono: [samples] -> [1, samples]
173
+ audio = audio.unsqueeze(0)
174
+ else:
175
+ # Stereo: [samples, channels] -> [channels, samples]
176
+ audio = audio.T
177
+ return audio, sr
178
+ except Exception as e:
179
+ logger.error(f"[AudioSaver._load_audio_file] All methods failed to load audio: {audio_file}, error: {e}")
180
+ raise
181
 
182
  def convert_audio(
183
  self,
acestep/handler.py CHANGED
@@ -1068,11 +1068,9 @@ class AceStepHandler:
1068
 
1069
  def _load_audio_file(self, audio_file) -> Tuple[torch.Tensor, int]:
1070
  """
1071
- Load audio file using torchaudio.
1072
 
1073
- Note: TORCHAUDIO_USE_TORCHCODEC=0 is set at module level to disable
1074
- torchcodec backend and avoid CUDA dependency issues on HuggingFace Space.
1075
- This makes torchaudio use ffmpeg backend by default.
1076
 
1077
  Args:
1078
  audio_file: Path to the audio file
@@ -1082,14 +1080,34 @@ class AceStepHandler:
1082
 
1083
  Raises:
1084
  FileNotFoundError: If the audio file doesn't exist
 
1085
  """
1086
  # Check if file exists first
1087
  if not os.path.exists(audio_file):
1088
  raise FileNotFoundError(f"Audio file not found: {audio_file}")
1089
 
1090
- # Load audio using default backend (ffmpeg, since torchcodec is disabled)
1091
- audio, sr = torchaudio.load(audio_file)
1092
- return audio, sr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1093
 
1094
  def _normalize_audio_to_stereo_48k(self, audio: torch.Tensor, sr: int) -> torch.Tensor:
1095
  """
 
1068
 
1069
  def _load_audio_file(self, audio_file) -> Tuple[torch.Tensor, int]:
1070
  """
1071
+ Load audio file with ffmpeg backend, fallback to soundfile if failed.
1072
 
1073
+ This handles CUDA dependency issues with torchcodec on HuggingFace Space.
 
 
1074
 
1075
  Args:
1076
  audio_file: Path to the audio file
 
1080
 
1081
  Raises:
1082
  FileNotFoundError: If the audio file doesn't exist
1083
+ Exception: If all methods fail to load the audio
1084
  """
1085
  # Check if file exists first
1086
  if not os.path.exists(audio_file):
1087
  raise FileNotFoundError(f"Audio file not found: {audio_file}")
1088
 
1089
+ # Try torchaudio with explicit ffmpeg backend first
1090
+ try:
1091
+ audio, sr = torchaudio.load(audio_file, backend="ffmpeg")
1092
+ return audio, sr
1093
+ except Exception as e:
1094
+ logger.debug(f"[_load_audio_file] ffmpeg backend failed: {e}, trying soundfile fallback")
1095
+
1096
+ # Fallback: use soundfile directly (most compatible)
1097
+ try:
1098
+ audio_np, sr = sf.read(audio_file)
1099
+ # soundfile returns [samples, channels] or [samples], convert to [channels, samples]
1100
+ audio = torch.from_numpy(audio_np).float()
1101
+ if audio.dim() == 1:
1102
+ # Mono: [samples] -> [1, samples]
1103
+ audio = audio.unsqueeze(0)
1104
+ else:
1105
+ # Stereo: [samples, channels] -> [channels, samples]
1106
+ audio = audio.T
1107
+ return audio, sr
1108
+ except Exception as e:
1109
+ logger.error(f"[_load_audio_file] All methods failed to load audio: {audio_file}, error: {e}")
1110
+ raise
1111
 
1112
  def _normalize_audio_to_stereo_48k(self, audio: torch.Tensor, sr: int) -> torch.Tensor:
1113
  """