Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Files Community

jhj0517 committed on Sep 22, 2024

Commit

f4c648c

1 Parent(s): 633c360

Remove duplicate parameter

Browse files

Files changed (3) hide show

app.py +1 -3
configs/default_parameters.yaml +1 -2
modules/whisper/insanely_fast_whisper_inference.py +1 -1

app.py CHANGED Viewed

@@ -113,7 +113,7 @@ class App:
                 nb_max_new_tokens = gr.Number(label="Max New Tokens", value=lambda: whisper_params["max_new_tokens"],
                                               precision=0,
                                               info="Maximum number of new tokens to generate per-chunk. If not set, the maximum will be set by the default max_length.")
-                nb_chunk_length = gr.Number(label="Chunk Length", value=lambda: whisper_params["chunk_length"],
                                             precision=0,
                                             info="The length of audio segments. If it is not None, it will overwrite the default chunk_length of the FeatureExtractor.")
                 nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold (sec)",
@@ -127,8 +127,6 @@ class App:
                                                            precision=0,
                                                            info="Number of segments to consider for the language detection.")
             with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
-                nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=whisper_params["chunk_length_s"],
-                                              precision=0)
                 nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
         with gr.Accordion("BGM Separation", open=False):

                 nb_max_new_tokens = gr.Number(label="Max New Tokens", value=lambda: whisper_params["max_new_tokens"],
                                               precision=0,
                                               info="Maximum number of new tokens to generate per-chunk. If not set, the maximum will be set by the default max_length.")
+                nb_chunk_length = gr.Number(label="Chunk Length (s)", value=lambda: whisper_params["chunk_length"],
                                             precision=0,
                                             info="The length of audio segments. If it is not None, it will overwrite the default chunk_length of the FeatureExtractor.")
                 nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold (sec)",
                                                            precision=0,
                                                            info="Number of segments to consider for the language detection.")
             with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
                 nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
         with gr.Accordion("BGM Separation", open=False):

configs/default_parameters.yaml CHANGED Viewed

@@ -12,7 +12,7 @@ whisper:
   initial_prompt: null
   temperature: 0
   compression_ratio_threshold: 2.4
-  chunk_length_s: 30
   batch_size: 24
   length_penalty: 1
   repetition_penalty: 1
@@ -25,7 +25,6 @@ whisper:
   prepend_punctuations: "\"'“¿([{-"
   append_punctuations: "\"'.。,，!！?？:：”)]}、"
   max_new_tokens: null
-  chunk_length: null
   hallucination_silence_threshold: null
   hotwords: null
   language_detection_threshold: null

   initial_prompt: null
   temperature: 0
   compression_ratio_threshold: 2.4
+  chunk_length: 30
   batch_size: 24
   length_penalty: 1
   repetition_penalty: 1
   prepend_punctuations: "\"'“¿([{-"
   append_punctuations: "\"'.。,，!！?？:：”)]}、"
   max_new_tokens: null
   hallucination_silence_threshold: null
   hotwords: null
   language_detection_threshold: null

modules/whisper/insanely_fast_whisper_inference.py CHANGED Viewed

@@ -78,7 +78,7 @@ class InsanelyFastWhisperInference(WhisperBase):
             segments = self.model(
                 inputs=audio,
                 return_timestamps=True,
-                chunk_length_s=params.chunk_length_s,
                 batch_size=params.batch_size,
                 generate_kwargs={
                     "language": params.lang,

             segments = self.model(
                 inputs=audio,
                 return_timestamps=True,
+                chunk_length_s=params.chunk_length,
                 batch_size=params.batch_size,
                 generate_kwargs={
                     "language": params.lang,