File size: 1,444 Bytes
a820d32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from transformers import PretrainedConfig


class SpearConfig(PretrainedConfig):
    model_type = "spear"

    def __init__(
        self,
        num_mel_bins: int = 128,
        pos_dim: int = 48,
        output_downsampling_factor: int = 1,
        downsampling_factor: str = "1,2,4,8,4,2,1",
        num_encoder_layers: str = "1,2,3,3,1,1,1",
        feedforward_dim: str = "1536,1536,1536,1536,1536,1536,1536",
        encoder_dim: str = "512,512,512,512,512,512,512",
        encoder_unmasked_dim: str = "256,256,256,256,256,256,256",
        cnn_module_kernel: str = "31,31,15,15,15,31,31",
        num_heads: str = "8,8,8,8,8,8,8",
        causal: bool = False,
        chunk_size: int = 8,
        left_context_frames: int = 128,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.output_downsampling_factor = output_downsampling_factor
        self.num_mel_bins = num_mel_bins
        self.pos_dim = pos_dim
        self.downsampling_factor = downsampling_factor
        self.num_encoder_layers = num_encoder_layers
        self.feedforward_dim = feedforward_dim
        self.encoder_dim = encoder_dim
        self.encoder_unmasked_dim = encoder_unmasked_dim
        self.cnn_module_kernel = cnn_module_kernel
        self.num_heads = num_heads
        
        # streaming related
        self.causal = causal
        self.chunk_size = chunk_size
        self.left_context_frames = left_context_frames