mfuntowicz (HF Staff) committed
Commit 4f644ac · verified · 1 Parent(s): 548484d

Build uploaded using `kernels`.

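This commit uploads a prebuilt Windows binary of the activation kernels (the `torch29-cu130-x86_64-windows` variant) together with the Python wrappers shown in the diffs below. As a minimal sketch of how such a build is typically consumed through the `kernels` library: the repository id `kernels-community/activation` is an assumption used for illustration, and a CUDA device plus a build matching your torch/CUDA/platform combination are required.

```python
import torch
from kernels import get_kernel  # Hugging Face `kernels` package

# Hypothetical repository id; substitute the repo this build was uploaded to.
activation = get_kernel("kernels-community/activation")

x = torch.randn(32, 2 * 128, device="cuda", dtype=torch.float16)
out = torch.empty(32, 128, device="cuda", dtype=torch.float16)

# Writes silu(x[..., :128]) * x[..., 128:] into `out` using the compiled kernel.
activation.silu_and_mul(out, x)
```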
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+build/torch29-cu130-x86_64-windows/activation/_activation_a793e44.pyd filter=lfs diff=lfs merge=lfs -text
build/torch29-cu130-x86_64-windows/activation/__init__.py ADDED
@@ -0,0 +1,75 @@
+import torch
+
+from ._ops import ops
+
+from . import layers
+
+
+def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.silu_and_mul(out, x)
+    return out
+
+
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.mul_and_silu(out, x)
+    return out
+
+
+def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu_and_mul(out, x)
+    return out
+
+
+def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu_tanh_and_mul(out, x)
+    return out
+
+
+def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
+    ops.fatrelu_and_mul(out, x, threshold)
+    return out
+
+
+def gelu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu(out, x)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.silu(out, x)
+    return out
+
+
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu_tanh(out, x)
+    return out
+
+
+def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu_fast(out, x)
+    return out
+
+
+def gelu_new(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu_new(out, x)
+    return out
+
+
+def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+    ops.gelu_quick(out, x)
+    return out
+
+
+__all__ = [
+    "silu_and_mul",
+    "mul_and_silu",
+    "gelu_and_mul",
+    "gelu_tanh_and_mul",
+    "fatrelu_and_mul",
+    "gelu_fast",
+    "gelu_new",
+    "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
+    "layers",
+]
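
The functional wrappers above follow an out-parameter convention: the caller allocates `out` with the reduced last dimension (for the `*_and_mul` variants) or the same shape as `x` (for the plain activations), the op writes into it in place, and the same tensor is returned for convenience. A short sketch, assuming this package is importable as `activation` and a CUDA device is available:

```python
import torch
from activation import gelu_tanh_and_mul, silu

x = torch.randn(8, 2 * 64, device="cuda", dtype=torch.bfloat16)
out = torch.empty(8, 64, device="cuda", dtype=torch.bfloat16)

# Fills `out` with gelu_tanh(x[..., :64]) * x[..., 64:] and also returns it.
result = gelu_tanh_and_mul(out, x)
assert result is out

# Plain activations keep the input shape.
y = silu(torch.empty_like(x), x)
```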
build/torch29-cu130-x86_64-windows/activation/_activation_a793e44.pyd ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8de6eeb73be0a0d63b6c8f824f90f8b2fe9b1977d27d36bdf5187603a45a7b8f
+size 2230784
build/torch29-cu130-x86_64-windows/activation/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _activation_a793e44
+ops = torch.ops._activation_a793e44
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_a793e44::{op_name}"
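
Here `ops` is the `torch.ops` namespace bound to this specific build, and `add_op_namespace_prefix` only constructs the fully qualified operator name inside that namespace. A small sketch of what the helper returns, with nothing assumed beyond the code above:

```python
from activation._ops import add_op_namespace_prefix, ops

# Fully qualified name of an op in this build's private namespace.
name = add_op_namespace_prefix("silu_and_mul")
print(name)  # -> "_activation_a793e44::silu_and_mul"

# The same op is reachable as an attribute of the bound namespace:
# ops.silu_and_mul(out, x) dispatches to torch.ops._activation_a793e44.silu_and_mul(out, x)
```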
build/torch29-cu130-x86_64-windows/activation/layers.py ADDED
@@ -0,0 +1,179 @@
+import torch
+import torch.nn as nn
+
+from ._ops import ops
+
+
+class SiluAndMul(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.silu_and_mul(out, x)
+        return out
+
+class Silu(nn.Module):
+    """An activation function for SiLU.
+
+    The function computes x -> silu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.silu(out, x)
+        return out
+
+class Gelu(nn.Module):
+    """An activation function for GELU.
+
+    The function computes x -> gelu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu(out, x)
+        return out
+
+class GeluTanh(nn.Module):
+    """An activation function for GELU with `tanh` approximation.
+
+    The function computes x -> gelu_tanh(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu_tanh(out, x)
+        return out
+
+
+class MulAndSilu(nn.Module):
+    """An activation function for SwiGLU.
+
+    The function computes x -> x[:d] * silu(x[d:]) where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.mul_and_silu(out, x)
+        return out
+
+
+class GeluAndMul(nn.Module):
+    """An activation function for GeGLU.
+
+    The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2.
+
+    Shapes:
+        x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d)
+        return: (batch_size, seq_len, d) or (num_tokens, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.gelu_and_mul(out, x)
+        return out
+
+
+class GeluTanhAndMul(nn.Module):
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.gelu_tanh_and_mul(out, x)
+        return out
+
+
+class FatreluAndMul(nn.Module):
+    """An activation function for FATReLU.
+
+    The function computes x -> FATReLU(x[:d]) * x[d:] where
+    d = x.shape[-1] // 2.
+    This is used in openbmb/MiniCPM-S-1B-sft.
+
+    Shapes:
+        x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def __init__(self, threshold: float = 0.0):
+        super().__init__()
+        self.threshold = threshold
+
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.fatrelu_and_mul(out, x, self.threshold)
+        return out
+
+
+class FastGELU(nn.Module):
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_fast(out, x)
+        return out
+
+
+class NewGELU(nn.Module):
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_new(out, x)
+        return out
+
+
+class QuickGELU(nn.Module):
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_quick(out, x)
+        return out
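
The layer classes wrap the same kernels as the functional API but allocate the output themselves, so they drop into a model like any other `nn.Module`. A minimal sketch of a SwiGLU-style MLP built around `SiluAndMul`; the sizes, dtype, and device are illustrative assumptions, not taken from this commit:

```python
import torch
import torch.nn as nn
from activation.layers import SiluAndMul

class SwiGLUMLP(nn.Module):
    def __init__(self, hidden_size: int = 1024, intermediate_size: int = 4096):
        super().__init__()
        # A single projection produces both the gate and the up branch (2 * intermediate).
        self.gate_up_proj = nn.Linear(hidden_size, 2 * intermediate_size, bias=False)
        self.act = SiluAndMul()  # silu(gate) * up, computed by the custom kernel
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.down_proj(self.act(self.gate_up_proj(x)))

mlp = SwiGLUMLP().to("cuda", dtype=torch.float16)
y = mlp(torch.randn(2, 16, 1024, device="cuda", dtype=torch.float16))
```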