alrivalda committed on
Commit
109c8cb
·
verified ·
1 Parent(s): 011ab85

Upload folder using huggingface_hub

Browse files
checkpoint-24806/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Real",
8
+ "1": "Fake"
9
+ },
10
+ "initializer_factor": 1.0,
11
+ "label2id": {
12
+ "Fake": 1,
13
+ "Real": 0
14
+ },
15
+ "model_type": "siglip",
16
+ "problem_type": "single_label_classification",
17
+ "text_config": {
18
+ "attention_dropout": 0.0,
19
+ "dtype": "float32",
20
+ "hidden_act": "gelu_pytorch_tanh",
21
+ "hidden_size": 768,
22
+ "intermediate_size": 3072,
23
+ "layer_norm_eps": 1e-06,
24
+ "max_position_embeddings": 64,
25
+ "model_type": "siglip_text_model",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "projection_size": 768,
29
+ "vocab_size": 256000
30
+ },
31
+ "transformers_version": "4.57.3",
32
+ "vision_config": {
33
+ "attention_dropout": 0.0,
34
+ "dtype": "float32",
35
+ "hidden_act": "gelu_pytorch_tanh",
36
+ "hidden_size": 768,
37
+ "image_size": 224,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "model_type": "siglip_vision_model",
41
+ "num_attention_heads": 12,
42
+ "num_channels": 3,
43
+ "num_hidden_layers": 12,
44
+ "patch_size": 16
45
+ }
46
+ }
checkpoint-24806/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6320fd74fec19de2fc50116961611110107f67d80bbc0bdfd0dddaa4540680dc
3
+ size 371567992
checkpoint-24806/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607cdefe725e62a3fc88b3bfcf2b52df2a5a14a7f3739c67ff6184f0499a9639
3
+ size 686558987
checkpoint-24806/preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
checkpoint-24806/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718a0f3db00824213036a2c0441849791319b7d9cf189065873bb26a7020738e
3
+ size 14645
checkpoint-24806/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da3ebf38e6de5f93a849d5bb335be74fb8ae94c32eaa092e9a693e9450f5852
3
+ size 1383
checkpoint-24806/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc7ddea0d1f2dc6ca856a813ff9b649048593d430e4f4d9f1bfc5e67e5549f7c
3
+ size 1465
checkpoint-24806/trainer_state.json ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 24806,
3
+ "best_metric": 0.13123206794261932,
4
+ "best_model_checkpoint": "Model1-v1-Rival/checkpoint-24806",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 24806,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.04031282754172378,
14
+ "grad_norm": 35.80685043334961,
15
+ "learning_rate": 2.011285771866183e-07,
16
+ "loss": 0.7492,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.08062565508344756,
21
+ "grad_norm": 27.830657958984375,
22
+ "learning_rate": 4.0266021765417167e-07,
23
+ "loss": 0.564,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.12093848262517133,
28
+ "grad_norm": 22.993253707885742,
29
+ "learning_rate": 6.041918581217252e-07,
30
+ "loss": 0.497,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.16125131016689512,
35
+ "grad_norm": 31.184377670288086,
36
+ "learning_rate": 8.057234985892785e-07,
37
+ "loss": 0.4362,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.2015641377086189,
42
+ "grad_norm": 31.91282844543457,
43
+ "learning_rate": 9.991937290033595e-07,
44
+ "loss": 0.41,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.24187696525034266,
49
+ "grad_norm": 42.18364715576172,
50
+ "learning_rate": 9.767973124300112e-07,
51
+ "loss": 0.3815,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.28218979279206646,
56
+ "grad_norm": 31.624061584472656,
57
+ "learning_rate": 9.544008958566627e-07,
58
+ "loss": 0.3568,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.32250262033379024,
63
+ "grad_norm": 36.55485534667969,
64
+ "learning_rate": 9.320044792833146e-07,
65
+ "loss": 0.341,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.362815447875514,
70
+ "grad_norm": 34.942264556884766,
71
+ "learning_rate": 9.096080627099664e-07,
72
+ "loss": 0.3293,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.4031282754172378,
77
+ "grad_norm": 30.162443161010742,
78
+ "learning_rate": 8.872116461366181e-07,
79
+ "loss": 0.3184,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.44344110295896155,
84
+ "grad_norm": 42.151607513427734,
85
+ "learning_rate": 8.648152295632698e-07,
86
+ "loss": 0.3149,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.4837539305006853,
91
+ "grad_norm": 21.629465103149414,
92
+ "learning_rate": 8.424188129899216e-07,
93
+ "loss": 0.2969,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.5240667580424091,
98
+ "grad_norm": 45.252803802490234,
99
+ "learning_rate": 8.200223964165734e-07,
100
+ "loss": 0.2962,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.5643795855841329,
105
+ "grad_norm": 31.193838119506836,
106
+ "learning_rate": 7.97625979843225e-07,
107
+ "loss": 0.2858,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.6046924131258566,
112
+ "grad_norm": 96.9237289428711,
113
+ "learning_rate": 7.752295632698768e-07,
114
+ "loss": 0.268,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.6450052406675805,
119
+ "grad_norm": 21.470144271850586,
120
+ "learning_rate": 7.528331466965285e-07,
121
+ "loss": 0.2622,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.6853180682093042,
126
+ "grad_norm": 35.70467758178711,
127
+ "learning_rate": 7.304367301231803e-07,
128
+ "loss": 0.2741,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.725630895751028,
133
+ "grad_norm": 19.73318862915039,
134
+ "learning_rate": 7.080403135498319e-07,
135
+ "loss": 0.2626,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.7659437232927517,
140
+ "grad_norm": 23.529918670654297,
141
+ "learning_rate": 6.856438969764838e-07,
142
+ "loss": 0.2518,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.8062565508344756,
147
+ "grad_norm": 41.51838302612305,
148
+ "learning_rate": 6.632474804031355e-07,
149
+ "loss": 0.2495,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.8465693783761993,
154
+ "grad_norm": 14.217453956604004,
155
+ "learning_rate": 6.408510638297872e-07,
156
+ "loss": 0.2437,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.8868822059179231,
161
+ "grad_norm": 18.972307205200195,
162
+ "learning_rate": 6.184546472564389e-07,
163
+ "loss": 0.2482,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.9271950334596468,
168
+ "grad_norm": 47.28855514526367,
169
+ "learning_rate": 5.960582306830907e-07,
170
+ "loss": 0.2419,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.9675078610013707,
175
+ "grad_norm": 48.309165954589844,
176
+ "learning_rate": 5.736618141097424e-07,
177
+ "loss": 0.2421,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 1.0,
182
+ "eval_accuracy": 0.9340860036526648,
183
+ "eval_loss": 0.17090356349945068,
184
+ "eval_model_preparation_time": 0.0026,
185
+ "eval_runtime": 109.2506,
186
+ "eval_samples_per_second": 441.087,
187
+ "eval_steps_per_second": 55.139,
188
+ "step": 12403
189
+ },
190
+ {
191
+ "epoch": 1.0078206885430945,
192
+ "grad_norm": 47.087913513183594,
193
+ "learning_rate": 5.512653975363942e-07,
194
+ "loss": 0.2372,
195
+ "step": 12500
196
+ },
197
+ {
198
+ "epoch": 1.0481335160848182,
199
+ "grad_norm": 40.675697326660156,
200
+ "learning_rate": 5.288689809630459e-07,
201
+ "loss": 0.2249,
202
+ "step": 13000
203
+ },
204
+ {
205
+ "epoch": 1.088446343626542,
206
+ "grad_norm": 46.77396774291992,
207
+ "learning_rate": 5.064725643896977e-07,
208
+ "loss": 0.2207,
209
+ "step": 13500
210
+ },
211
+ {
212
+ "epoch": 1.1287591711682659,
213
+ "grad_norm": 69.7470703125,
214
+ "learning_rate": 4.840761478163493e-07,
215
+ "loss": 0.2276,
216
+ "step": 14000
217
+ },
218
+ {
219
+ "epoch": 1.1690719987099896,
220
+ "grad_norm": 35.32638168334961,
221
+ "learning_rate": 4.6167973124300107e-07,
222
+ "loss": 0.2235,
223
+ "step": 14500
224
+ },
225
+ {
226
+ "epoch": 1.2093848262517133,
227
+ "grad_norm": 63.080291748046875,
228
+ "learning_rate": 4.3928331466965287e-07,
229
+ "loss": 0.2219,
230
+ "step": 15000
231
+ },
232
+ {
233
+ "epoch": 1.249697653793437,
234
+ "grad_norm": 45.029239654541016,
235
+ "learning_rate": 4.1688689809630457e-07,
236
+ "loss": 0.2184,
237
+ "step": 15500
238
+ },
239
+ {
240
+ "epoch": 1.2900104813351607,
241
+ "grad_norm": 20.416336059570312,
242
+ "learning_rate": 3.944904815229563e-07,
243
+ "loss": 0.2081,
244
+ "step": 16000
245
+ },
246
+ {
247
+ "epoch": 1.3303233088768847,
248
+ "grad_norm": 28.367952346801758,
249
+ "learning_rate": 3.72094064949608e-07,
250
+ "loss": 0.2157,
251
+ "step": 16500
252
+ },
253
+ {
254
+ "epoch": 1.3706361364186084,
255
+ "grad_norm": 13.548885345458984,
256
+ "learning_rate": 3.4969764837625977e-07,
257
+ "loss": 0.212,
258
+ "step": 17000
259
+ },
260
+ {
261
+ "epoch": 1.4109489639603323,
262
+ "grad_norm": 94.12673950195312,
263
+ "learning_rate": 3.273012318029115e-07,
264
+ "loss": 0.2075,
265
+ "step": 17500
266
+ },
267
+ {
268
+ "epoch": 1.451261791502056,
269
+ "grad_norm": 38.12338638305664,
270
+ "learning_rate": 3.049048152295632e-07,
271
+ "loss": 0.2148,
272
+ "step": 18000
273
+ },
274
+ {
275
+ "epoch": 1.4915746190437797,
276
+ "grad_norm": 45.156455993652344,
277
+ "learning_rate": 2.82508398656215e-07,
278
+ "loss": 0.2075,
279
+ "step": 18500
280
+ },
281
+ {
282
+ "epoch": 1.5318874465855035,
283
+ "grad_norm": 18.083568572998047,
284
+ "learning_rate": 2.601119820828667e-07,
285
+ "loss": 0.2092,
286
+ "step": 19000
287
+ },
288
+ {
289
+ "epoch": 1.5722002741272272,
290
+ "grad_norm": 27.93409538269043,
291
+ "learning_rate": 2.3771556550951847e-07,
292
+ "loss": 0.205,
293
+ "step": 19500
294
+ },
295
+ {
296
+ "epoch": 1.612513101668951,
297
+ "grad_norm": 39.768192291259766,
298
+ "learning_rate": 2.153191489361702e-07,
299
+ "loss": 0.2039,
300
+ "step": 20000
301
+ },
302
+ {
303
+ "epoch": 1.6528259292106748,
304
+ "grad_norm": 28.78324317932129,
305
+ "learning_rate": 1.9292273236282195e-07,
306
+ "loss": 0.204,
307
+ "step": 20500
308
+ },
309
+ {
310
+ "epoch": 1.6931387567523988,
311
+ "grad_norm": 94.95115661621094,
312
+ "learning_rate": 1.7052631578947368e-07,
313
+ "loss": 0.2154,
314
+ "step": 21000
315
+ },
316
+ {
317
+ "epoch": 1.7334515842941225,
318
+ "grad_norm": 51.76600646972656,
319
+ "learning_rate": 1.4812989921612543e-07,
320
+ "loss": 0.208,
321
+ "step": 21500
322
+ },
323
+ {
324
+ "epoch": 1.7737644118358462,
325
+ "grad_norm": 24.089155197143555,
326
+ "learning_rate": 1.2573348264277713e-07,
327
+ "loss": 0.2071,
328
+ "step": 22000
329
+ },
330
+ {
331
+ "epoch": 1.81407723937757,
332
+ "grad_norm": 46.96974182128906,
333
+ "learning_rate": 1.0333706606942888e-07,
334
+ "loss": 0.198,
335
+ "step": 22500
336
+ },
337
+ {
338
+ "epoch": 1.8543900669192936,
339
+ "grad_norm": 66.37223815917969,
340
+ "learning_rate": 8.094064949608062e-08,
341
+ "loss": 0.2015,
342
+ "step": 23000
343
+ },
344
+ {
345
+ "epoch": 1.8947028944610174,
346
+ "grad_norm": 42.39664077758789,
347
+ "learning_rate": 5.854423292273236e-08,
348
+ "loss": 0.1982,
349
+ "step": 23500
350
+ },
351
+ {
352
+ "epoch": 1.9350157220027413,
353
+ "grad_norm": 42.68307113647461,
354
+ "learning_rate": 3.61478163493841e-08,
355
+ "loss": 0.1958,
356
+ "step": 24000
357
+ },
358
+ {
359
+ "epoch": 1.975328549544465,
360
+ "grad_norm": 17.30171012878418,
361
+ "learning_rate": 1.3751399776035833e-08,
362
+ "loss": 0.2024,
363
+ "step": 24500
364
+ },
365
+ {
366
+ "epoch": 2.0,
367
+ "eval_accuracy": 0.9520795284741823,
368
+ "eval_loss": 0.13123206794261932,
369
+ "eval_model_preparation_time": 0.0026,
370
+ "eval_runtime": 100.8699,
371
+ "eval_samples_per_second": 477.734,
372
+ "eval_steps_per_second": 59.72,
373
+ "step": 24806
374
+ }
375
+ ],
376
+ "logging_steps": 500,
377
+ "max_steps": 24806,
378
+ "num_input_tokens_seen": 0,
379
+ "num_train_epochs": 2,
380
+ "save_steps": 500,
381
+ "stateful_callbacks": {
382
+ "TrainerControl": {
383
+ "args": {
384
+ "should_epoch_stop": false,
385
+ "should_evaluate": false,
386
+ "should_log": false,
387
+ "should_save": true,
388
+ "should_training_stop": true
389
+ },
390
+ "attributes": {}
391
+ }
392
+ },
393
+ "total_flos": 6.6484442538990895e+19,
394
+ "train_batch_size": 32,
395
+ "trial_name": null,
396
+ "trial_params": null
397
+ }
checkpoint-24806/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e688467c209d00232b65e50ab8ebc6572f3e99dff061cbd3186e7727855016
3
+ size 5777
config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Real",
8
+ "1": "Fake"
9
+ },
10
+ "initializer_factor": 1.0,
11
+ "label2id": {
12
+ "Fake": 1,
13
+ "Real": 0
14
+ },
15
+ "model_type": "siglip",
16
+ "problem_type": "single_label_classification",
17
+ "text_config": {
18
+ "attention_dropout": 0.0,
19
+ "dtype": "float32",
20
+ "hidden_act": "gelu_pytorch_tanh",
21
+ "hidden_size": 768,
22
+ "intermediate_size": 3072,
23
+ "layer_norm_eps": 1e-06,
24
+ "max_position_embeddings": 64,
25
+ "model_type": "siglip_text_model",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "projection_size": 768,
29
+ "vocab_size": 256000
30
+ },
31
+ "transformers_version": "4.57.3",
32
+ "vision_config": {
33
+ "attention_dropout": 0.0,
34
+ "dtype": "float32",
35
+ "hidden_act": "gelu_pytorch_tanh",
36
+ "hidden_size": 768,
37
+ "image_size": 224,
38
+ "intermediate_size": 3072,
39
+ "layer_norm_eps": 1e-06,
40
+ "model_type": "siglip_vision_model",
41
+ "num_attention_heads": 12,
42
+ "num_channels": 3,
43
+ "num_hidden_layers": 12,
44
+ "patch_size": 16
45
+ }
46
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6320fd74fec19de2fc50116961611110107f67d80bbc0bdfd0dddaa4540680dc
3
+ size 371567992
preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "ViTImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e688467c209d00232b65e50ab8ebc6572f3e99dff061cbd3186e7727855016
3
+ size 5777