AMontiB committed
Commit
9c4b1c4
·
1 Parent(s): 0402b73

Your original commit message (now includes LFS pointer)

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .DS_Store +0 -0
  2. .gitattributes +6 -32
  3. DEPLOYMENT.md +124 -0
  4. README.md +269 -10
  5. README_HF.md +46 -0
  6. __pycache__/app.cpython-310.pyc +0 -0
  7. app.py +117 -0
  8. configs/CLIP-D.yaml +72 -0
  9. configs/NPR.yaml +56 -0
  10. configs/P2G.yaml +56 -0
  11. configs/R50_TF.yaml +61 -0
  12. configs/R50_nodown.yaml +84 -0
  13. demo_images/README.md +21 -0
  14. detectors/.DS_Store +0 -0
  15. detectors/.gitattributes +1 -0
  16. detectors/CLIP-D/.DS_Store +0 -0
  17. detectors/CLIP-D/LICENSE +202 -0
  18. detectors/CLIP-D/README.md +29 -0
  19. detectors/CLIP-D/__pycache__/parser.cpython-310.pyc +0 -0
  20. detectors/CLIP-D/checkpoint/.DS_Store +0 -0
  21. detectors/CLIP-D/checkpoint/pretrained/.DS_Store +0 -0
  22. detectors/CLIP-D/checkpoint/pretrained/weights/best.pt +3 -0
  23. detectors/CLIP-D/detect.py +109 -0
  24. detectors/CLIP-D/networks/__init__.py +70 -0
  25. detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc +0 -0
  26. detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc +0 -0
  27. detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc +0 -0
  28. detectors/CLIP-D/networks/openclipnet.py +85 -0
  29. detectors/CLIP-D/networks/resnet_mod.py +335 -0
  30. detectors/CLIP-D/parser.py +25 -0
  31. detectors/CLIP-D/test.py +167 -0
  32. detectors/CLIP-D/train.py +65 -0
  33. detectors/CLIP-D/utils/__init__.py +52 -0
  34. detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  35. detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc +0 -0
  36. detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc +0 -0
  37. detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc +0 -0
  38. detectors/CLIP-D/utils/dataset.py +144 -0
  39. detectors/CLIP-D/utils/processing.py +133 -0
  40. detectors/CLIP-D/utils/training.py +105 -0
  41. detectors/NPR/README.md +49 -0
  42. detectors/NPR/__pycache__/util.cpython-310.pyc +0 -0
  43. detectors/NPR/__pycache__/validate.cpython-310.pyc +0 -0
  44. detectors/NPR/checkpoint/pretrained/weights/best.pt +3 -0
  45. detectors/NPR/data/__init__.py +201 -0
  46. detectors/NPR/data/__pycache__/__init__.cpython-310.pyc +0 -0
  47. detectors/NPR/data/__pycache__/datasets.cpython-310.pyc +0 -0
  48. detectors/NPR/data/datasets.py +139 -0
  49. detectors/NPR/detect.py +106 -0
  50. detectors/NPR/networks/__init__.py +0 -0
.DS_Store ADDED
Binary file (8.2 kB).
 
.gitattributes CHANGED
@@ -1,35 +1,9 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
1
  *.zip filter=lfs diff=lfs merge=lfs -text
2
  *.zst filter=lfs diff=lfs merge=lfs -text
3
  *tfevents* filter=lfs diff=lfs merge=lfs -text
4
+ *.pt filter=lfs diff=lfs merge=lfs -text
5
+ *.pth filter=lfs diff=lfs merge=lfs -text
6
+ *.pkl filter=lfs diff=lfs merge=lfs -text
7
+ *.json filter=lfs diff=lfs merge=lfs -text
8
+ *.gz filter=lfs diff=lfs merge=lfs -text
9
+ detectors/P2G/src/models/clip/bpe_simple_vocab_16e6.txt.gz filter=lfs diff=lfs merge=lfs -text
DEPLOYMENT.md ADDED
@@ -0,0 +1,124 @@
1
+ # Hugging Face Spaces Deployment Guide
2
+
3
+ ## Prerequisites
4
+
5
+ 1. A Hugging Face account
6
+ 2. Git LFS installed locally: `git lfs install`
7
+ 3. Model weights downloaded to the correct directories
8
+
9
+ ## Deployment Steps
10
+
11
+ ### 1. Prepare Model Weights
12
+
13
+ You have two options:
14
+
15
+ #### Option A: Upload weights via Git LFS (Recommended for public spaces)
16
+
17
+ ```bash
18
+ # Initialize Git LFS
19
+ git lfs install
20
+
21
+ # Track large files
22
+ git lfs track "*.pt"
23
+ git lfs track "*.pth"
24
+ git lfs track "*.pkl"
25
+
26
+ # Add weights
27
+ git add .gitattributes
28
+ git add detectors/*/checkpoint/pretrained/weights/best.pt
29
+ git add detectors/P2G/src/utils/classes.pkl
30
+ git commit -m "Add model weights"
31
+ ```
32
+
33
+ #### Option B: Configure automatic download
34
+
35
+ 1. Upload your model weights to Google Drive or another host
36
+ 2. Update `download_weights.py` with the correct URLs
37
+ 3. Weights will download automatically when the Space starts (a sketch of such a script is shown below)
38
+
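For reference, here is a minimal sketch of what a `download_weights.py` helper could look like. Only the `download_all_weights` entry point is taken from `app.py`; the URL table and directory handling below are illustrative placeholders, not the actual script shipped with the repository.

```python
# Hypothetical sketch of download_weights.py -- the URL table is a placeholder
# and must be replaced with your own direct-download links.
import os
import urllib.request

WEIGHT_URLS = {
    "CLIP-D": "https://example.com/weights/CLIP-D/best.pt",
    "NPR": "https://example.com/weights/NPR/best.pt",
    "P2G": "https://example.com/weights/P2G/best.pt",
    "R50_TF": "https://example.com/weights/R50_TF/best.pt",
    "R50_nodown": "https://example.com/weights/R50_nodown/best.pt",
}

def download_all_weights(root: str = "detectors") -> None:
    """Fetch each detector's checkpoint into the layout the launcher expects."""
    for detector, url in WEIGHT_URLS.items():
        dest = os.path.join(root, detector, "checkpoint", "pretrained", "weights", "best.pt")
        if os.path.exists(dest):
            continue  # already present, e.g. pulled via Git LFS
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        print(f"Downloading weights for {detector} ...")
        urllib.request.urlretrieve(url, dest)

if __name__ == "__main__":
    download_all_weights()
```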
39
+ ### 2. Create Hugging Face Space
40
+
41
+ 1. Go to https://huggingface.co/spaces
42
+ 2. Click "Create new Space"
43
+ 3. Choose:
44
+ - **Name**: deepfake-detection-library (or your preferred name)
45
+ - **SDK**: Gradio
46
+ - **License**: MIT
47
+ - **Hardware**: CPU Basic (free) or upgrade to GPU if needed
48
+
49
+ ### 3. Push to Hugging Face
50
+
51
+ ```bash
52
+ # Add HF remote (replace YOUR_USERNAME and SPACE_NAME)
53
+ git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME
54
+
55
+ # Rename README for HF
56
+ mv README.md README_github.md
57
+ mv README_HF.md README.md
58
+
59
+ # Push to Hugging Face
60
+ git add .
61
+ git commit -m "Initial commit for HF Spaces"
62
+ git push hf main
63
+ ```
64
+
65
+ ### 4. Configure Space
66
+
67
+ In your Space settings on Hugging Face:
68
+
69
+ - **Hardware**: Start with CPU Basic (free), upgrade to GPU if needed
70
+ - **Secrets**: Add any API keys if needed (none required currently)
71
+ - **Variables**: No special environment variables needed
72
+
73
+ ### 5. Verify Deployment
74
+
75
+ 1. Wait for the Space to build (may take 5-10 minutes)
76
+ 2. Test each detector with sample images
77
+ 3. Check logs for any errors
78
+
79
+ ## File Size Considerations
80
+
81
+ - **Git LFS** is required for files >10MB
82
+ - Each model weight file (~100-500MB) will be stored via LFS
83
+ - Free HF Spaces have storage limits; consider:
84
+ - Upgrading to Pro for more storage
85
+ - Using automatic download instead of uploading weights
86
+
87
+ ## Troubleshooting
88
+
89
+ ### Space fails to build
90
+
91
+ - Check `requirements.txt` for incompatible versions
92
+ - Review build logs in the Space interface
93
+ - Ensure all dependencies are listed
94
+
95
+ ### Weights not loading
96
+
97
+ - Verify Git LFS tracked the files: `git lfs ls-files`
98
+ - Check file sizes: LFS pointer files are ~130 bytes
99
+ - Update `download_weights.py` if using automatic download
100
+
101
+ ### Out of memory errors
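As a quick sanity check (an illustrative snippet, not part of the repository), you can verify that a checkpoint has been materialized rather than left as a Git LFS pointer: pointer files are tiny text files that begin with the LFS spec line.

```python
# Illustrative check: is this checkpoint a real binary, or still a Git LFS pointer?
LFS_MAGIC = b"version https://git-lfs.github.com/spec/v1"

def is_lfs_pointer(path: str) -> bool:
    with open(path, "rb") as f:
        return f.read(len(LFS_MAGIC)) == LFS_MAGIC

path = "detectors/CLIP-D/checkpoint/pretrained/weights/best.pt"
status = "still an LFS pointer" if is_lfs_pointer(path) else "a real checkpoint"
print(f"{path} is {status}")
```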
102
+
103
+ - Upgrade to GPU hardware (T4 small recommended)
104
+ - Reduce batch size or model size if possible
105
+ - Use CPU inference for deployment (already configured)
106
+
107
+ ## Cost Optimization
108
+
109
+ - **CPU Basic** (free): Works but slower
110
+ - **CPU Upgrade** ($0.03/hour): Faster inference
111
+ - **T4 Small GPU** ($0.60/hour): Needed for real-time performance
112
+
113
+ ## Maintenance
114
+
115
+ - Monitor Space usage in HF dashboard
116
+ - Update models by pushing new weights via Git LFS
117
+ - Check Gradio version compatibility: `pip list | grep gradio`
118
+
119
+ ## Support
120
+
121
+ For issues specific to this deployment, check:
122
+ - [Gradio Documentation](https://gradio.app/docs/)
123
+ - [HF Spaces Documentation](https://huggingface.co/docs/hub/spaces)
124
+ - [GitHub Repository](https://github.com/truebees-ai/Image-Deepfake-Detectors-Public-Library)
README.md CHANGED
@@ -1,13 +1,272 @@
1
  ---
2
- title: MDS Demonstrator
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 6.0.1
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # Image Deepfake Detectors Public Library
2
+
3
+ ## Overview
4
+
5
+ This repository provides a unified framework for training, testing, and benchmarking multiple state-of-the-art (SoA) deepfake detection models. It supports automated benchmarking, training, demo runs, and single-image detection, with modular configuration and extensible detector support.
6
+
7
+ ### Main Features
8
+
9
+ - **Multiple Detectors:** Supports CLIP-D [1], NPR [2], P2G [3], R50_TF [4], and R50_nodown [5] (an overview for each method is provided in its `README.md`: `./detectors/<DETECTOR>/README.md`)
10
+ - **Pretrained Weights:** All models have been pretrained on images generated with StyleGAN2 and StableDiffusionXL, and real images from the FFHQ Dataset [6] and the FORLAB Dataset [7].
11
+ - **Automated Training & Testing:** Use `launcher.py` to run experiments across detectors and datasets.
12
+ - **Demo Mode:** Easily test all detectors on sample images in `demo_images/`.
13
+ - **Single Image Detection:** Run detection on individual images via the command line.
14
+ - **Flexible Configuration:** All experiment parameters are set via YAML files in `configs/`.
15
+ - **Logging & Results:** Logs and results are saved per detector and scenario for easy analysis.
16
+
17
+ ---
18
+
19
+ ## Set-Up
20
+
21
+ ### Prerequisites
22
+
23
+ `Ubuntu>=22.04.3`, `Python>=3.10` and `CUDA:12.0`
24
+
25
+ ---
26
+
27
+ ### Download Weights
28
+
29
+ You can download the weights for each model from this [link](https://drive.google.com/file/d/1F60FN2B9skRcb3YrZwhFTZQihbj3ipJQ/view?usp=sharing).
30
+
31
+ Then, copy them into the `pretrained` folder for the corresponding model, following this structure: `./detectors/<DETECTOR>/checkpoint/pretrained/weights/best.pt`
32
+
33
  ---
34
+
35
+ ### Download Benchmarking Dataset
36
+
37
+ DeepShield Dataset: [Zenodo link](https://zenodo.org/records/15648378)
38
+
39
+ Download the dataset and change the corresponding `dataset_path` in `./configs/<DETECTOR>.yaml`.
40
+
41
+ > The DeepShield dataset is a large-scale benchmark for evaluating the robustness of fake image detection systems. It contains 100,000 images, divided between real and AI-generated content produced using advanced generative models, including StyleGAN, StyleGAN2, StyleGAN3, Stable Diffusion 1.5, 2.1, 3, and XL, as well as Flux 1.0.
42
+ >
43
+ > To simulate real-world distortions, 30,000 images were shared on Facebook, X (formerly Twitter), and Telegram, then re-collected to include platform-induced compression and artifacts. This approach ensures that the dataset captures authentic distribution noise and artifacts encountered in real-world scenarios.
44
+
45
  ---
46
 
47
+ ### VirtualEnv
48
+
49
+ Create a virtual environment using:
50
+
51
+ ```bash
52
+ python -m venv IDFD_VENV
53
+ source IDFD_VENV/bin/activate
54
+ pip install -r requirements.txt
55
+ ```
56
+
57
+ Or use conda:
58
+
59
+ ```bash
60
+ conda env create -f environment.yml
61
+ conda activate IDFD_VENV
62
+ ```
63
+
64
+ -----
65
+
66
+ ### Download Demo Dataset
67
+
68
+ You can download the demo dataset from this [link](https://drive.google.com/file/d/134Bw8l9tEC7oZJpTAeMO80QRqgdJfJS9/view?usp=sharing). The demo dataset contains 200 images randomly sampled from the DeepShield Dataset.
69
+
70
+ Place sample images for quick testing in `demo_images/`, organized by platform and label:
71
+
72
+ ```
73
+ demo_images/
74
+ Facebook/
75
+ Fake/
76
+ Real/
77
+ PreSocial/
78
+ Fake/
79
+ Real/
80
+ Telegram/
81
+ Fake/
82
+ Real/
83
+ X/
84
+ Fake/
85
+ Real/
86
+ ```
87
+
88
+ -----
89
+
90
+ ## Running Experiments
91
+
92
+ **1. Run Demo:**
93
+
94
+ Test all detectors on sample images:
95
+
96
+ ```bash
97
+ python launcher.py --demo --demo-detector all
98
+ ```
99
+
100
+ Test a single detector on sample images:
101
+
102
+ ```bash
103
+ python launcher.py --demo --demo-detector <DETECTOR>
104
+ ```
105
+
106
+ **2. Automated Train-Test (Recommended) on DeepShield Dataset:**
107
+
108
+ To run both train and test using a specific `<DETECTOR>`:
109
+
110
+ ```bash
111
+ python launcher.py --detector <DETECTOR> --phases both
112
+ ```
113
+
114
+ - `<DETECTOR>`: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown`
115
+ - `--phases`: `train`, `test`, or `both`
116
+
117
+ With this command, the selected detector is trained on images generated by StyleGAN2 and StableDiffusionXL, together with real images from the FORLAB and FFHQ datasets, **none of which were shared on social networks**.
118
+
119
+ **3. Manual Train-Test on DeepShield Dataset:**
120
+
121
+ ```bash
122
+ python launcher.py --detector <DETECTOR> --phases <PHASE> --config-dir <CONFIG_FILE_PATH> --weights_name <WEIGHTS_NAME>
123
+ ```
124
+
125
+ - `<DETECTOR>`: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown`
126
+ - `--phases`: `train`, `test`, or `both`
127
+ - `--config-dir`: Path to the detector config files (default: `configs/`)
128
+ - `--weights_name`: Model weights name. The default is defined in `configs/<DETECTOR>.yaml` by these lines:
129
+
131
+
132
+ ```
133
+ training:
134
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
135
+ ```
136
+
137
+ This corresponds to the training subsets used to train a detector (see the "Train on Different Generators from the DeepShield Dataset" section for more information).
138
+
139
+ **4. Test the model using pretrained weights on the DeepShield Dataset:**
140
+
141
+ ```bash
142
+ python launcher.py --detector <DETECTOR> --phases test --weights_name pretrained
143
+ ```
144
+
145
+ **5. Train the model using a custom weights name on the DeepShield Dataset:**
146
+
147
+ ```bash
148
+ python launcher.py --detector <DETECTOR> --phases train --weights_name <WEIGHTS_NAME>
149
+ ```
150
+
151
+ **6. Perform Detection on Single Images:**
152
+
153
+ ```bash
154
+ python launcher.py --detect --detector <DETECTOR> --image <PATH_TO_IMAGE> --weights <WEIGHTS_NAME> --output <OUTPUT_PATH>
155
+ ```
156
+
157
+ - `<DETECTOR>`: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown`
158
+ - `--image`: Path to the input image
159
+ - `--weights`: Weights name (default: `pretrained`)
160
+ - `--output`: Path to save detection results (default: `detection_results`)
161
+
162
+ -----
163
+
164
+ ## Train on Different Generators from the DeepShield Dataset
165
+
166
+ To train a detector on generators different from StyleGAN2 and StableDiffusionXL, modify these lines in `configs/<DETECTOR>.yaml`:
167
+
168
+ ```yaml
169
+ training:
170
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
171
+ ```
172
+
173
+ Currently supported pairs of `(key, sub-dataset)` are:
174
+
175
+ ```
176
+ 'gan1':['StyleGAN']
177
+ 'gan2':['StyleGAN2']
178
+ 'gan3':['StyleGAN3']
179
+ 'sd15':['StableDiffusion1.5']
180
+ 'sd2':['StableDiffusion2']
181
+ 'sd3':['StableDiffusion3']
182
+ 'sdXL':['StableDiffusionXL']
183
+ 'flux':['FLUX.1']
184
+ 'realFFHQ':['FFHQ']
185
+ 'realFORLAB':['FORLAB']
186
+ ```
187
+
188
+ And supported pairs of `(key, social)` are:
189
+
190
+ ```
191
+ 'pre':[Not Shared on Social Networks],
192
+ 'fb': [Facebook]
193
+ 'tl': [Telegram]
194
+ 'tw': [X Social]
195
+ ```
196
+
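Purely as an illustration of the key format (the actual parsing is handled by `launcher.py` and the support scripts), a training `data` string decomposes into `(sub-dataset, social)` pairs like this:

```python
# Illustration only: expand a training `data` key such as
# "gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre" using the tables above.
GENERATORS = {
    "gan1": "StyleGAN", "gan2": "StyleGAN2", "gan3": "StyleGAN3",
    "sd15": "StableDiffusion1.5", "sd2": "StableDiffusion2", "sd3": "StableDiffusion3",
    "sdXL": "StableDiffusionXL", "flux": "FLUX.1",
    "realFFHQ": "FFHQ", "realFORLAB": "FORLAB",
}
SOCIALS = {"pre": "Not shared", "fb": "Facebook", "tl": "Telegram", "tw": "X"}

def expand(data_key: str):
    """Split a '&'-joined key into human-readable (sub-dataset, social) pairs."""
    return [(GENERATORS[g], SOCIALS[s]) for g, s in (tok.split(":") for tok in data_key.split("&"))]

print(expand("gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre"))
# [('StyleGAN2', 'Not shared'), ('StableDiffusionXL', 'Not shared'),
#  ('FFHQ', 'Not shared'), ('FORLAB', 'Not shared')]
```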
197
+ Then, generate the corresponding `split.json` using `python support/json_compile.py` and use it to replace `./split.json`. NOTE: change line 9 `dataset_path=...` in `support/json_compile.py` accordingly.
198
+
199
+ ## Results & Logs
200
+
201
+ - **Results:** Saved in `detectors/<DETECTOR>/results/`
202
+ - **Logs:** Saved in `logs/` per run and scenario
203
+
204
+ -----
205
+
206
+ ## Train/Test on a New Dataset
207
+
208
+ ### Data Organization
209
+
210
+ Organize your data by platform and label:
211
+
212
+ ```
213
+ <DATASET_NAME>/
214
+ Facebook/
215
+ Fake/
216
+ Real/
217
+ PreSocial/
218
+ Fake/
219
+ Real/
220
+ Telegram/
221
+ Fake/
222
+ Real/
223
+ X/
224
+ Fake/
225
+ Real/
226
+ ```
227
+
228
+ Generate the corresponding `split.json` using `python support/json_compile.py` and use it to replace `./split.json`. NOTE: change line 9 `dataset_path=...` in `support/json_compile.py` accordingly.
229
+
230
+ ### Split Files
231
+
232
+ - **`split.json`:** Main split file for experiments. Format: JSON with `train`/`test` keys and lists of sample IDs (see the sketch after this list).
233
+ - **`split_demo.json`:** Auto-generated for demo mode, covering all images in `demo_images/`.
234
+
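A minimal sketch of that structure is shown below. It assumes only what is stated above (JSON with `train`/`test` keys holding lists of sample IDs); the authoritative schema is whatever `support/json_compile.py` emits, and the sample IDs here are hypothetical paths.

```python
# Minimal, assumption-based sketch of split.json: train/test keys with lists of sample IDs.
import json

split = {
    "train": ["PreSocial/Real/img_000001.png", "PreSocial/Fake/img_000002.png"],
    "test":  ["Facebook/Real/img_000103.png", "Telegram/Fake/img_000207.png"],
}

with open("split.json", "w") as f:
    json.dump(split, f, indent=2)
```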
235
+ -----
236
+
237
+ ## Additional Configuration Options
238
+
239
+ - **YAML Files:** All detectors have a config file in `configs/` (e.g., `CLIP-D.yaml`, `NPR.yaml`).
240
+ - **Config Options:**
241
+ - `global`: Dataset path, device, split file, threads, etc.
242
+ - `detector_args`: Model-specific arguments.
243
+ - `training`: List of training scenarios.
244
+ - `testing`: List of test scenarios.
245
+
246
+ -----
247
+
248
+ ## References
249
+
250
+ [1] D. Cozzolino, G. Poggi, R. Corvi, M. Nießner, and L. Verdoliva,
251
+ “Raising the Bar of AI-generated Image Detection with CLIP,” in 2024
252
+ IEEE/CVF Conference on Computer Vision and Pattern Recognition
253
+ Workshops (CVPRW), pp. 4356–4366, June 2024. ISSN: 2160-7516.
254
+
255
+ [2] C. Tan, H. Liu, Y. Zhao, S. Wei, G. Gu, P. Liu, and Y. Wei, “Rethinking the Up-Sampling Operations in CNN-Based Generative Network for
256
+ Generalizable Deepfake Detection,” in 2024 IEEE/CVF Conference on
257
+ Computer Vision and Pattern Recognition (CVPR), pp. 28130–28139,
258
+ June 2024. ISSN: 2575-7075.
259
+
260
+ [3] F. Laiti, B. Liberatori, T. De Min, and E. Ricci, “Conditioned Prompt-Optimization for Continual Deepfake Detection,” in Pattern Recognition (A. Antonacopoulos, S. Chaudhuri, R. Chellappa, C.L. Liu, S. Bhatacharya, and U. Pal, eds.), (Cham), pp. 64–79, Springer Nature Switzerland, 2025.
261
+
262
+ [4] Dell'Anna, Stefano, Andrea Montibeller, and Giulia Boato. "TrueFake: A Real World Case Dataset of Last Generation Fake Images also Shared on Social Networks." arXiv preprint arXiv:2504.20658 (2025).
263
+
264
+ [5] R. Corvi, D. Cozzolino, G. Zingarini, G. Poggi, K. Nagano, and L. Verdoliva, “On The Detection of Synthetic Images Generated by Diffusion
265
+ Models,” in ICASSP 2023 - 2023 IEEE International Conference on
266
+ Acoustics, Speech and Signal Processing (ICASSP), pp. 1–5, June 2023.
267
+ ISSN: 2379-190X.
268
+
269
+ [6] NVlabs, “Flickr-Faces-HQ dataset.” https://github.com/NVlabs/ffhq-dataset, n.d. Accessed: 2025-03-04.
270
+
271
+ [7] M. Iuliani, M. Fontani, and A. Piva, “A leak in PRNU based source
272
+ identification—questioning fingerprint uniqueness,” IEEE Access, vol. 9, pp. 52455–52463, 2021.
README_HF.md ADDED
@@ -0,0 +1,46 @@
1
+ ---
2
+ title: Deepfake Detection Library
3
+ emoji: 🔍
4
+ colorFrom: red
5
+ colorTo: orange
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # Deepfake Detection Library
14
+
15
+ This Space provides a unified interface to test multiple state-of-the-art deepfake detection models on your images.
16
+
17
+ ## Available Detectors
18
+
19
+ - **R50_TF** - ResNet-50-based detector trained on the TrueFake dataset
20
+ - **R50_nodown** - ResNet-50 without downsampling operations
21
+ - **CLIP-D** - CLIP-based deepfake detector
22
+ - **P2G** - Prompt2Guard: Conditioned prompt-optimization for continual deepfake detection
23
+ - **NPR** - Detector based on Neighboring Pixel Relationships
24
+
25
+ ## Usage
26
+
27
+ 1. Upload an image
28
+ 2. Select a detector from the dropdown
29
+ 3. Click "Detect" to get the prediction
30
+
31
+ The detector will return:
32
+ - **Prediction**: Real or Fake
33
+ - **Confidence**: Model confidence score (0-1)
34
+ - **Elapsed Time**: Processing time
35
+
36
+ ## Models
37
+
38
+ All models have been pretrained on images generated with StyleGAN2 and StableDiffusionXL, and real images from the FFHQ Dataset and the FORLAB Dataset.
39
+
40
+ ## References
41
+
42
+ For more information about the implementation and benchmarking, visit the [GitHub repository](https://github.com/truebees-ai/Image-Deepfake-Detectors-Public-Library).
43
+
44
+ ## Note
45
+
46
+ ⚠️ Due to file size limits, model weights are downloaded automatically on first use; this may take a few moments.
__pycache__/app.cpython-310.pyc ADDED
Binary file (2.31 kB).
 
app.py ADDED
@@ -0,0 +1,117 @@
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+ import json
5
+ import argparse
6
+ from types import SimpleNamespace
7
+ from support.detect import run_detect
8
+
9
+ # Download weights on first run (for HF Spaces)
10
+ if os.environ.get("SPACE_ID"):
11
+ try:
12
+ from download_weights import download_all_weights
13
+ download_all_weights()
14
+ except Exception as e:
15
+ print(f"Warning: Could not download weights: {e}")
16
+
17
+ # Available detectors based on launcher.py
18
+ DETECTORS = ['R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR']
19
+
20
+ def predict(image_path, detector_name):
21
+ if not image_path:
22
+ return {"error": "Please upload an image."}
23
+
24
+ # Create a temporary output file path
25
+ output_path = "temp_result.json"
26
+
27
+ # Mock args object
28
+ args = SimpleNamespace(
29
+ image=image_path,
30
+ detector=detector_name,
31
+ config_dir='configs',
32
+ output=output_path,
33
+ weights='pretrained', # Use default/pretrained
34
+ device='cpu', # Force CPU
35
+ dry_run=False,
36
+ verbose=False
37
+ )
38
+
39
+ try:
40
+ # Run detection
41
+ # We need to capture stdout/stderr or just trust the function
42
+ # run_detect might raise FileNotFoundError if weights are missing
43
+ run_detect(args)
44
+
45
+ # Read results
46
+ if os.path.exists(output_path):
47
+ with open(output_path, 'r') as f:
48
+ result = json.load(f)
49
+
50
+ # Format output
51
+ prediction = result.get('prediction', 'Unknown')
52
+ confidence = result.get('confidence', 0.0)
53
+ elapsed_time = result.get('elapsed_time', 0.0)
54
+
55
+ return {
56
+ "Prediction": prediction,
57
+ "Confidence": f"{confidence:.4f}",
58
+ "Elapsed Time": f"{elapsed_time:.3f}s"
59
+ }
60
+ else:
61
+ return {"error": "No result file generated. Check console logs for details."}
62
+
63
+ except FileNotFoundError as e:
64
+ return {"error": str(e), "message": f"Please ensure you have downloaded the weights for {detector_name}."}
65
+ except Exception as e:
66
+ return {"error": str(e)}
67
+ finally:
68
+ # Cleanup
69
+ if os.path.exists(output_path):
70
+ os.remove(output_path)
71
+
72
+ # Create Gradio Interface
73
+ with gr.Blocks(title="Deepfake Detection", theme=gr.themes.Soft()) as demo:
74
+ gr.Markdown("# 🔍 Deepfake Detection Library")
75
+ gr.Markdown("""
76
+ Upload an image and select a detector to check if it's real or fake.
77
+
78
+ **Available Detectors:**
79
+ - **R50_TF**: ResNet-50 based detector
80
+ - **R50_nodown**: ResNet-50 without downsampling
81
+ - **CLIP-D**: CLIP-based detector
82
+ - **P2G**: Prompt2Guard detector
83
+ - **NPR**: Neighboring Pixel Relationships
84
+ """)
85
+
86
+ with gr.Row():
87
+ with gr.Column():
88
+ image_input = gr.Image(type="filepath", label="Input Image", height=400)
89
+ detector_input = gr.Dropdown(
90
+ choices=DETECTORS,
91
+ value=DETECTORS[0],
92
+ label="Select Detector",
93
+ info="Choose which deepfake detection model to use"
94
+ )
95
+ submit_btn = gr.Button("🔍 Detect", variant="primary")
96
+
97
+ with gr.Column():
98
+ output_json = gr.JSON(label="Detection Results")
99
+
100
+ gr.Markdown("""
101
+ ---
102
+ ### About
103
+ This Space provides access to multiple state-of-the-art deepfake detection models.
104
+ All models are trained on StyleGAN2, StableDiffusionXL, FFHQ, and FORLAB datasets.
105
+
106
+ **Note:** First detection may be slower due to model loading.
107
+ """)
108
+
109
+ submit_btn.click(
110
+ fn=predict,
111
+ inputs=[image_input, detector_input],
112
+ outputs=output_json
113
+ )
114
+
115
+ if __name__ == "__main__":
116
+ # For HF Spaces, share is automatically enabled
117
+ demo.launch()
configs/CLIP-D.yaml ADDED
@@ -0,0 +1,72 @@
1
+ global:
2
+ dataset_path: <PATH_TO_DATASET>
3
+ device_override: cpu # null for auto-selection
4
+ min_vram: 16000
5
+ split_file: ./split.json
6
+ num_threads: 8
7
+ dry_run: false
8
+ only_list: false
9
+
10
+ detector_args:
11
+ - "--arch"
12
+ - "opencliplinearnext_clipL14commonpool"
13
+ - "--norm_type"
14
+ - "clip"
15
+ - "--resize_size"
16
+ - "200"
17
+ - "--resize_ratio"
18
+ - "1"
19
+ - "--resize_prob"
20
+ - "0.2"
21
+ - "--cmp_qual"
22
+ - "65,100"
23
+ - "--cmp_prob"
24
+ - "0.5"
25
+ - "--resizeSize"
26
+ - "224"
27
+
28
+ training:
29
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
30
+
31
+ testing:
32
+ - realFFHQ:fb
33
+ - realFORLAB:fb
34
+ - gan1:fb
35
+ - gan2:fb
36
+ - gan3:fb
37
+ - sd15:fb
38
+ - sd2:fb
39
+ - sd3:fb
40
+ - sdXL:fb
41
+ - flux:fb
42
+ - realFFHQ:tl
43
+ - realFORLAB:tl
44
+ - gan1:tl
45
+ - gan2:tl
46
+ - gan3:tl
47
+ - sd15:tl
48
+ - sd2:tl
49
+ - sd3:tl
50
+ - sdXL:tl
51
+ - flux:tl
52
+ - realFFHQ:tw
53
+ - realFORLAB:tw
54
+ - gan1:tw
55
+ - gan2:tw
56
+ - gan3:tw
57
+ - sd15:tw
58
+ - sd2:tw
59
+ - sd3:tw
60
+ - sdXL:tw
61
+ - flux:tw
62
+ - realFFHQ:pre
63
+ - realFORLAB:pre
64
+ - gan1:pre
65
+ - gan2:pre
66
+ - gan3:pre
67
+ - sd15:pre
68
+ - sd2:pre
69
+ - sd3:pre
70
+ - sdXL:pre
71
+ - flux:pre
72
+
configs/NPR.yaml ADDED
@@ -0,0 +1,56 @@
1
+ global:
2
+ dataset_path: <PATH_TO_DATASET>
3
+ device_override: cpu # null for auto-selection
4
+ min_vram: 16000
5
+ split_file: ./split.json
6
+ num_threads: 8
7
+ dry_run: false
8
+ only_list: false
9
+
10
+ detector_args: []
11
+
12
+ training:
13
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
14
+
15
+ testing:
16
+ - realFFHQ:fb
17
+ - realFORLAB:fb
18
+ - gan1:fb
19
+ - gan2:fb
20
+ - gan3:fb
21
+ - sd15:fb
22
+ - sd2:fb
23
+ - sd3:fb
24
+ - sdXL:fb
25
+ - flux:fb
26
+ - realFFHQ:tl
27
+ - realFORLAB:tl
28
+ - gan1:tl
29
+ - gan2:tl
30
+ - gan3:tl
31
+ - sd15:tl
32
+ - sd2:tl
33
+ - sd3:tl
34
+ - sdXL:tl
35
+ - flux:tl
36
+ - realFFHQ:tw
37
+ - realFORLAB:tw
38
+ - gan1:tw
39
+ - gan2:tw
40
+ - gan3:tw
41
+ - sd15:tw
42
+ - sd2:tw
43
+ - sd3:tw
44
+ - sdXL:tw
45
+ - flux:tw
46
+ - realFFHQ:pre
47
+ - realFORLAB:pre
48
+ - gan1:pre
49
+ - gan2:pre
50
+ - gan3:pre
51
+ - sd15:pre
52
+ - sd2:pre
53
+ - sd3:pre
54
+ - sdXL:pre
55
+ - flux:pre
56
+
configs/P2G.yaml ADDED
@@ -0,0 +1,56 @@
1
+ global:
2
+ dataset_path: <PATH_TO_DATASET>
3
+ device_override: cpu # null for auto-selection
4
+ min_vram: 16000
5
+ split_file: ./split.json
6
+ num_threads: 8
7
+ dry_run: false
8
+ only_list: false
9
+
10
+ detector_args: []
11
+
12
+ training:
13
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
14
+
15
+ testing:
16
+ - realFFHQ:fb
17
+ - realFORLAB:fb
18
+ - gan1:fb
19
+ - gan2:fb
20
+ - gan3:fb
21
+ - sd15:fb
22
+ - sd2:fb
23
+ - sd3:fb
24
+ - sdXL:fb
25
+ - flux:fb
26
+ - realFFHQ:tl
27
+ - realFORLAB:tl
28
+ - gan1:tl
29
+ - gan2:tl
30
+ - gan3:tl
31
+ - sd15:tl
32
+ - sd2:tl
33
+ - sd3:tl
34
+ - sdXL:tl
35
+ - flux:tl
36
+ - realFFHQ:tw
37
+ - realFORLAB:tw
38
+ - gan1:tw
39
+ - gan2:tw
40
+ - gan3:tw
41
+ - sd15:tw
42
+ - sd2:tw
43
+ - sd3:tw
44
+ - sdXL:tw
45
+ - flux:tw
46
+ - realFFHQ:pre
47
+ - realFORLAB:pre
48
+ - gan1:pre
49
+ - gan2:pre
50
+ - gan3:pre
51
+ - sd15:pre
52
+ - sd2:pre
53
+ - sd3:pre
54
+ - sdXL:pre
55
+ - flux:pre
56
+
configs/R50_TF.yaml ADDED
@@ -0,0 +1,61 @@
1
+ global:
2
+ dataset_path: <PATH_TO_DATASET>
3
+ device_override: cpu # null for auto-selection
4
+ min_vram: 16000
5
+ split_file: ./split.json
6
+ num_threads: 8
7
+ dry_run: false
8
+ only_list: false
9
+
10
+
11
+ detector_args:
12
+ - "--arch"
13
+ - "nodown"
14
+ - "--prototype"
15
+ - "--freeze"
16
+
17
+ training:
18
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
19
+
20
+ testing:
21
+ - realFFHQ:fb
22
+ - realFORLAB:fb
23
+ - gan1:fb
24
+ - gan2:fb
25
+ - gan3:fb
26
+ - sd15:fb
27
+ - sd2:fb
28
+ - sd3:fb
29
+ - sdXL:fb
30
+ - flux:fb
31
+ - realFFHQ:tl
32
+ - realFORLAB:tl
33
+ - gan1:tl
34
+ - gan2:tl
35
+ - gan3:tl
36
+ - sd15:tl
37
+ - sd2:tl
38
+ - sd3:tl
39
+ - sdXL:tl
40
+ - flux:tl
41
+ - realFFHQ:tw
42
+ - realFORLAB:tw
43
+ - gan1:tw
44
+ - gan2:tw
45
+ - gan3:tw
46
+ - sd15:tw
47
+ - sd2:tw
48
+ - sd3:tw
49
+ - sdXL:tw
50
+ - flux:tw
51
+ - realFFHQ:pre
52
+ - realFORLAB:pre
53
+ - gan1:pre
54
+ - gan2:pre
55
+ - gan3:pre
56
+ - sd15:pre
57
+ - sd2:pre
58
+ - sd3:pre
59
+ - sdXL:pre
60
+ - flux:pre
61
+
configs/R50_nodown.yaml ADDED
@@ -0,0 +1,84 @@
1
+ global:
2
+ dataset_path: <PATH_TO_DATASET>
3
+ device_override: cpu # null for auto-selection
4
+ min_vram: 16000
5
+ split_file: ./split.json
6
+ num_threads: 8
7
+ dry_run: false
8
+ only_list: false
9
+
10
+ detector_args:
11
+ - "--arch"
12
+ - "res50nodown"
13
+ - "--norm_type"
14
+ - "resnet"
15
+ - "--resize_size"
16
+ - "256"
17
+ - "--resize_ratio"
18
+ - "0.75"
19
+ - "--resize_prob"
20
+ - "0.2"
21
+ - "--cmp_qual"
22
+ - "30,100"
23
+ - "--cmp_prob"
24
+ - "0.5"
25
+ - "--cropSize"
26
+ - "96"
27
+ - "--blur_sig"
28
+ - "0.1,3.0"
29
+ - "--blur_prob"
30
+ - "0.5"
31
+ - "--jitter_prob"
32
+ - "0.8"
33
+ - "--colordist_prob"
34
+ - "0.2"
35
+ - "--cutout_prob"
36
+ - "0.2"
37
+ - "--noise_prob"
38
+ - "0.2"
39
+
40
+ training:
41
+ - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
42
+
43
+ testing:
44
+ - realFFHQ:fb
45
+ - realFORLAB:fb
46
+ - gan1:fb
47
+ - gan2:fb
48
+ - gan3:fb
49
+ - sd15:fb
50
+ - sd2:fb
51
+ - sd3:fb
52
+ - sdXL:fb
53
+ - flux:fb
54
+ - realFFHQ:tl
55
+ - realFORLAB:tl
56
+ - gan1:tl
57
+ - gan2:tl
58
+ - gan3:tl
59
+ - sd15:tl
60
+ - sd2:tl
61
+ - sd3:tl
62
+ - sdXL:tl
63
+ - flux:tl
64
+ - realFFHQ:tw
65
+ - realFORLAB:tw
66
+ - gan1:tw
67
+ - gan2:tw
68
+ - gan3:tw
69
+ - sd15:tw
70
+ - sd2:tw
71
+ - sd3:tw
72
+ - sdXL:tw
73
+ - flux:tw
74
+ - realFFHQ:pre
75
+ - realFORLAB:pre
76
+ - gan1:pre
77
+ - gan2:pre
78
+ - gan3:pre
79
+ - sd15:pre
80
+ - sd2:pre
81
+ - sd3:pre
82
+ - sdXL:pre
83
+ - flux:pre
84
+
demo_images/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # Download Demo Dataset
2
+
3
+ You can download the demo dataset from this [link](https://drive.google.com/file/d/134Bw8l9tEC7oZJpTAeMO80QRqgdJfJS9/view?usp=sharing). The demo dataset contains 200 images randomly sampled from the DeepShield Dataset.
4
+
5
+ Place sample images for quick testing in `demo_images/`, organized by platform and label:
6
+
7
+ ```
8
+ demo_images/
9
+ Facebook/
10
+ Fake/
11
+ Real/
12
+ PreSocial/
13
+ Fake/
14
+ Real/
15
+ Telegram/
16
+ Fake/
17
+ Real/
18
+ X/
19
+ Fake/
20
+ Real/
21
+ ```
detectors/.DS_Store ADDED
Binary file (10.2 kB).
 
detectors/.gitattributes ADDED
@@ -0,0 +1 @@
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
detectors/CLIP-D/.DS_Store ADDED
Binary file (8.2 kB).
 
detectors/CLIP-D/LICENSE ADDED
@@ -0,0 +1,202 @@
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
detectors/CLIP-D/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # ClipBased-SyntheticImageDetection
2
+
3
+ [![Official Github Repo](https://img.shields.io/badge/Github%20page-222222.svg?style=for-the-badge&logo=github)](https://grip-unina.github.io/ClipBased-SyntheticImageDetection/)
4
+ [![Paper](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/abs/2312.00195v2)
5
+ [![GRIP Research Group Website](https://img.shields.io/badge/-GRIP-0888ef.svg?style=for-the-badge)](https://www.grip.unina.it)
6
+
7
+ Original Paper:
8
+ [Raising the Bar of AI-generated Image Detection with CLIP](https://arxiv.org/abs/2312.00195v2).
9
+
10
+ Authors: Davide Cozzolino, Giovanni Poggi, Riccardo Corvi, Matthias Nießner, and Luisa Verdoliva.
11
+
12
+ ## Abstract
13
+
14
+ The aim of this work is to explore the potential of pre-trained vision-language models (VLMs) for universal detection of AI-generated images. We develop a lightweight detection strategy based on CLIP features and study its performance in a wide variety of challenging scenarios. We find that, contrary to previous beliefs, it is neither necessary nor convenient to use a large domain-specific dataset for training. On the contrary, by using only a handful of example images from a single generative model, a CLIP-based detector exhibits surprising generalization ability and high robustness across different architectures, including recent commercial tools such as Dalle-3, Midjourney v5, and Firefly. We match the state-of-the-art (SoTA) on in-distribution data and significantly improve upon it in terms of generalization to out-of-distribution data (+6% AUC) and robustness to impaired/laundered data (+13%).
15
+
16
+ ## Please Cite
17
+
18
+ ```
19
+ @inproceedings{cozzolino2023raising,
20
+ author={Davide Cozzolino and Giovanni Poggi and
21
+ Riccardo Corvi and Matthias Nießner and Luisa
22
+ Verdoliva},
23
+ title={{Raising the Bar of AI-generated Image
24
+ Detection with CLIP}},
25
+ booktitle={IEEE/CVF Conference on Computer Vision
26
+ and Pattern Recognition Workshops (CVPRW)},
27
+ year={2024},
28
+ }
29
+ ```
detectors/CLIP-D/__pycache__/parser.cpython-310.pyc ADDED
Binary file (1.3 kB).
 
detectors/CLIP-D/checkpoint/.DS_Store ADDED
Binary file (6.15 kB).
 
detectors/CLIP-D/checkpoint/pretrained/.DS_Store ADDED
Binary file (6.15 kB).
 
detectors/CLIP-D/checkpoint/pretrained/weights/best.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34315ca92adbf17921f5aa1e28f8fe5bf7d56dd8126205c9e264cbfb26582d12
3
+ size 15452
detectors/CLIP-D/detect.py ADDED
@@ -0,0 +1,109 @@
1
+ # ----------------------------------------------------------------------------
2
+ # IMPORTS
3
+ # ----------------------------------------------------------------------------
4
+ import os
5
+ import sys
6
+ import time
7
+ import torch
8
+ import argparse
9
+ from PIL import Image
10
+ import torchvision.transforms as transforms
11
+
12
+ # Add project root to path for imports
13
+ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
+ sys.path.append(project_root)
15
+ from support.detect_utils import format_result, save_result, get_device
16
+
17
+ from networks import create_architecture
18
+
19
+ # ----------------------------------------------------------------------------
20
+ # IMAGE PREPROCESSING
21
+ # ----------------------------------------------------------------------------
22
+ def preprocess_image(image_path, size=224):
23
+ """Load and preprocess a single image for model input."""
24
+ if not os.path.exists(image_path):
25
+ raise FileNotFoundError(f"Image not found: {image_path}")
26
+
27
+ image = Image.open(image_path).convert('RGB')
28
+ transform = transforms.Compose([
29
+ transforms.Resize(size),
30
+ transforms.CenterCrop(size),
31
+ transforms.ToTensor(),
32
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
33
+ ])
34
+ return transform(image).unsqueeze(0) # Add batch dimension
35
+
36
+ # ----------------------------------------------------------------------------
37
+ # ARGUMENT PARSING
38
+ # ----------------------------------------------------------------------------
39
+ def parse_args():
40
+ parser = argparse.ArgumentParser(description='CLIP-D single image detector')
41
+ parser.add_argument('--image', type=str, required=True, help='Path to input image')
42
+ parser.add_argument('--model', type=str, default='pretrained', help='Name of the model checkpoint directory')
43
+ parser.add_argument('--output', type=str, help='Path to save detection result JSON')
44
+ parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)')
45
+ return parser.parse_args()
46
+
47
+ def main():
48
+ args = parse_args()
49
+
50
+ # Setup device
51
+ device = torch.device(args.device) if args.device else get_device()
52
+
53
+ # Load model
54
+ try:
55
+ load_path = f'./detectors/CLIP-D/checkpoint/{args.model}/weights/best.pt'
56
+
57
+ if not os.path.exists(load_path):
58
+ raise FileNotFoundError(f"Model weights not found at: {load_path}")
59
+
60
+ checkpoint = torch.load(load_path, map_location=device)
61
+ # Initialize model and load state
62
+ model = create_architecture("opencliplinearnext_clipL14commonpool", pretrained=False, num_classes=1).to(device)
63
+ if 'model' in checkpoint:
64
+ model.load_state_dict(checkpoint['model'])
65
+ else:
66
+ model.load_state_dict(checkpoint)
67
+ model.eval()
68
+ except Exception as e:
69
+ print(f"Error loading model: {e}")
70
+ return
71
+
72
+ # Load and preprocess image
73
+ try:
74
+ image_tensor = preprocess_image(args.image)
75
+ image_tensor = image_tensor.to(device)
76
+ except Exception as e:
77
+ print(f"Error loading image: {e}")
78
+ return
79
+
80
+ # Run detection
81
+ start_time = time.time()
82
+ with torch.no_grad():
83
+ try:
84
+ score = model(image_tensor)
85
+ prediction = torch.sigmoid(score)
86
+
87
+ confidence = prediction.item()
88
+
89
+ result = format_result(
90
+ 'fake' if confidence>0.5 else 'real',
91
+ confidence,
92
+ time.time() - start_time
93
+ )
94
+
95
+ # Print result
96
+ print(f"Prediction: {result['prediction']}")
97
+ print(f"Confidence: {result['confidence']:.4f}")
98
+ print(f"Time: {result['elapsed_time']:.3f}s")
99
+
100
+ # Save result if output path provided
101
+ if args.output:
102
+ save_result(result, args.output)
103
+
104
+ except Exception as e:
105
+ print(f"Error during detection: {e}")
106
+ return
107
+
108
+ if __name__ == '__main__':
109
+ main()
detectors/CLIP-D/networks/__init__.py ADDED
@@ -0,0 +1,70 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+
18
+ def create_architecture(name_arch, pretrained=False, num_classes=1):
19
+ if name_arch == "res50nodown":
20
+ from .resnet_mod import resnet50
21
+
22
+ if pretrained:
23
+ model = resnet50(pretrained=True, stride0=1, dropout=0.5).change_output(num_classes)
24
+ else:
25
+ model = resnet50(num_classes=num_classes, stride0=1, dropout=0.5)
26
+ elif name_arch == "res50":
27
+ from .resnet_mod import resnet50
28
+
29
+ if pretrained:
30
+ model = resnet50(pretrained=True, stride0=2).change_output(num_classes)
31
+ else:
32
+ model = resnet50(num_classes=num_classes, stride0=2)
33
+ elif name_arch.startswith('opencliplinear_'):
34
+ from .openclipnet import OpenClipLinear
35
+ model = OpenClipLinear(num_classes=num_classes, pretrain=name_arch[15:], normalize=True)
36
+ elif name_arch.startswith('opencliplinearnext_'):
37
+ from .openclipnet import OpenClipLinear
38
+ model = OpenClipLinear(num_classes=num_classes, pretrain=name_arch[19:], normalize=True, next_to_last=True)
39
+ else:
40
+ assert False
41
+ return model
42
+
43
+ def count_parameters(model):
44
+ return sum(p.numel() for p in model.parameters() if p.requires_grad)
45
+
46
+ def load_weights(model, model_path):
47
+ from torch import load
48
+ dat = load(model_path, map_location='cpu')
49
+ if 'model' in dat:
50
+ if ('module._conv_stem.weight' in dat['model']) or \
51
+ ('module.fc.fc1.weight' in dat['model']) or \
52
+ ('module.fc.weight' in dat['model']):
53
+ model.load_state_dict(
54
+ {key[7:]: dat['model'][key] for key in dat['model']})
55
+ else:
56
+ model.load_state_dict(dat['model'])
57
+ elif 'state_dict' in dat:
58
+ model.load_state_dict(dat['state_dict'])
59
+ elif 'net' in dat:
60
+ model.load_state_dict(dat['net'])
61
+ elif 'main.0.weight' in dat:
62
+ model.load_state_dict(dat)
63
+ elif '_fc.weight' in dat:
64
+ model.load_state_dict(dat)
65
+ elif 'conv1.weight' in dat:
66
+ model.load_state_dict(dat)
67
+ else:
68
+ print(list(dat.keys()))
69
+ assert False
70
+ return model
detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.96 kB).
 
detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc ADDED
Binary file (3.81 kB).
 
detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc ADDED
Binary file (9.57 kB).
 
detectors/CLIP-D/networks/openclipnet.py ADDED
@@ -0,0 +1,85 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.nn.functional as F
21
+ import open_clip
22
+ from .resnet_mod import ChannelLinear
23
+
24
+ dict_pretrain = {
25
+ 'clipL14openai' : ('ViT-L-14', 'openai'),
26
+ 'clipL14laion400m' : ('ViT-L-14', 'laion400m_e32'),
27
+ 'clipL14laion2B' : ('ViT-L-14', 'laion2b_s32b_b82k'),
28
+ 'clipL14datacomp' : ('ViT-L-14', 'laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K', 'open_clip_pytorch_model.bin'),
29
+ 'clipL14commonpool' : ('ViT-L-14', "laion/CLIP-ViT-L-14-CommonPool.XL-s13B-b90K", 'open_clip_pytorch_model.bin'),
30
+ 'clipaL14datacomp' : ('ViT-L-14-CLIPA', 'datacomp1b'),
31
+ 'cocaL14laion2B' : ('coca_ViT-L-14', 'laion2b_s13b_b90k'),
32
+ 'clipg14laion2B' : ('ViT-g-14', 'laion2b_s34b_b88k'),
33
+ 'eva2L14merged2b' : ('EVA02-L-14', 'merged2b_s4b_b131k'),
34
+ 'clipB16laion2B' : ('ViT-B-16', 'laion2b_s34b_b88k'),
35
+ }
36
+
37
+
38
+ class OpenClipLinear(nn.Module):
39
+ def __init__(self, num_classes=1, pretrain='clipL14commonpool', normalize=True, next_to_last=False):
40
+ super(OpenClipLinear, self).__init__()
41
+
42
+ # Modified to handle download failures gracefully
43
+ # The checkpoint only contains fc weights, so we need the pretrained backbone
44
+ if len(dict_pretrain[pretrain])==2:
45
+ try:
46
+ backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=dict_pretrain[pretrain][1])
47
+ except Exception as e:
48
+ print(f"WARNING: Could not download pretrained weights ({e}). Using random initialization.")
49
+ backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=None)
50
+ else:
51
+ try:
52
+ from huggingface_hub import hf_hub_download
53
+ backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=hf_hub_download(*dict_pretrain[pretrain][1:]))
54
+ except Exception as e:
55
+ print(f"WARNING: Could not download pretrained weights ({e}). Using random initialization.")
56
+ backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=None)
57
+
58
+ if next_to_last:
59
+ self.num_features = backbone.visual.proj.shape[0]
60
+ backbone.visual.proj = None
61
+ else:
62
+ self.num_features = backbone.visual.output_dim
63
+
64
+ self.bb = [backbone, ]
65
+ self.normalize = normalize
66
+
67
+ self.fc = ChannelLinear(self.num_features, num_classes)
68
+ torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02)
69
+
70
+ def to(self, *args, **kwargs):
71
+ self.bb[0].to(*args, **kwargs)
72
+ super(OpenClipLinear, self).to(*args, **kwargs)
73
+ return self
74
+
75
+ def forward_features(self, x):
76
+ with torch.no_grad():
77
+ self.bb[0].eval()
78
+ features = self.bb[0].encode_image(x, normalize=self.normalize)
79
+ return features
80
+
81
+ def forward_head(self, x):
82
+ return self.fc(x)
83
+
84
+ def forward(self, x):
85
+ return self.forward_head(self.forward_features(x))
detectors/CLIP-D/networks/resnet_mod.py ADDED
@@ -0,0 +1,335 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.utils.model_zoo as model_zoo
21
+
22
+ __all__ = ["ResNet", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152"]
23
+
24
+
25
+ model_urls = {
26
+ "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
27
+ "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
28
+ "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
29
+ "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
30
+ "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
31
+ }
32
+
33
+ class ChannelLinear(nn.Linear):
34
+ def __init__(
35
+ self, in_features: int, out_features: int, bias: bool = True, pool=None
36
+ ) -> None:
37
+ super(ChannelLinear, self).__init__(in_features, out_features, bias)
38
+ self.compute_axis = 1
39
+ self.pool = pool
40
+
41
+ def forward(self, x):
42
+ axis_ref = len(x.shape) - 1
43
+ x = torch.transpose(x, self.compute_axis, axis_ref)
44
+ out_shape = list(x.shape)
45
+ out_shape[-1] = self.out_features
46
+ x = x.reshape(-1, x.shape[-1])
47
+ x = x.matmul(self.weight.t())
48
+ if self.bias is not None:
49
+ x = x + self.bias[None, :]
50
+ x = torch.transpose(x.view(out_shape), axis_ref, self.compute_axis)
51
+ if self.pool is not None:
52
+ x = self.pool(x)
53
+ return x
54
+
55
+
56
+ def conv3x3(in_planes, out_planes, stride=1, padding=1):
57
+ """3x3 convolution with padding"""
58
+ return nn.Conv2d(
59
+ in_planes, out_planes, kernel_size=3, stride=stride, padding=padding, bias=False
60
+ )
61
+
62
+
63
+ def conv1x1(in_planes, out_planes, stride=1):
64
+ """1x1 convolution"""
65
+ return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
66
+
67
+
68
+ class BasicBlock(nn.Module):
69
+ expansion = 1
70
+
71
+ def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None):
72
+ super(BasicBlock, self).__init__()
73
+ self.conv1 = conv3x3(inplanes, planes, stride, padding=padding)
74
+ self.bn1 = nn.BatchNorm2d(planes)
75
+ self.relu = nn.ReLU(inplace=True)
76
+ self.conv2 = conv3x3(planes, planes, padding=padding)
77
+ self.bn2 = nn.BatchNorm2d(planes)
78
+ self.downsample = downsample
79
+ self.stride = stride
80
+ self.padding = padding
81
+
82
+ def forward(self, x):
83
+ identity = x
84
+
85
+ out = self.conv1(x)
86
+ out = self.bn1(out)
87
+ out = self.relu(out)
88
+
89
+ out = self.conv2(out)
90
+ out = self.bn2(out)
91
+
92
+ if self.padding == 0:
93
+ identity = identity[..., 1:-1, 1:-1]
94
+ if self.downsample is not None:
95
+ identity = self.downsample(identity)
96
+ if self.padding == 0:
97
+ identity = identity[..., 1:-1, 1:-1]
98
+
99
+ out += identity
100
+ out = self.relu(out)
101
+
102
+ return out
103
+
104
+
105
+ class Bottleneck(nn.Module):
106
+ expansion = 4
107
+
108
+ def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None):
109
+ super(Bottleneck, self).__init__()
110
+ self.conv1 = conv1x1(inplanes, planes)
111
+ self.bn1 = nn.BatchNorm2d(planes)
112
+ self.conv2 = conv3x3(planes, planes, stride, padding=padding)
113
+ self.bn2 = nn.BatchNorm2d(planes)
114
+ self.conv3 = conv1x1(planes, planes * self.expansion)
115
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion)
116
+ self.relu = nn.ReLU(inplace=True)
117
+ self.downsample = downsample
118
+ self.stride = stride
119
+ self.padding = padding
120
+
121
+ def forward(self, x):
122
+ identity = x
123
+
124
+ out = self.conv1(x)
125
+ out = self.bn1(out)
126
+ out = self.relu(out)
127
+
128
+ out = self.conv2(out)
129
+ out = self.bn2(out)
130
+ out = self.relu(out)
131
+
132
+ out = self.conv3(out)
133
+ out = self.bn3(out)
134
+
135
+ if self.padding == 0:
136
+ identity = identity[..., 1:-1, 1:-1]
137
+ if self.downsample is not None:
138
+ identity = self.downsample(identity)
139
+
140
+ out += identity
141
+ out = self.relu(out)
142
+
143
+ return out
144
+
145
+ class ResNet(nn.Module):
146
+ def __init__(
147
+ self,
148
+ block,
149
+ layers,
150
+ num_classes=1000,
151
+ zero_init_residual=False,
152
+ stride0=2,
153
+ padding=1,
154
+ dropout=0.0,
155
+ gap_size=None,
156
+ ):
157
+ super(ResNet, self).__init__()
158
+ self.inplanes = 64
159
+
160
+ self.conv1 = nn.Conv2d(
161
+ 3, 64, kernel_size=7, stride=stride0, padding=3 * padding, bias=False
162
+ )
163
+ self.bn1 = nn.BatchNorm2d(64)
164
+ if dropout > 0:
165
+ self.dropout = nn.Dropout(dropout)
166
+ else:
167
+ self.dropout = None
168
+ self.relu = nn.ReLU(inplace=True)
169
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=stride0, padding=padding)
170
+ self.layer1 = self._make_layer(block, 64, layers[0], padding=padding)
171
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2, padding=padding)
172
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2, padding=padding)
173
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2, padding=padding)
174
+
175
+ if gap_size is None:
176
+ self.gap_size = None
177
+ self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
178
+ elif gap_size < 0:
179
+ with torch.no_grad():
180
+ y = self.forward_features(
181
+ torch.zeros((1, 3, -gap_size, -gap_size), dtype=torch.float32)
182
+ ).shape
183
+ print("gap_size:", -gap_size, ">>", y[-1])
184
+ self.gap_size = y[-1]
185
+ self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0)
186
+ elif gap_size == 1:
187
+ self.gap_size = gap_size
188
+ self.avgpool = None
189
+ else:
190
+ self.gap_size = gap_size
191
+ self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0)
192
+ self.num_features = 512 * block.expansion
193
+ self.fc = ChannelLinear(self.num_features, num_classes)
194
+
195
+ for m in self.modules():
196
+ if isinstance(m, nn.Conv2d):
197
+ nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
198
+ elif isinstance(m, nn.BatchNorm2d):
199
+ nn.init.constant_(m.weight, 1)
200
+ nn.init.constant_(m.bias, 0)
201
+
202
+ if zero_init_residual:
203
+ for m in self.modules():
204
+ if isinstance(m, Bottleneck):
205
+ nn.init.constant_(m.bn3.weight, 0)
206
+ elif isinstance(m, BasicBlock):
207
+ nn.init.constant_(m.bn2.weight, 0)
208
+
209
+ def _make_layer(self, block, planes, blocks, stride=1, padding=1):
210
+ downsample = None
211
+ if stride != 1 or self.inplanes != planes * block.expansion:
212
+ downsample = nn.Sequential(
213
+ conv1x1(self.inplanes, planes * block.expansion, stride),
214
+ nn.BatchNorm2d(planes * block.expansion),
215
+ )
216
+
217
+ layers = []
218
+ layers.append(
219
+ block(
220
+ self.inplanes,
221
+ planes,
222
+ stride=stride,
223
+ downsample=downsample,
224
+ padding=padding,
225
+ )
226
+ )
227
+ self.inplanes = planes * block.expansion
228
+ for _ in range(1, blocks):
229
+ layers.append(block(self.inplanes, planes, padding=padding))
230
+
231
+ return nn.Sequential(*layers)
232
+
233
+ def change_output(self, num_classes):
234
+ self.fc = ChannelLinear(self.num_features, num_classes)
235
+ torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02)
236
+ return self
237
+
238
+ def change_input(self, num_inputs):
239
+ data = self.conv1.weight.data
240
+ old_num_inputs = int(data.shape[1])
241
+ if num_inputs > old_num_inputs:
242
+ times = num_inputs // old_num_inputs
243
+ if (times * old_num_inputs) < num_inputs:
244
+ times = times + 1
245
+ data = data.repeat(1, times, 1, 1) / times
246
+ elif num_inputs == old_num_inputs:
247
+ return self
248
+
249
+ data = data[:, :num_inputs, :, :]
250
+ print(self.conv1.weight.data.shape, "->", data.shape)
251
+ self.conv1.weight.data = data
252
+
253
+ return self
254
+
255
+ def forward_features(self, x):
256
+ x = self.conv1(x)
257
+ x = self.bn1(x)
258
+ x = self.relu(x)
259
+ x = self.maxpool(x)
260
+
261
+ x = self.layer1(x)
262
+ x = self.layer2(x)
263
+ x = self.layer3(x)
264
+ x = self.layer4(x)
265
+ return x
266
+
267
+ def forward_head(self, x):
268
+ if self.avgpool is not None:
269
+ x = self.avgpool(x)
270
+ if self.dropout is not None:
271
+ x = self.dropout(x)
272
+ y = self.fc(x)
273
+ if self.gap_size is None:
274
+ y = torch.squeeze(torch.squeeze(y, -1), -1)
275
+ return y
276
+
277
+ def forward(self, x):
278
+ x = self.forward_features(x)
279
+ x = self.forward_head(x)
280
+ return x
281
+
282
+
283
+ def resnet18(pretrained=False, **kwargs):
284
+ """Constructs a ResNet-18 model.
285
+ Args:
286
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
287
+ """
288
+ model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
289
+ if pretrained:
290
+ model.load_state_dict(model_zoo.load_url(model_urls["resnet18"]))
291
+ return model
292
+
293
+
294
+ def resnet34(pretrained=False, **kwargs):
295
+ """Constructs a ResNet-34 model.
296
+ Args:
297
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
298
+ """
299
+ model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
300
+ if pretrained:
301
+ model.load_state_dict(model_zoo.load_url(model_urls["resnet34"]))
302
+ return model
303
+
304
+
305
+ def resnet50(pretrained=False, **kwargs):
306
+ """Constructs a ResNet-50 model.
307
+ Args:
308
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
309
+ """
310
+ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
311
+ if pretrained:
312
+ model.load_state_dict(model_zoo.load_url(model_urls["resnet50"]))
313
+ return model
314
+
315
+
316
+ def resnet101(pretrained=False, **kwargs):
317
+ """Constructs a ResNet-101 model.
318
+ Args:
319
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
320
+ """
321
+ model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
322
+ if pretrained:
323
+ model.load_state_dict(model_zoo.load_url(model_urls["resnet101"]))
324
+ return model
325
+
326
+
327
+ def resnet152(pretrained=False, **kwargs):
328
+ """Constructs a ResNet-152 model.
329
+ Args:
330
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
331
+ """
332
+ model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
333
+ if pretrained:
334
+ model.load_state_dict(model_zoo.load_url(model_urls["resnet152"]))
335
+ return model
detectors/CLIP-D/parser.py ADDED
@@ -0,0 +1,25 @@
1
+ import argparse
2
+
3
+ def get_parser():
4
+ parser = argparse.ArgumentParser()
5
+ parser.add_argument("--name", type=str, default="test", help="run name")
6
+ parser.add_argument("--arch", type=str, default="opencliplinearnext_clipL14commonpool", help="architecture name")
7
+
8
+ parser.add_argument("--task", type=str, help="Task: train/test")
9
+ parser.add_argument("--device", type=str, default="cuda:0", help="cuda device to use")
10
+
11
+ parser.add_argument("--split_file", type=str, help="Path to split json")
12
+ parser.add_argument("--data_root", type=str, help="Path to dataset")
13
+ parser.add_argument("--data_keys", type=str, help="Dataset specifications")
14
+
15
+ parser.add_argument("--batch_size", type=int, default=64, help='Dataloader batch size')
16
+ parser.add_argument("--num_threads", type=int, default=14, help='# threads for loading data')
17
+
18
+ parser.add_argument("--lr", type=float, default=0.0001, help="initial learning rate")
19
+ parser.add_argument("--weight_decay", type=float, default=0.0, help="weight decay")
20
+ parser.add_argument("--beta1", type=float, default=0.9, help="momentum term of adam")
21
+
22
+ parser.add_argument("--num_epoches", type=int, default=1000, help="# of epochs at the starting learning rate")
23
+ parser.add_argument("--earlystop_epoch", type=int, default=5, help="Number of epochs without loss reduction before lowering the learning rate")
24
+
25
+ return parser
detectors/CLIP-D/test.py ADDED
@@ -0,0 +1,167 @@
1
+ import os
2
+ from tqdm import tqdm
3
+ import torch
4
+ import pandas as pd
5
+ import json
6
+ import time
7
+ import numpy as np
8
+ from sklearn.metrics import roc_auc_score, accuracy_score
9
+ from networks import create_architecture, count_parameters
10
+ from utils.dataset import create_dataloader
11
+ from utils.processing import add_processing_arguments
12
+ from parser import get_parser
13
+
14
+ def test(loader, model, settings, device):
15
+ model.eval()
16
+
17
+ start_time = time.time()
18
+
19
+ # File paths
20
+ output_dir = f'./results/{settings.name}/data/{settings.data_keys}'
21
+ os.makedirs(output_dir, exist_ok=True)
22
+
23
+ csv_filename = os.path.join(output_dir, 'results.csv')
24
+ metrics_filename = os.path.join(output_dir, 'metrics.json')
25
+ image_results_filename = os.path.join(output_dir, 'image_results.json')
26
+
27
+ # Collect all results
28
+ all_scores = []
29
+ all_labels = []
30
+ all_paths = []
31
+ image_results = []
32
+
33
+ # Extract training dataset keys from model name (format: "training_keys_freeze_down" or "training_keys")
34
+ training_dataset_keys = []
35
+ model_name = settings.name
36
+ if '_freeze_down' in model_name:
37
+ training_name = model_name.replace('_freeze_down', '')
38
+ else:
39
+ training_name = model_name
40
+ if '&' in training_name:
41
+ training_dataset_keys = training_name.split('&')
42
+ else:
43
+ training_dataset_keys = [training_name]
44
+
45
+ # Write CSV header
46
+ with open(csv_filename, 'w') as f:
47
+ f.write(f"{','.join(['name', 'pro', 'flag'])}\n")
48
+
49
+ with torch.no_grad():
50
+ with tqdm(loader, unit='batch', mininterval=0.5) as tbatch:
51
+ tbatch.set_description('Test')
52
+ for data_dict in tbatch:
53
+ data = data_dict['img'].to(device)
54
+ labels = data_dict['target'].to(device)
55
+ paths = data_dict['path']
56
+
57
+ scores = model(data).squeeze(1)
58
+
59
+ # Collect results
60
+ for score, label, path in zip(scores, labels, paths):
61
+ score_val = score.item()
62
+ label_val = label.item()
63
+
64
+ all_scores.append(score_val)
65
+ all_labels.append(label_val)
66
+ all_paths.append(path)
67
+
68
+ image_results.append({
69
+ 'path': path,
70
+ 'score': score_val,
71
+ 'label': label_val
72
+ })
73
+
74
+ # Write to CSV (maintain backward compatibility)
75
+ with open(csv_filename, 'a') as f:
76
+ for score, label, path in zip(scores, labels, paths):
77
+ f.write(f"{path}, {score.item()}, {label.item()}\n")
78
+
79
+ # Calculate metrics
80
+ all_scores = np.array(all_scores)
81
+ all_labels = np.array(all_labels)
82
+
83
+ # Convert scores to predictions (threshold at 0, as used in train.py: y_pred > 0.0)
84
+ predictions = (all_scores > 0).astype(int)
85
+
86
+ # Calculate overall metrics
87
+ total_accuracy = accuracy_score(all_labels, predictions)
88
+
89
+ # TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1)
90
+ fake_mask = all_labels == 1
91
+ if fake_mask.sum() > 0:
92
+ tpr = accuracy_score(all_labels[fake_mask], predictions[fake_mask])
93
+ else:
94
+ tpr = 0.0
95
+
96
+ # TNR per dataset key (True Negative Rate) = TN / (TN + FP) = accuracy on real images (label==0)
97
+ tnr_per_dataset = {}
98
+
99
+ # Calculate TNR on real images (label==0) in the test set
100
+ real_mask = all_labels == 0
101
+ if real_mask.sum() > 0:
102
+ # Overall TNR calculated on all real images in the test set
103
+ tnr = accuracy_score(all_labels[real_mask], predictions[real_mask])
104
+ else:
105
+ tnr = 0.0
106
+
107
+ # Map TNR to training dataset keys (as shown in the example JSON structure)
108
+ # The TNR is calculated on the test set, but organized by training dataset keys
109
+ #for training_key in training_dataset_keys:
110
+ # tnr_per_dataset[training_key] = overall_tnr
111
+
112
+ # AUC calculation (needs probabilities, so we'll use sigmoid on scores)
113
+ if len(np.unique(all_labels)) > 1: # Need both classes for AUC
114
+ # Apply sigmoid to convert scores to probabilities
115
+ probabilities = torch.sigmoid(torch.tensor(all_scores)).numpy()
116
+ auc = roc_auc_score(all_labels, probabilities)
117
+ else:
118
+ auc = 0.0
119
+
120
+ execution_time = time.time() - start_time
121
+
122
+ # Prepare metrics JSON
123
+ metrics = {
124
+ 'TPR': float(tpr),
125
+ 'TNR': float(tnr),
126
+ 'Acc total': float(total_accuracy),
127
+ 'AUC': float(auc),
128
+ 'execution time': float(execution_time)
129
+ }
130
+
131
+ # Write metrics JSON
132
+ with open(metrics_filename, 'w') as f:
133
+ json.dump(metrics, f, indent=2)
134
+
135
+ # Write individual image results JSON
136
+ with open(image_results_filename, 'w') as f:
137
+ json.dump(image_results, f, indent=2)
138
+
139
+ print(f'\nMetrics saved to {metrics_filename}')
140
+ print(f'Image results saved to {image_results_filename}')
141
+ print(f'\nMetrics:')
142
+ print(f' TPR: {tpr:.4f}')
143
+ print(f' TNR: {tnr:.4f}')
144
+ print(f' Accuracy: {total_accuracy:.4f}')
145
+ print(f' AUC: {auc:.4f}')
146
+ print(f' Execution time: {execution_time:.2f} seconds')
147
+
148
+ if __name__ == '__main__':
149
+ parser = get_parser()
150
+ parser = add_processing_arguments(parser)
151
+ settings = parser.parse_args()
152
+
153
+ device = torch.device(settings.device if torch.cuda.is_available() else 'cpu')
154
+
155
+ test_dataloader = create_dataloader(settings, split='test')
156
+
157
+ model = create_architecture(settings.arch, pretrained=True, num_classes=1).to(device)
158
+ num_parameters = count_parameters(model)
159
+ print(f"Arch: {settings.arch} with #parameters {num_parameters}")
160
+
161
+ load_path = f'./checkpoint/{settings.name}/weights/best.pt'
162
+
163
+ print('loading the model from %s' % load_path)
164
+ model.load_state_dict(torch.load(load_path, map_location=device)['model'])
165
+ model.to(device)
166
+
167
+ test(test_dataloader, model, settings, device)
detectors/CLIP-D/train.py ADDED
@@ -0,0 +1,65 @@
1
+ import os
2
+ import tqdm
3
+ from utils import TrainingModel, create_dataloader, EarlyStopping
4
+ from sklearn.metrics import balanced_accuracy_score, roc_auc_score
5
+ from utils.processing import add_processing_arguments
6
+ from parser import get_parser
7
+
8
+ if __name__ == "__main__":
9
+ parser = get_parser()
10
+ parser = add_processing_arguments(parser)
11
+
12
+ opt = parser.parse_args()
13
+
14
+ os.makedirs(os.path.join('checkpoint', opt.name,'weights'), exist_ok=True)
15
+
16
+ valid_data_loader = create_dataloader(opt, split="val")
17
+ train_data_loader = create_dataloader(opt, split="train")
18
+ print()
19
+ print("# validation batches = %d" % len(valid_data_loader))
20
+ print("# training batches = %d" % len(train_data_loader))
21
+ model = TrainingModel(opt)
22
+ early_stopping = None
23
+ start_epoch = model.total_steps // len(train_data_loader)
24
+ print()
25
+
26
+ for epoch in range(start_epoch, opt.num_epoches+1):
27
+ if epoch > start_epoch:
28
+ # Training
29
+ pbar = tqdm.tqdm(train_data_loader)
30
+ for data in pbar:
31
+ loss = model.train_on_batch(data).item()
32
+ total_steps = model.total_steps
33
+ pbar.set_description(f"Train loss: {loss:.4f}")
34
+
35
+ # Save model
36
+ model.save_networks(epoch)
37
+
38
+ # Validation
39
+ print("Validation ...", flush=True)
40
+ y_true, y_pred, y_path = model.predict(valid_data_loader)
41
+ acc = balanced_accuracy_score(y_true, y_pred > 0.0)
42
+ auc = roc_auc_score(y_true, y_pred)
43
+ lr = model.get_learning_rate()
44
+ print("After {} epochs: val acc = {}; val auc = {}".format(epoch, acc, auc), flush=True)
45
+
46
+ # Early Stopping
47
+ if early_stopping is None:
48
+ early_stopping = EarlyStopping(
49
+ init_score=acc, patience=opt.earlystop_epoch,
50
+ delta=0.001, verbose=True,
51
+ )
52
+ print('Save best model', flush=True)
53
+ model.save_networks('best')
54
+ else:
55
+ if early_stopping(acc):
56
+ print('Save best model', flush=True)
57
+ model.save_networks('best')
58
+ if early_stopping.early_stop:
59
+ cont_train = model.adjust_learning_rate()
60
+ if cont_train:
61
+ print("Learning rate dropped by 10, continue training ...", flush=True)
62
+ early_stopping.reset_counter()
63
+ else:
64
+ print("Early stopping.", flush=True)
65
+ break
detectors/CLIP-D/utils/__init__.py ADDED
@@ -0,0 +1,52 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+
18
+ from .dataset import create_dataloader
19
+ from .training import TrainingModel
20
+ class EarlyStopping:
21
+ def __init__(self, init_score=None, patience=1, verbose=False, delta=0):
22
+ self.best_score = init_score
23
+ self.patience = patience
24
+ self.delta = delta
25
+ self.verbose = verbose
26
+ self.count_down = self.patience
27
+ self.early_stop = False
28
+
29
+ def __call__(self, score):
30
+ if self.best_score is None:
31
+ if self.verbose:
32
+ print(f'Score set to {score:.6f}.')
33
+ self.best_score = score
34
+ self.count_down = self.patience
35
+ return True
36
+ elif score <= self.best_score + self.delta:
37
+ self.count_down -= 1
38
+ if self.verbose:
39
+ print(f'EarlyStopping count_down: {self.count_down} on {self.patience}')
40
+ if self.count_down <= 0:
41
+ self.early_stop = True
42
+ return False
43
+ else:
44
+ if self.verbose:
45
+ print(f'Score increased from {self.best_score:.6f} to {score:.6f}.')
46
+ self.best_score = score
47
+ self.count_down = self.patience
48
+ return True
49
+
50
+ def reset_counter(self):
51
+ self.count_down = self.patience
52
+ self.early_stop = False
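
The training loop in `train.py` drives this `EarlyStopping` helper; a small, self-contained illustration of its behaviour (the scores and the shortened patience are made up for the example, while `train.py` uses `patience=opt.earlystop_epoch` and `delta=0.001`):

```python
from utils import EarlyStopping  # assumes the working directory is detectors/CLIP-D/

# Hypothetical validation accuracies fed in epoch by epoch.
es = EarlyStopping(init_score=0.80, patience=2, delta=0.001, verbose=False)

print(es(0.85))       # True  -> improvement: best score updated, caller saves 'best'
print(es(0.849))      # False -> within delta of the best score: count_down 2 -> 1
print(es(0.84))       # False -> count_down 1 -> 0
print(es.early_stop)  # True  -> train.py now drops the LR and calls reset_counter()
```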
detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.26 kB). View file
 
detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc ADDED
Binary file (5.22 kB). View file
 
detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc ADDED
Binary file (3.5 kB). View file
 
detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc ADDED
Binary file (4.21 kB). View file
 
detectors/CLIP-D/utils/dataset.py ADDED
@@ -0,0 +1,144 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+
18
+ import os
19
+ import json
20
+ import torch
21
+ import bisect
22
+ import numpy as np
23
+ from torch.utils.data.sampler import WeightedRandomSampler, RandomSampler
24
+ from torchvision import datasets
25
+ from .processing import make_processing
26
+
27
+ from PIL import Image, ImageFile
28
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
29
+
30
+ def create_dataloader(opt, split=None):
31
+ if split == "train":
32
+ opt.split = 'train'
33
+ is_train=True
34
+
35
+ elif split == "val":
36
+ opt.split = 'val'
37
+ is_train=False
38
+
39
+ elif split == "test":
40
+ opt.split = 'test'
41
+ is_train=False
42
+
43
+ else:
44
+ raise ValueError(f"Unknown split {split}")
45
+
46
+ dataset = TrueFake_dataset(opt)
47
+
48
+ data_loader = torch.utils.data.DataLoader(
49
+ dataset,
50
+ batch_size=opt.batch_size,
51
+ shuffle=is_train,
52
+ num_workers=int(opt.num_threads),
53
+ )
54
+ return data_loader
55
+
56
+ def parse_dataset(settings):
57
+ gen_keys = {
58
+ 'gan1':['StyleGAN'],
59
+ 'gan2':['StyleGAN2'],
60
+ 'gan3':['StyleGAN3'],
61
+ 'sd15':['StableDiffusion1.5'],
62
+ 'sd2':['StableDiffusion2'],
63
+ 'sd3':['StableDiffusion3'],
64
+ 'sdXL':['StableDiffusionXL'],
65
+ 'flux':['FLUX.1'],
66
+ 'realFFHQ':['FFHQ'],
67
+ 'realFORLAB':['FORLAB']
68
+ }
69
+
70
+ gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()]
71
+ # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key]
72
+ # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key]
73
+ gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key]
74
+
75
+ mod_keys = {
76
+ 'pre': ['PreSocial'],
77
+ 'fb': ['Facebook'],
78
+ 'tl': ['Telegram'],
79
+ 'tw': ['X'],
80
+ }
81
+
82
+ mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()]
83
+ mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']]
84
+
85
+ need_real = (settings.split in ['train', 'val'] and not len([data for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]]))
86
+
87
+ assert not need_real, 'Train task without real data; this is not handled automatically, terminating'
88
+
89
+ dataset_list = []
90
+ for data in settings.data_keys.split('&'):
91
+ gen, mod = data.split(':')
92
+ dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]})
93
+
94
+ return dataset_list
95
+
96
+ class TrueFake_dataset(datasets.DatasetFolder):
97
+ def __init__(self, settings):
98
+ self.data_root = settings.data_root
99
+ self.split = settings.split
100
+
101
+ with open(settings.split_file, "r") as f:
102
+ split_list = sorted(json.load(f)[self.split])
103
+
104
+ dataset_list = parse_dataset(settings)
105
+
106
+ self.samples = []
107
+ self.info = []
108
+ for dict in dataset_list:
109
+ generators = dict['gen']
110
+ modifiers = dict['mod']
111
+
112
+ for mod in modifiers:
113
+ for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True):
114
+ if len(dataset_dirs):
115
+ continue
116
+ (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3]
117
+
118
+ if gen in generators:
119
+ for filename in sorted(dataset_files):
120
+ if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']:
121
+ if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])):
122
+ self.samples.append(os.path.join(dataset_root, filename))
123
+ self.info.append((mod, label, gen, sub))
124
+
125
+ self.transform = make_processing(settings)
126
+ print(self.transform)
127
+
128
+ def _in_list(self, split, elem):
129
+ i = bisect.bisect_left(split, elem)
130
+ return i != len(split) and split[i] == elem
131
+
132
+ def __len__(self):
133
+ return len(self.samples)
134
+
135
+ def __getitem__(self, index):
136
+ path = self.samples[index]
137
+ mod, label, gen, sub = self.info[index]
138
+
139
+ sample = Image.open(path).convert('RGB')
140
+ sample = self.transform(sample)
141
+
142
+ target = 1.0 if label == 'Fake' else 0.0
143
+
144
+ return {'img':sample, 'target':target, 'path':path}
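
The `--data_keys` string pairs a generator key with a sharing-platform key using `:`, and joins multiple pairs with `&`. A short illustration of how `parse_dataset` above expands it (the chosen keys are just an example):

```python
from types import SimpleNamespace
from utils.dataset import parse_dataset  # assumes the working directory is detectors/CLIP-D/

# Hypothetical settings: test split, Stable Diffusion XL images shared on Facebook
# plus real FFHQ images shared on Facebook.
settings = SimpleNamespace(split='test', data_keys='sdXL:fb&realFFHQ:fb')
print(parse_dataset(settings))
# [{'gen': ['StableDiffusionXL'], 'mod': ['Facebook']},
#  {'gen': ['FFHQ'], 'mod': ['Facebook']}]
```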
detectors/CLIP-D/utils/processing.py ADDED
@@ -0,0 +1,133 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+ import torchvision.transforms.v2 as Tv2
18
+
19
+ def make_processing(opt):
20
+ opt = parse_arguments(opt)
21
+ transforms_list = list() # list of transforms
22
+
23
+ if opt.task == 'train':
24
+ transforms_aug = make_aug(opt) # make data-augmentation transforms
25
+ if transforms_aug is not None:
26
+ transforms_list.append(transforms_aug)
27
+
28
+ transforms_post = make_post(opt) # make post-data-augmentation transforms
29
+ if transforms_post is not None:
30
+ transforms_list.append(transforms_post)
31
+
32
+ transforms_list.append(make_normalize(opt)) # make normalization
33
+
34
+ t = Tv2.Compose(transforms_list)
35
+
36
+ return t
37
+
38
+
39
+ def add_processing_arguments(parser):
40
+ # parser is an argparse.ArgumentParser
41
+ #
42
+ # ICASSP2023: --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.2 --jitter_prob 0.8 --colordist_prob 0.2 --cutout_prob 0.2 --noise_prob 0.2 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100 --resize_size 256 --resize_ratio 0.75
43
+ # ICME2021 : --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.0 --jitter_prob 0.0 --colordist_prob 0.0 --cutout_prob 0.0 --noise_prob 0.0 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100
44
+ #
45
+
46
+ parser.add_argument("--resizeSize", type=int, default=224, help="scale images to this size after augmentation")
47
+
48
+ # data-augmentation probabilities
49
+ parser.add_argument("--resize_prob", type=float, default=0.0)
50
+ parser.add_argument("--cmp_prob", type=float, default=0.0)
51
+
52
+ # data-augmentation parameters
53
+ parser.add_argument("--cmp_qual", default="75")
54
+ parser.add_argument("--resize_size", type=int, default=256)
55
+ parser.add_argument("--resize_ratio", type=float, default=1.0)
56
+
57
+ # other
58
+ parser.add_argument("--norm_type", type=str, default="clip")
59
+
60
+ return parser
61
+
62
+
63
+ def parse_arguments(opt):
64
+ if not isinstance(opt.cmp_qual, list):
65
+ opt.cmp_qual = [int(s) for s in opt.cmp_qual.split(",")]
66
+ return opt
67
+
68
+
69
+ def make_post(opt):
70
+ transforms_list = list()
71
+ if opt.resizeSize > 0:
72
+ print("\nUsing Post Resizing\n")
73
+ transforms_list.append(Tv2.Resize(opt.resizeSize, interpolation=Tv2.InterpolationMode.BICUBIC))
74
+ transforms_list.append(Tv2.CenterCrop((opt.resizeSize, opt.resizeSize)))
75
+
76
+ if len(transforms_list) == 0:
77
+ return None
78
+ else:
79
+ return Tv2.Compose(transforms_list)
80
+
81
+
82
+ def make_aug(opt):
83
+ # AUG
84
+ transforms_list_aug = list()
85
+
86
+ if (opt.resize_size > 0) and (opt.resize_prob > 0): # opt.resized_ratio
87
+ transforms_list_aug.append(
88
+ Tv2.RandomApply(
89
+ [
90
+ Tv2.RandomResizedCrop(
91
+ size=opt.resize_size,
92
+ scale=(5/8, 1.0),
93
+ ratio=(opt.resize_ratio, 1.0 / opt.resize_ratio),
94
+ )
95
+ ],
96
+ opt.resize_prob,
97
+ )
98
+ )
99
+
100
+ if opt.cmp_prob > 0:
101
+ transforms_list_aug.append(
102
+ Tv2.RandomApply(
103
+ [
104
+ Tv2.JPEG(
105
+ opt.cmp_qual
106
+ )
107
+ ],
108
+ opt.cmp_prob,
109
+ )
110
+ )
111
+
112
+ if len(transforms_list_aug) > 0:
113
+ return Tv2.Compose(transforms_list_aug)
114
+ else:
115
+ return None
116
+
117
+
118
+ def make_normalize(opt):
119
+ transforms_list = list()
120
+
121
+ if opt.norm_type == "clip":
122
+ print("normalize CLIP")
123
+ transforms_list.append(Tv2.ToTensor())
124
+ transforms_list.append(
125
+ Tv2.Normalize(
126
+ mean=(0.48145466, 0.4578275, 0.40821073),
127
+ std=(0.26862954, 0.26130258, 0.27577711),
128
+ )
129
+ )
130
+ else:
131
+ assert False
132
+
133
+ return Tv2.Compose(transforms_list)
detectors/CLIP-D/utils/training.py ADDED
@@ -0,0 +1,105 @@
1
+ '''
2
+ Copyright 2024 Image Processing Research Group of University Federico
3
+ II of Naples ('GRIP-UNINA'). All rights reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ '''
17
+
18
+ import os
19
+ import torch
20
+ import numpy as np
21
+ import tqdm
22
+ from networks import create_architecture, count_parameters
23
+
24
+ class TrainingModel(torch.nn.Module):
25
+
26
+ def __init__(self, opt):
27
+ super(TrainingModel, self).__init__()
28
+
29
+ self.opt = opt
30
+ self.total_steps = 0
31
+ self.save_dir = os.path.join('checkpoint', opt.name,'weights')
32
+ self.device = torch.device(opt.device if torch.cuda.is_available() else 'cpu')
33
+
34
+ self.model = create_architecture(opt.arch, pretrained=True, num_classes=1)
35
+ num_parameters = count_parameters(self.model)
36
+ print(f"Arch: {opt.arch} with #trainable parameters {num_parameters}")
37
+
38
+ self.loss_fn = torch.nn.BCEWithLogitsLoss().to(self.device)
39
+ parameters = filter(lambda p: p.requires_grad, self.model.parameters())
40
+ self.optimizer = torch.optim.Adam(parameters, lr=opt.lr, betas=(opt.beta1, 0.999), weight_decay=opt.weight_decay)
41
+
42
+ self.model.to(self.device)
43
+
44
+ def adjust_learning_rate(self, min_lr=1e-6):
45
+ for param_group in self.optimizer.param_groups:
46
+ param_group["lr"] /= 10.0
47
+ if param_group["lr"] < min_lr:
48
+ return False
49
+ return True
50
+
51
+ def get_learning_rate(self):
52
+ for param_group in self.optimizer.param_groups:
53
+ return param_group["lr"]
54
+
55
+ def train_on_batch(self, data):
56
+ self.total_steps += 1
57
+ self.model.train()
58
+ input = data['img'].to(self.device)
59
+ label = data['target'].to(self.device).float()
60
+ output = self.model(input)
61
+ if len(output.shape) == 4:
62
+ ss = output.shape
63
+ loss = self.loss_fn(
64
+ output,
65
+ label[:, None, None, None].repeat(
66
+ (1, int(ss[1]), int(ss[2]), int(ss[3]))
67
+ ),
68
+ )
69
+ else:
70
+ loss = self.loss_fn(output.squeeze(1), label)
71
+ self.optimizer.zero_grad()
72
+ loss.backward()
73
+ self.optimizer.step()
74
+ return loss.cpu()
75
+
76
+ def save_networks(self, epoch):
77
+ save_filename = f'{epoch}.pt'
78
+ save_path = os.path.join(self.save_dir, save_filename)
79
+
80
+ # serialize model and optimizer to dict
81
+ state_dict = {
82
+ 'model': self.model.state_dict(),
83
+ 'optimizer': self.optimizer.state_dict(),
84
+ 'total_steps': self.total_steps,
85
+ }
86
+
87
+ torch.save(state_dict, save_path)
88
+
89
+ def predict(self, data_loader):
90
+ model = self.model.eval()
91
+ with torch.no_grad():
92
+ y_true, y_pred, y_path = [], [], []
93
+ for data in tqdm.tqdm(data_loader):
94
+ img = data['img']
95
+ label = data['target'].cpu().numpy()
96
+ paths = list(data['path'])
97
+ out_tens = model(img.to(self.device)).cpu().numpy()[:, -1]
98
+ assert label.shape == out_tens.shape
99
+
100
+ y_pred.extend(out_tens.tolist())
101
+ y_true.extend(label.tolist())
102
+ y_path.extend(paths)
103
+
104
+ y_true, y_pred = np.array(y_true), np.array(y_pred)
105
+ return y_true, y_pred, y_path
detectors/NPR/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection
2
+
3
+ [![Official Github Repo](https://img.shields.io/badge/Github%20page-222222.svg?style=for-the-badge&logo=github)](https://github.com/chuangchuangtan/NPR-DeepfakeDetection)
4
+ [![Paper](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/abs/2312.10461)
5
+
6
+ Original Paper:
7
+ [Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection](https://arxiv.org/abs/2312.10461).
8
+
9
+ Authors: Chuangchuang Tan, Huan Liu, Yao Zhao, Shikui Wei, Guanghua Gu, Ping Liu, Yunchao Wei.
10
+
11
+ ## Abstract
12
+
13
+ Recently, the proliferation of highly realistic synthetic
14
+ images, facilitated through a variety of GANs and Diffusions,
15
+ has significantly heightened the susceptibility to misuse.
16
+ While the primary focus of deepfake detection has
17
+ traditionally centered on the design of detection algorithms,
18
+ an investigative inquiry into the generator architectures has
19
+ remained conspicuously absent in recent years. This paper
20
+ contributes to this lacuna by rethinking the architectures of
21
+ CNN-based generators, thereby establishing a generalized
22
+ representation of synthetic artifacts. Our findings
23
+ illuminate that the up-sampling operator can, beyond
24
+ frequency-based artifacts, produce generalized forgery artifacts.
25
+ In particular, the local interdependence among image pixels
26
+ caused by upsampling operators is significantly demonstrated
27
+ in synthetic images generated by GAN or diffusion.
28
+ Building upon this observation, we introduce the concept of
29
+ Neighboring Pixel Relationships (NPR) as a means to
30
+ capture and characterize the generalized structural artifacts
31
+ stemming from up-sampling operations. A comprehensive
32
+ analysis is conducted on an open-world dataset, comprising
33
+ samples generated by 28 distinct generative models. This
34
+ analysis culminates in the establishment of a novel
35
+ state-of-the-art performance, showcasing a remarkable 11.6%
36
+ improvement over existing methods.
37
+
38
+
39
+ ## Please Cite
40
+
41
+ ```
42
+ @inproceedings{tan2024rethinking,
43
+ title={Rethinking the up-sampling operations in cnn-based generative network for generalizable deepfake detection},
44
+ author={Tan, Chuangchuang and Zhao, Yao and Wei, Shikui and Gu, Guanghua and Liu, Ping and Wei, Yunchao},
45
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
46
+ pages={28130--28139},
47
+ year={2024}
48
+ }
49
+ ```
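
The repository's network code is not reproduced in this README, but the idea described in the abstract, contrasting each pixel with a nearest-neighbour down/up-sampled copy of its neighbourhood, can be sketched roughly as follows (an illustrative approximation only, not the exact NPR implementation):

```python
import torch
import torch.nn.functional as F

def npr_residual(x: torch.Tensor, factor: int = 2) -> torch.Tensor:
    """Rough sketch of a neighboring-pixel residual: the difference between the
    image and its nearest-neighbour down/up-sampled reconstruction, which is
    where up-sampling artifacts of generative models tend to concentrate."""
    down = F.interpolate(x, scale_factor=1.0 / factor, mode='nearest')
    up = F.interpolate(down, scale_factor=factor, mode='nearest')
    return x - up

# x: a (batch, 3, H, W) float tensor with H and W divisible by `factor`.
x = torch.randn(1, 3, 224, 224)
print(npr_residual(x).shape)  # torch.Size([1, 3, 224, 224])
```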
detectors/NPR/__pycache__/util.cpython-310.pyc ADDED
Binary file (1.73 kB). View file
 
detectors/NPR/__pycache__/validate.cpython-310.pyc ADDED
Binary file (1.7 kB). View file
 
detectors/NPR/checkpoint/pretrained/weights/best.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb76594b7304c23249206ab2ac434dbd153c4114660ecc11a23eb82848f0721d
3
+ size 5831180
detectors/NPR/data/__init__.py ADDED
@@ -0,0 +1,201 @@
1
+ import torch
2
+ import numpy as np
3
+ from torch.utils.data.sampler import WeightedRandomSampler
4
+
5
+ from .datasets import dataset_folder
6
+ from torchvision.datasets import DatasetFolder
7
+ import json
8
+ import bisect
9
+ from PIL import Image
10
+ import torchvision.transforms.v2 as Tv2
11
+ '''
12
+ def get_dataset(opt):
13
+ dset_lst = []
14
+ for cls in opt.classes:
15
+ root = opt.dataroot + '/' + cls
16
+ dset = dataset_folder(opt, root)
17
+ dset_lst.append(dset)
18
+ return torch.utils.data.ConcatDataset(dset_lst)
19
+ '''
20
+
21
+ import os
22
+ # def get_dataset(opt):
23
+ # classes = os.listdir(opt.dataroot) if len(opt.classes) == 0 else opt.classes
24
+ # if '0_real' not in classes or '1_fake' not in classes:
25
+ # dset_lst = []
26
+ # for cls in classes:
27
+ # root = opt.dataroot + '/' + cls
28
+ # dset = dataset_folder(opt, root)
29
+ # dset_lst.append(dset)
30
+ # return torch.utils.data.ConcatDataset(dset_lst)
31
+ # return dataset_folder(opt, opt.dataroot)
32
+
33
+ # def get_bal_sampler(dataset):
34
+ # targets = []
35
+ # for d in dataset.datasets:
36
+ # targets.extend(d.targets)
37
+
38
+ # ratio = np.bincount(targets)
39
+ # w = 1. / torch.tensor(ratio, dtype=torch.float)
40
+ # sample_weights = w[targets]
41
+ # sampler = WeightedRandomSampler(weights=sample_weights,
42
+ # num_samples=len(sample_weights))
43
+ # return sampler
44
+
45
+
46
+ # def create_dataloader(opt):
47
+ # shuffle = not opt.serial_batches if (opt.isTrain and not opt.class_bal) else False
48
+ # dataset = get_dataset(opt)
49
+ # sampler = get_bal_sampler(dataset) if opt.class_bal else None
50
+
51
+ # data_loader = torch.utils.data.DataLoader(dataset,
52
+ # batch_size=opt.batch_size,
53
+ # shuffle=shuffle,
54
+ # sampler=sampler,
55
+ # num_workers=int(opt.num_threads))
56
+ # return data_loader
57
+
58
+
59
+ def parse_dataset(settings):
60
+ gen_keys = {
61
+ 'gan1':['StyleGAN'],
62
+ 'gan2':['StyleGAN2'],
63
+ 'gan3':['StyleGAN3'],
64
+ 'sd15':['StableDiffusion1.5'],
65
+ 'sd2':['StableDiffusion2'],
66
+ 'sd3':['StableDiffusion3'],
67
+ 'sdXL':['StableDiffusionXL'],
68
+ 'flux':['FLUX.1'],
69
+ 'realFFHQ':['FFHQ'],
70
+ 'realFORLAB':['FORLAB']
71
+ }
72
+
73
+ gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()]
74
+ # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key]
75
+ # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key]
76
+ gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key]
77
+
78
+ mod_keys = {
79
+ 'pre': ['PreSocial'],
80
+ 'fb': ['Facebook'],
81
+ 'tl': ['Telegram'],
82
+ 'tw': ['X'],
83
+ }
84
+
85
+ mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()]
86
+ mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']]
87
+
88
+ need_real = (settings.task == 'train' and not len([data.split(':')[0] for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]]))
89
+
90
+ assert not need_real, 'Train task without real data; this is not handled automatically, terminating'
91
+
92
+ dataset_list = []
93
+ for data in settings.data_keys.split('&'):
94
+ gen, mod = data.split(':')
95
+ dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]})
96
+
97
+ return dataset_list
98
+
99
+ class TrueFake_dataset(DatasetFolder):
100
+ def __init__(self, settings):
101
+ self.data_root = settings.data_root
102
+ self.split = settings.split
103
+
104
+ with open(settings.split_file, "r") as f:
105
+ split_list = sorted(json.load(f)[self.split])
106
+
107
+ dataset_list = parse_dataset(settings)
108
+
109
+ self.samples = []
110
+ self.info = []
111
+ for dict in dataset_list:
112
+ generators = dict['gen']
113
+ modifiers = dict['mod']
114
+
115
+ for mod in modifiers:
116
+ for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True):
117
+ if len(dataset_dirs):
118
+ continue
119
+
120
+ (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3]
121
+
122
+ if gen in generators:
123
+ for filename in sorted(dataset_files):
124
+ if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']:
125
+ if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])):
126
+ self.samples.append(os.path.join(dataset_root, filename))
127
+ self.info.append((mod, label, gen, sub))
128
+
129
+ if settings.isTrain:
130
+ crop_func = Tv2.RandomCrop(settings.cropSize)
131
+ elif settings.no_crop:
132
+ crop_func = Tv2.Identity()
133
+ else:
134
+ crop_func = Tv2.CenterCrop(settings.cropSize)
135
+
136
+ if settings.isTrain and not settings.no_flip:
137
+ flip_func = Tv2.RandomHorizontalFlip()
138
+ else:
139
+ flip_func = Tv2.Identity()
140
+
141
+ if not settings.isTrain and settings.no_resize:
142
+ rz_func = Tv2.Identity()
143
+ else:
144
+ rz_func = Tv2.Resize((settings.loadSize, settings.loadSize))
145
+
146
+ self.transform = Tv2.Compose([
147
+ rz_func,
148
+ crop_func,
149
+ flip_func,
150
+ Tv2.ToTensor(),
151
+ Tv2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
152
+ ])
153
+
154
+
155
+ def _in_list(self, split, elem):
156
+ i = bisect.bisect_left(split, elem)
157
+ return i != len(split) and split[i] == elem
158
+
159
+ def __len__(self):
160
+ return len(self.samples)
161
+
162
+ def __getitem__(self, index):
163
+ path = self.samples[index]
164
+ mod, label, gen, sub = self.info[index]
165
+
166
+ image = Image.open(path).convert('RGB')
167
+ sample = self.transform(image)
168
+
169
+ target = 1.0 if label == 'Fake' else 0.0
170
+
171
+ return sample, target, path
172
+
173
+
174
+ def create_dataloader(settings, split=None):
175
+ if split == "train":
176
+ settings.split = 'train'
177
+ is_train=True
178
+
179
+ elif split == "val":
180
+ settings.split = 'val'
181
+ settings.batch_size = settings.batch_size//4
182
+ is_train=False
183
+
184
+ elif split == "test":
185
+ settings.split = 'test'
186
+ settings.batch_size = settings.batch_size//4
187
+ is_train=False
188
+
189
+ else:
190
+ raise ValueError(f"Unknown split {split}")
191
+
192
+ dataset = TrueFake_dataset(settings)
193
+
194
+ data_loader = torch.utils.data.DataLoader(
195
+ dataset,
196
+ batch_size=settings.batch_size,
197
+ num_workers=int(settings.num_threads),
198
+ shuffle = is_train,
199
+ collate_fn=None,
200
+ )
201
+ return data_loader
detectors/NPR/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (4.78 kB). View file
 
detectors/NPR/data/__pycache__/datasets.cpython-310.pyc ADDED
Binary file (4.65 kB). View file
 
detectors/NPR/data/datasets.py ADDED
@@ -0,0 +1,139 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torchvision.datasets as datasets
4
+ import torchvision.transforms as transforms
5
+ import torchvision.transforms.functional as TF
6
+ from random import random, choice
7
+ from io import BytesIO
8
+ from PIL import Image
9
+ from PIL import ImageFile
10
+ from scipy.ndimage.filters import gaussian_filter
11
+ from torchvision.transforms import InterpolationMode
12
+
13
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
14
+
15
+ def dataset_folder(opt, root):
16
+ if opt.mode == 'binary':
17
+ return binary_dataset(opt, root)
18
+ if opt.mode == 'filename':
19
+ return FileNameDataset(opt, root)
20
+ raise ValueError('opt.mode needs to be binary or filename.')
21
+
22
+
23
+ def binary_dataset(opt, root):
24
+ if opt.isTrain:
25
+ crop_func = transforms.RandomCrop(opt.cropSize)
26
+ elif opt.no_crop:
27
+ crop_func = transforms.Lambda(lambda img: img)
28
+ else:
29
+ crop_func = transforms.CenterCrop(opt.cropSize)
30
+
31
+ if opt.isTrain and not opt.no_flip:
32
+ flip_func = transforms.RandomHorizontalFlip()
33
+ else:
34
+ flip_func = transforms.Lambda(lambda img: img)
35
+ if not opt.isTrain and opt.no_resize:
36
+ rz_func = transforms.Lambda(lambda img: img)
37
+ else:
38
+ # rz_func = transforms.Lambda(lambda img: custom_resize(img, opt))
39
+ rz_func = transforms.Resize((opt.loadSize, opt.loadSize))
40
+
41
+ dset = datasets.ImageFolder(
42
+ root,
43
+ transforms.Compose([
44
+ rz_func,
45
+ # transforms.Lambda(lambda img: data_augment(img, opt)),
46
+ crop_func,
47
+ flip_func,
48
+ transforms.ToTensor(),
49
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
50
+ ]))
51
+ return dset
52
+
53
+
54
+ class FileNameDataset(datasets.ImageFolder):
55
+ def name(self):
56
+ return 'FileNameDataset'
57
+
58
+ def __init__(self, opt, root):
59
+ self.opt = opt
60
+ super().__init__(root)
61
+
62
+ def __getitem__(self, index):
63
+ # Loading sample
64
+ path, target = self.samples[index]
65
+ return path
66
+
67
+
68
+ def data_augment(img, opt):
69
+ img = np.array(img)
70
+
71
+ if random() < opt.blur_prob:
72
+ sig = sample_continuous(opt.blur_sig)
73
+ gaussian_blur(img, sig)
74
+
75
+ if random() < opt.jpg_prob:
76
+ method = sample_discrete(opt.jpg_method)
77
+ qual = sample_discrete(opt.jpg_qual)
78
+ img = jpeg_from_key(img, qual, method)
79
+
80
+ return Image.fromarray(img)
81
+
82
+
83
+ def sample_continuous(s):
84
+ if len(s) == 1:
85
+ return s[0]
86
+ if len(s) == 2:
87
+ rg = s[1] - s[0]
88
+ return random() * rg + s[0]
89
+ raise ValueError("Length of iterable s should be 1 or 2.")
90
+
91
+
92
+ def sample_discrete(s):
93
+ if len(s) == 1:
94
+ return s[0]
95
+ return choice(s)
96
+
97
+
98
+ def gaussian_blur(img, sigma):
99
+ gaussian_filter(img[:,:,0], output=img[:,:,0], sigma=sigma)
100
+ gaussian_filter(img[:,:,1], output=img[:,:,1], sigma=sigma)
101
+ gaussian_filter(img[:,:,2], output=img[:,:,2], sigma=sigma)
102
+
103
+
104
+ def cv2_jpg(img, compress_val):
105
+ img_cv2 = img[:,:,::-1]
106
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compress_val]
107
+ result, encimg = cv2.imencode('.jpg', img_cv2, encode_param)
108
+ decimg = cv2.imdecode(encimg, 1)
109
+ return decimg[:,:,::-1]
110
+
111
+
112
+ def pil_jpg(img, compress_val):
113
+ out = BytesIO()
114
+ img = Image.fromarray(img)
115
+ img.save(out, format='jpeg', quality=compress_val)
116
+ img = Image.open(out)
117
+ # load from memory before ByteIO closes
118
+ img = np.array(img)
119
+ out.close()
120
+ return img
121
+
122
+
123
+ jpeg_dict = {'cv2': cv2_jpg, 'pil': pil_jpg}
124
+ def jpeg_from_key(img, compress_val, key):
125
+ method = jpeg_dict[key]
126
+ return method(img, compress_val)
127
+
128
+
129
+ # rz_dict = {'bilinear': Image.BILINEAR,
130
+ # 'bicubic': Image.BICUBIC,
131
+ # 'lanczos': Image.LANCZOS,
132
+ # 'nearest': Image.NEAREST}
133
+ rz_dict = {'bilinear': InterpolationMode.BILINEAR,
134
+ 'bicubic': InterpolationMode.BICUBIC,
135
+ 'lanczos': InterpolationMode.LANCZOS,
136
+ 'nearest': InterpolationMode.NEAREST}
137
+ def custom_resize(img, opt):
138
+ interp = sample_discrete(opt.rz_interp)
139
+ return TF.resize(img, (opt.loadSize,opt.loadSize), interpolation=rz_dict[interp])
detectors/NPR/detect.py ADDED
@@ -0,0 +1,106 @@
1
+ import os
2
+ import sys
3
+ import time
4
+ import yaml
5
+ import torch
6
+ from PIL import Image
7
+ import torchvision.transforms as transforms
8
+ import argparse
9
+
10
+ # Add project root to path for imports
11
+ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
+ sys.path.append(project_root)
13
+ from support.detect_utils import format_result, save_result, get_device
14
+ import networks.resnet as resnet
15
+ from networks.resnet import resnet50
16
+
17
+
18
+ def parse_args():
19
+ parser = argparse.ArgumentParser(description='NPR single image detector')
20
+ parser.add_argument('--image', type=str, required=True, help='Path to input image')
21
+ parser.add_argument('--model', type=str, default='pretrained', help='Checkpoint name under detectors/NPR/checkpoint/ (e.g., pretrained)')
22
+ parser.add_argument('--output', type=str, help='Path to save detection result JSON')
23
+ parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)')
24
+ parser.add_argument('--config', type=str, default='configs/NPR.yaml', help='Path to config file')
25
+ return parser.parse_args()
26
+
27
+ def load_config(config_path):
28
+ """Load configuration from YAML file."""
29
+ with open(config_path, 'r') as f:
30
+ return yaml.safe_load(f)
31
+
32
+ def load_image(image_path, size=224):
33
+ """Load and preprocess image."""
34
+ if not os.path.exists(image_path):
35
+ raise FileNotFoundError(f"Image not found: {image_path}")
36
+
37
+ image = Image.open(image_path).convert('RGB')
38
+ transform = transforms.Compose([
39
+ transforms.Resize(size),
40
+ transforms.CenterCrop(size),
41
+ transforms.ToTensor(),
42
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
43
+ ])
44
+ return transform(image).unsqueeze(0)
45
+
46
+ def main():
47
+ args = parse_args()
48
+
49
+ # Setup device
50
+ device = torch.device(args.device) if args.device else get_device()
51
+
52
+ # Load model
53
+ try:
54
+ # Initialize model
55
+ model = resnet50(num_classes=1).to(device)
56
+ load_path = f'./detectors/NPR/checkpoint/{args.model}/weights/best.pt'
57
+
58
+ if not os.path.exists(load_path):
59
+ raise FileNotFoundError(f"Model weights not found at: {load_path}")
60
+
61
+ checkpoint = torch.load(load_path, map_location=device)
62
+
63
+ model.load_state_dict(checkpoint, strict=True)
64
+
65
+ model.eval()
66
+ except Exception as e:
67
+ print(f"Error loading model: {e}")
68
+ return
69
+
70
+ # Load and preprocess image
71
+ try:
72
+ image_tensor = load_image(args.image).to(device)
73
+ except Exception as e:
74
+ print(f"Error loading image: {e}")
75
+ return
76
+
77
+ # Run detection
78
+ start_time = time.time()
79
+ with torch.no_grad():
80
+ try:
81
+ score = model(image_tensor)
82
+ prediction = torch.sigmoid(score)
83
+
84
+ confidence = prediction.item()
85
+
86
+ result = format_result(
87
+ 'fake' if confidence>0.5 else 'real',
88
+ confidence,
89
+ time.time() - start_time
90
+ )
91
+
92
+ # Print result
93
+ print(f"Prediction: {result['prediction']}")
94
+ print(f"Confidence: {result['confidence']:.4f}")
95
+ print(f"Time: {result['elapsed_time']:.3f}s")
96
+
97
+ # Save result if output path provided
98
+ if args.output:
99
+ save_result(result, args.output)
100
+
101
+ except Exception as e:
102
+ print(f"Error during detection: {e}")
103
+ return
104
+
105
+ if __name__ == '__main__':
106
+ main()
detectors/NPR/networks/__init__.py ADDED
File without changes