Commit 9c4b1c4 · committed by AMontiB
Parent(s): 0402b73

Your original commit message (now includes LFS pointer)

Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.
- .DS_Store +0 -0
- .gitattributes +6 -32
- DEPLOYMENT.md +124 -0
- README.md +269 -10
- README_HF.md +46 -0
- __pycache__/app.cpython-310.pyc +0 -0
- app.py +117 -0
- configs/CLIP-D.yaml +72 -0
- configs/NPR.yaml +56 -0
- configs/P2G.yaml +56 -0
- configs/R50_TF.yaml +61 -0
- configs/R50_nodown.yaml +84 -0
- demo_images/README.md +21 -0
- detectors/.DS_Store +0 -0
- detectors/.gitattributes +1 -0
- detectors/CLIP-D/.DS_Store +0 -0
- detectors/CLIP-D/LICENSE +202 -0
- detectors/CLIP-D/README.md +29 -0
- detectors/CLIP-D/__pycache__/parser.cpython-310.pyc +0 -0
- detectors/CLIP-D/checkpoint/.DS_Store +0 -0
- detectors/CLIP-D/checkpoint/pretrained/.DS_Store +0 -0
- detectors/CLIP-D/checkpoint/pretrained/weights/best.pt +3 -0
- detectors/CLIP-D/detect.py +109 -0
- detectors/CLIP-D/networks/__init__.py +70 -0
- detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc +0 -0
- detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc +0 -0
- detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc +0 -0
- detectors/CLIP-D/networks/openclipnet.py +85 -0
- detectors/CLIP-D/networks/resnet_mod.py +335 -0
- detectors/CLIP-D/parser.py +25 -0
- detectors/CLIP-D/test.py +167 -0
- detectors/CLIP-D/train.py +65 -0
- detectors/CLIP-D/utils/__init__.py +52 -0
- detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc +0 -0
- detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc +0 -0
- detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc +0 -0
- detectors/CLIP-D/utils/dataset.py +144 -0
- detectors/CLIP-D/utils/processing.py +133 -0
- detectors/CLIP-D/utils/training.py +105 -0
- detectors/NPR/README.md +49 -0
- detectors/NPR/__pycache__/util.cpython-310.pyc +0 -0
- detectors/NPR/__pycache__/validate.cpython-310.pyc +0 -0
- detectors/NPR/checkpoint/pretrained/weights/best.pt +3 -0
- detectors/NPR/data/__init__.py +201 -0
- detectors/NPR/data/__pycache__/__init__.cpython-310.pyc +0 -0
- detectors/NPR/data/__pycache__/datasets.cpython-310.pyc +0 -0
- detectors/NPR/data/datasets.py +139 -0
- detectors/NPR/detect.py +106 -0
- detectors/NPR/networks/__init__.py +0 -0
.DS_Store
ADDED · Binary file (8.2 kB)
.gitattributes
CHANGED (@@ -1,35 +1,9 @@)

```diff
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+detectors/P2G/src/models/clip/bpe_simple_vocab_16e6.txt.gz filter=lfs diff=lfs merge=lfs -text
```
DEPLOYMENT.md
ADDED (+124 lines)

# Hugging Face Spaces Deployment Guide

## Prerequisites

1. A Hugging Face account
2. Git LFS installed locally: `git lfs install`
3. Model weights downloaded to the correct directories

## Deployment Steps

### 1. Prepare Model Weights

You have two options:

#### Option A: Upload weights via Git LFS (Recommended for public Spaces)

```bash
# Initialize Git LFS
git lfs install

# Track large files
git lfs track "*.pt"
git lfs track "*.pth"
git lfs track "*.pkl"

# Add weights
git add .gitattributes
git add detectors/*/checkpoint/pretrained/weights/best.pt
git add detectors/P2G/src/utils/classes.pkl
git commit -m "Add model weights"
```

#### Option B: Configure automatic download

1. Upload your model weights to Google Drive or another host
2. Update `download_weights.py` with the correct URLs (a sketch of such a script is shown below)
3. Weights will download automatically when the Space starts

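For reference, a minimal sketch of what a `download_weights.py` along these lines could look like. The `download_all_weights()` entry point matches what `app.py` (added in this commit) imports; the URLs and the download mechanics below are placeholders, not the repository's actual values.

```python
# Hypothetical sketch of download_weights.py -- URLs are placeholders, not real hosts.
import os
import urllib.request

# Placeholder mapping: detector name -> (direct-download URL, local target path)
WEIGHT_URLS = {
    "CLIP-D": ("https://example.com/clip-d_best.pt",
               "detectors/CLIP-D/checkpoint/pretrained/weights/best.pt"),
    "NPR": ("https://example.com/npr_best.pt",
            "detectors/NPR/checkpoint/pretrained/weights/best.pt"),
}

def download_all_weights():
    """Fetch any missing weight files before the Space starts serving."""
    for name, (url, path) in WEIGHT_URLS.items():
        if os.path.exists(path):
            print(f"[{name}] weights already present, skipping")
            continue
        os.makedirs(os.path.dirname(path), exist_ok=True)
        print(f"[{name}] downloading weights ...")
        urllib.request.urlretrieve(url, path)

if __name__ == "__main__":
    download_all_weights()
```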
### 2. Create Hugging Face Space

1. Go to https://huggingface.co/spaces
2. Click "Create new Space"
3. Choose:
   - **Name**: deepfake-detection-library (or your preferred name)
   - **SDK**: Gradio
   - **License**: MIT
   - **Hardware**: CPU Basic (free) or upgrade to GPU if needed

### 3. Push to Hugging Face

```bash
# Add HF remote (replace YOUR_USERNAME and SPACE_NAME)
git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME

# Rename README for HF
mv README.md README_github.md
mv README_HF.md README.md

# Push to Hugging Face
git add .
git commit -m "Initial commit for HF Spaces"
git push hf main
```

### 4. Configure Space

In your Space settings on Hugging Face:

- **Hardware**: Start with CPU Basic (free), upgrade to GPU if needed
- **Secrets**: Add any API keys if needed (none required currently)
- **Variables**: No special environment variables needed

### 5. Verify Deployment

1. Wait for the Space to build (may take 5-10 minutes)
2. Test each detector with sample images
3. Check logs for any errors

## File Size Considerations

- **Git LFS** is required for files >10MB
- Each model weight file (~100-500MB) will be stored via LFS
- Free HF Spaces have storage limits; consider:
  - Upgrading to Pro for more storage
  - Using automatic download instead of uploading weights

## Troubleshooting

### Space fails to build

- Check `requirements.txt` for incompatible versions
- Review build logs in the Space interface
- Ensure all dependencies are listed

### Weights not loading

- Verify Git LFS tracked the files: `git lfs ls-files`
- Check file sizes: LFS pointer files are ~130 bytes (a quick check is sketched below)
- Update `download_weights.py` if using automatic download

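If a weight file fails to load, it may still be a Git LFS pointer rather than the real checkpoint. A small hedged check, based on the pointer format visible later in this diff (pointer files start with a `version https://git-lfs.github.com/spec/v1` line and are only a couple of hundred bytes); the path used here is just one of the repository's weight locations:

```python
# Quick check: is best.pt a real checkpoint or still a Git LFS pointer?
# LFS pointer files are tiny (~130 bytes) and start with a "version https://git-lfs..." line.
import os

def is_lfs_pointer(path: str) -> bool:
    if os.path.getsize(path) > 1024:          # real checkpoints are MB-sized
        return False
    with open(path, "rb") as f:
        return f.read(7) == b"version"

path = "detectors/CLIP-D/checkpoint/pretrained/weights/best.pt"
if os.path.exists(path) and is_lfs_pointer(path):
    print(f"{path} is still an LFS pointer; run `git lfs pull` to fetch the real file.")
```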
### Out of memory errors

- Upgrade to GPU hardware (T4 small recommended)
- Reduce batch size or model size if possible
- Use CPU inference for deployment (already configured)

## Cost Optimization

- **CPU Basic** (free): Works but slower
- **CPU Upgrade** ($0.03/hour): Faster inference
- **T4 Small GPU** ($0.60/hour): Needed for real-time performance

## Maintenance

- Monitor Space usage in the HF dashboard
- Update models by pushing new weights via Git LFS
- Check Gradio version compatibility: `pip list | grep gradio`

## Support

For issues specific to this deployment, check:

- [Gradio Documentation](https://gradio.app/docs/)
- [HF Spaces Documentation](https://huggingface.co/docs/hub/spaces)
- [GitHub Repository](https://github.com/truebees-ai/Image-Deepfake-Detectors-Public-Library)
README.md
CHANGED (@@ -1,13 +1,272 @@)

Removed: the former 13-line YAML front matter (its field values are not shown in this view).

Added:

# Image Deepfake Detectors Public Library

## Overview

This repository provides a unified framework for training, testing, and benchmarking multiple state-of-the-art (SoA) deepfake detection models. It supports automated benchmarking, training, demo runs, and single-image detection, with modular configuration and extensible detector support.

### Main Features

- **Multiple Detectors:** Supports CLIP-D [1], NPR [2], P2G [3], R50_TF [4], and R50_nodown [5] (an overview of each method is provided in its `README.md`: `./detectors/<DETECTOR>/README.md`)
- **Pretrained Weights:** All models have been pretrained on images generated with StyleGAN2 and StableDiffusionXL, and real images from the FFHQ Dataset [6] and the FORLAB Dataset [7].
- **Automated Training & Testing:** Use `launcher.py` to run experiments across detectors and datasets.
- **Demo Mode:** Easily test all detectors on sample images in `demo_images/`.
- **Single Image Detection:** Run detection on individual images via the command line.
- **Flexible Configuration:** All experiment parameters are set via YAML files in `configs/`.
- **Logging & Results:** Logs and results are saved per detector and scenario for easy analysis.

---

## Set-Up

### Prerequisites

`Ubuntu>=22.04.3`, `Python>=3.10` and `CUDA:12.0`

---

### Download Weights

You can download the weights for each model from this [link](https://drive.google.com/file/d/1F60FN2B9skRcb3YrZwhFTZQihbj3ipJQ/view?usp=sharing).

Then, copy them into the `pretrained` folder for the corresponding model, following this structure: `./detectors/<DETECTOR>/checkpoint/pretrained/weights/best.pt`

---

### Download Benchmarking Dataset

DeepShield Dataset: [Zenodo link](https://zenodo.org/records/15648378)

Download the dataset and change the corresponding `dataset_path` in `./configs/<DETECTOR>.yaml`.

> The DeepShield dataset is a large-scale benchmark for evaluating the robustness of fake image detection systems. It contains 100,000 images, divided between real and AI-generated content produced using advanced generative models, including StyleGAN, StyleGAN2, StyleGAN3, Stable Diffusion 1.5, 2.1, 3, and XL, as well as Flux 1.0.
>
> To simulate real-world distortions, 30,000 images were shared on Facebook, X (formerly Twitter), and Telegram, then re-collected to include platform-induced compression and artifacts. This approach ensures that the dataset captures authentic distribution noise and artifacts encountered in real-world scenarios.

---

### VirtualEnv

Create a virtual environment using:

```bash
python -m venv IDFD_VENV
source IDFD_VENV/bin/activate
pip install -r requirements.txt
```

Or use conda:

```bash
conda env create -f environment.yml
conda activate IDFD_VENV
```

-----

### Download Demo Dataset

You can download the demo dataset from this [link](https://drive.google.com/file/d/134Bw8l9tEC7oZJpTAeMO80QRqgdJfJS9/view?usp=sharing). The demo dataset contains 200 images randomly sampled from the DeepShield Dataset.

Place sample images for quick testing in `demo_images/`, organized by platform and label:

```
demo_images/
  Facebook/
    Fake/
    Real/
  PreSocial/
    Fake/
    Real/
  Telegram/
    Fake/
    Real/
  X/
    Fake/
    Real/
```

-----

## Running Experiments

**1. Run Demo:**

Test all detectors on sample images:

```bash
python launcher.py --demo --demo-detector all
```

Test a single detector on sample images:

```bash
python launcher.py --demo --demo-detector <DETECTOR>
```

**2. Automated Train-Test (Recommended) on DeepShield Dataset:**

To run both train and test using a specific `<DETECTOR>`:

```bash
python launcher.py --detector <DETECTOR> --phases both
```

- `<DETECTOR>`: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown`
- `--phases`: `train`, `test`, or `both`

By doing so, the selected detector will be trained on images generated by StyleGAN2 and StableDiffusionXL and real images from the FORLAB and FFHQ Datasets, **not shared on social networks**.

**3. Manual Train-Test on DeepShield Dataset:**

```bash
python launcher.py --detector <DETECTOR> --phases <PHASE> --config-dir <CONFIG_FILE_PATH> --weights_name <WEIGHTS_NAME>
```

- `<DETECTOR>`: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown`
- `--phases`: `train`, `test`, or `both`
- `--config-dir`: Path to the detector config files (default: `configs/`)
- `--weights_name`: Model weights name. The default is defined in `configs/<DETECTOR>.yaml` by these lines:

```yaml
training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
```

This corresponds to the training subsets used to train a detector (see the "Train on Different Generators from the DeepShield Dataset" section for more information).

**4. Test the model using pretrained weights on the DeepShield Dataset:**

```bash
python launcher.py --detector <DETECTOR> --phases test --weights_name pretrained
```

**5. Train the model using a custom weights name on the DeepShield Dataset:**

```bash
python launcher.py --detector <DETECTOR> --phases train --weights_name <WEIGHTS_NAME>
```

**6. Perform Detection on Single Images:**

```bash
python launcher.py --detect --detector <DETECTOR> --image <PATH_TO_IMAGE> --weights <WEIGHTS_NAME> --output <OUTPUT_PATH>
```

- `<DETECTOR>`: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown`
- `--image`: Path to the input image
- `--weights`: Weights name (default: `pretrained`)
- `--output`: Path to save detection results (default: `detection_results`; a programmatic sketch of this entry point follows)

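Single-image detection can also be driven from Python by building the same argument object that `app.py` (added in this commit) passes to `support.detect.run_detect`. The snippet below mirrors that file; the image path is a placeholder, not a file shipped with the repository.

```python
# Programmatic single-image detection, mirroring app.py from this commit.
import json
from types import SimpleNamespace
from support.detect import run_detect

args = SimpleNamespace(
    image="demo_images/PreSocial/Fake/example.png",  # placeholder path
    detector="CLIP-D",
    config_dir="configs",
    output="detection_results.json",
    weights="pretrained",
    device="cpu",
    dry_run=False,
    verbose=False,
)

run_detect(args)
with open(args.output) as f:
    print(json.load(f))  # e.g. {'prediction': ..., 'confidence': ..., 'elapsed_time': ...}
```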
-----

## Train on Different Generators from the DeepShield Dataset

To train a detector on generators different from StyleGAN2 and StableDiffusionXL, modify these lines in `configs/<DETECTOR>.yaml`:

```yaml
training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre
```

Currently supported pairs of `(key, sub-dataset)` are:

```
'gan1':['StyleGAN']
'gan2':['StyleGAN2']
'gan3':['StyleGAN3']
'sd15':['StableDiffusion1.5']
'sd2':['StableDiffusion2']
'sd3':['StableDiffusion3']
'sdXL':['StableDiffusionXL']
'flux':['FLUX.1']
'realFFHQ':['FFHQ']
'realFORLAB':['FORLAB']
```

And supported pairs of `(key, social)` are:

```
'pre':[Not Shared on Social Networks]
'fb': [Facebook]
'tl': [Telegram]
'tw': [X Social]
```

Then, generate the corresponding `split.json` using `python support/json_compile.py` and use it to replace `./split.json`. NOTE: change line 9 (`dataset_path=...`) in `support/json_compile.py` accordingly.

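As a small illustration of the notation above (and not the repository's actual parser), a `data` string such as `gan2:pre&sdXL:pre` is simply an `&`-separated list of `generator:social` tokens drawn from the two key tables:

```python
# Illustrative only: decomposing a training `data` string into (sub-dataset, social) pairs.
GENERATOR_KEYS = {"gan1", "gan2", "gan3", "sd15", "sd2", "sd3", "sdXL", "flux",
                  "realFFHQ", "realFORLAB"}
SOCIAL_KEYS = {"pre", "fb", "tl", "tw"}

def parse_data_string(data: str) -> list[tuple[str, str]]:
    """Split 'gan2:pre&sdXL:pre&...' into validated (generator, social) pairs."""
    pairs = []
    for token in data.split("&"):
        gen, social = token.split(":")
        assert gen in GENERATOR_KEYS, f"unknown sub-dataset key: {gen}"
        assert social in SOCIAL_KEYS, f"unknown social key: {social}"
        pairs.append((gen, social))
    return pairs

print(parse_data_string("gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre"))
# -> [('gan2', 'pre'), ('sdXL', 'pre'), ('realFFHQ', 'pre'), ('realFORLAB', 'pre')]
```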
+
## Results & Logs
|
| 200 |
+
|
| 201 |
+
- **Results:** Saved in `detectors/<DETECTOR>/results/`
|
| 202 |
+
- **Logs:** Saved in `logs/` per run and scenario
|
| 203 |
+
|
| 204 |
+
-----
|
| 205 |
+
|
| 206 |
+
## Train/Test on a New Dataset
|
| 207 |
+
|
| 208 |
+
### Data Organization
|
| 209 |
+
|
| 210 |
+
Organize your data by platform and label:
|
| 211 |
+
|
| 212 |
+
```
|
| 213 |
+
<DATASET_NAME>/
|
| 214 |
+
Facebook/
|
| 215 |
+
Fake/
|
| 216 |
+
Real/
|
| 217 |
+
PreSocial/
|
| 218 |
+
Fake/
|
| 219 |
+
Real/
|
| 220 |
+
Telegram/
|
| 221 |
+
Fake/
|
| 222 |
+
Real/
|
| 223 |
+
X/
|
| 224 |
+
Fake/
|
| 225 |
+
Real/
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
Generate the corresponding `split.json` using `python support/json_compile.py` and use it to replace `./split.json`. NOTE: change line 9 `dataset_path=...` in `support/json_compile.py` accordingly.
|
| 229 |
+
|
| 230 |
+
### Split Files
|
| 231 |
+
|
| 232 |
+
- **`split.json`:** Main split file for experiments. Format: JSON with `train`/`test` keys and lists of sample IDs.
|
| 233 |
+
- **`split_demo.json`:** Auto-generated for demo mode, covering all images in `demo_images/`.
|
| 234 |
+
|
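The exact schema is produced by `support/json_compile.py`; the sketch below only illustrates the general shape described above (`train`/`test` keys mapping to lists of sample IDs). The key names and sample paths here are assumptions, not the repository's actual format.

```python
# Hypothetical illustration of the split-file shape described above.
# The real schema comes from support/json_compile.py; keys and paths here are assumptions.
import json

split = {
    "train": [
        "PreSocial/Fake/stylegan2_000001.png",
        "PreSocial/Real/ffhq_000001.png",
    ],
    "test": [
        "Facebook/Fake/sdxl_000042.jpg",
        "Telegram/Real/forlab_000017.jpg",
    ],
}

with open("split.json", "w") as f:
    json.dump(split, f, indent=2)
```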
-----

## Additional Configuration Options

- **YAML Files:** All detectors have a config file in `configs/` (e.g., `CLIP-D.yaml`, `NPR.yaml`).
- **Config Options** (a loading sketch follows this list):
  - `global`: Dataset path, device, split file, threads, etc.
  - `detector_args`: Model-specific arguments.
  - `training`: List of training scenarios.
  - `testing`: List of test scenarios.

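A minimal sketch of reading one of these configs, assuming PyYAML is available; the launcher's actual loading code may differ, but the field names follow the config files added in this commit.

```python
# Minimal sketch of reading a detector config such as configs/CLIP-D.yaml.
# Assumes PyYAML is installed; the launcher's real loading logic may differ.
import yaml

with open("configs/CLIP-D.yaml") as f:
    cfg = yaml.safe_load(f)

dataset_path = cfg["global"]["dataset_path"]      # where the DeepShield data lives
detector_args = cfg.get("detector_args", [])      # extra CLI-style arguments for the detector
train_scenarios = [s["data"] for s in cfg["training"]]
test_scenarios = cfg["testing"]                   # e.g. ['realFFHQ:fb', 'gan2:pre', ...]

print(f"{len(test_scenarios)} test scenarios, training on: {train_scenarios}")
```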
-----

## References

[1] D. Cozzolino, G. Poggi, R. Corvi, M. Nießner, and L. Verdoliva, "Raising the Bar of AI-generated Image Detection with CLIP," in 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 4356-4366, June 2024. ISSN: 2160-7516.

[2] C. Tan, H. Liu, Y. Zhao, S. Wei, G. Gu, P. Liu, and Y. Wei, "Rethinking the Up-Sampling Operations in CNN-Based Generative Network for Generalizable Deepfake Detection," in 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 28130-28139, June 2024. ISSN: 2575-7075.

[3] F. Laiti, B. Liberatori, T. De Min, and E. Ricci, "Conditioned Prompt-Optimization for Continual Deepfake Detection," in Pattern Recognition (A. Antonacopoulos, S. Chaudhuri, R. Chellappa, C.-L. Liu, S. Bhattacharya, and U. Pal, eds.), (Cham), pp. 64-79, Springer Nature Switzerland, 2025.

[4] S. Dell'Anna, A. Montibeller, and G. Boato, "TrueFake: A Real World Case Dataset of Last Generation Fake Images also Shared on Social Networks," arXiv preprint arXiv:2504.20658, 2025.

[5] R. Corvi, D. Cozzolino, G. Zingarini, G. Poggi, K. Nagano, and L. Verdoliva, "On The Detection of Synthetic Images Generated by Diffusion Models," in ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1-5, June 2023. ISSN: 2379-190X.

[6] NVlabs, "Flickr-Faces-HQ dataset." https://github.com/NVlabs/ffhq-dataset, n.d. Accessed: 2025-03-04.

[7] M. Iuliani, M. Fontani, and A. Piva, "A leak in PRNU based source identification: questioning fingerprint uniqueness," IEEE Access, vol. 9, pp. 52455-52463, 2021.
README_HF.md
ADDED (+46 lines)

---
title: Deepfake Detection Library
emoji: 🔍
colorFrom: red
colorTo: orange
sdk: gradio
sdk_version: 4.44.0
app_file: app.py
pinned: false
license: mit
---

# Deepfake Detection Library

This Space provides a unified interface to test multiple state-of-the-art deepfake detection models on your images.

## Available Detectors

- **R50_TF** - ResNet-50 based detector trained on the TrueFake dataset
- **R50_nodown** - ResNet-50 without downsampling operations
- **CLIP-D** - CLIP-based deepfake detector
- **P2G** - Prompt2Guard: conditioned prompt-optimization for continual deepfake detection
- **NPR** - Neighboring Pixel Relationships detector

## Usage

1. Upload an image
2. Select a detector from the dropdown
3. Click "Detect" to get the prediction

The detector returns (an illustrative result is shown below):

- **Prediction**: Real or Fake
- **Confidence**: Model confidence score (0-1)
- **Elapsed Time**: Processing time

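For reference, the Space presents these three fields as a small JSON object, following the keys used in `app.py`; the values below are made up for illustration only.

```python
# Illustrative example of the result object displayed by the Space (values are invented).
example_result = {
    "Prediction": "Fake",
    "Confidence": "0.9731",
    "Elapsed Time": "1.284s",
}
```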
## Models

All models have been pretrained on images generated with StyleGAN2 and StableDiffusionXL, and real images from the FFHQ Dataset and the FORLAB Dataset.

## References

For more information about the implementation and benchmarking, visit the [GitHub repository](https://github.com/truebees-ai/Image-Deepfake-Detectors-Public-Library).

## Note

⚠️ Due to file size limitations, model weights need to be downloaded automatically on first use. This may take a few moments.
__pycache__/app.cpython-310.pyc
ADDED · Binary file (2.31 kB)
app.py
ADDED (+117 lines)

```python
import gradio as gr
import os
import sys
import json
import argparse
from types import SimpleNamespace
from support.detect import run_detect

# Download weights on first run (for HF Spaces)
if os.environ.get("SPACE_ID"):
    try:
        from download_weights import download_all_weights
        download_all_weights()
    except Exception as e:
        print(f"Warning: Could not download weights: {e}")

# Available detectors based on launcher.py
DETECTORS = ['R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR']

def predict(image_path, detector_name):
    if not image_path:
        return {"error": "Please upload an image."}

    # Create a temporary output file path
    output_path = "temp_result.json"

    # Mock args object
    args = SimpleNamespace(
        image=image_path,
        detector=detector_name,
        config_dir='configs',
        output=output_path,
        weights='pretrained',  # Use default/pretrained
        device='cpu',          # Force CPU
        dry_run=False,
        verbose=False
    )

    try:
        # Run detection
        # We need to capture stdout/stderr or just trust the function
        # run_detect might raise FileNotFoundError if weights are missing
        run_detect(args)

        # Read results
        if os.path.exists(output_path):
            with open(output_path, 'r') as f:
                result = json.load(f)

            # Format output
            prediction = result.get('prediction', 'Unknown')
            confidence = result.get('confidence', 0.0)
            elapsed_time = result.get('elapsed_time', 0.0)

            return {
                "Prediction": prediction,
                "Confidence": f"{confidence:.4f}",
                "Elapsed Time": f"{elapsed_time:.3f}s"
            }
        else:
            return {"error": "No result file generated. Check console logs for details."}

    except FileNotFoundError as e:
        return {"error": str(e), "message": f"Please ensure you have downloaded the weights for {detector_name}."}
    except Exception as e:
        return {"error": str(e)}
    finally:
        # Cleanup
        if os.path.exists(output_path):
            os.remove(output_path)

# Create Gradio Interface
with gr.Blocks(title="Deepfake Detection", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Deepfake Detection Library")
    gr.Markdown("""
    Upload an image and select a detector to check if it's real or fake.

    **Available Detectors:**
    - **R50_TF**: ResNet-50 based detector
    - **R50_nodown**: ResNet-50 without downsampling
    - **CLIP-D**: CLIP-based detector
    - **P2G**: Prompt2Guard detector
    - **NPR**: Neighboring Pixel Relationships detector
    """)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Input Image", height=400)
            detector_input = gr.Dropdown(
                choices=DETECTORS,
                value=DETECTORS[0],
                label="Select Detector",
                info="Choose which deepfake detection model to use"
            )
            submit_btn = gr.Button("🔍 Detect", variant="primary")

        with gr.Column():
            output_json = gr.JSON(label="Detection Results")

    gr.Markdown("""
    ---
    ### About
    This Space provides access to multiple state-of-the-art deepfake detection models.
    All models are trained on StyleGAN2, StableDiffusionXL, FFHQ, and FORLAB datasets.

    **Note:** First detection may be slower due to model loading.
    """)

    submit_btn.click(
        fn=predict,
        inputs=[image_input, detector_input],
        outputs=output_json
    )

if __name__ == "__main__":
    # For HF Spaces, share is automatically enabled
    demo.launch()
```
configs/CLIP-D.yaml
ADDED (+72 lines)

```yaml
global:
  dataset_path: <PATH_TO_DATASET>
  device_override: cpu  # null for auto-selection
  min_vram: 16000
  split_file: ./split.json
  num_threads: 8
  dry_run: false
  only_list: false

detector_args:
  - "--arch"
  - "opencliplinearnext_clipL14commonpool"
  - "--norm_type"
  - "clip"
  - "--resize_size"
  - "200"
  - "--resize_ratio"
  - "1"
  - "--resize_prob"
  - "0.2"
  - "--cmp_qual"
  - "65,100"
  - "--cmp_prob"
  - "0.5"
  - "--resizeSize"
  - "224"

training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre

testing:
  - realFFHQ:fb
  - realFORLAB:fb
  - gan1:fb
  - gan2:fb
  - gan3:fb
  - sd15:fb
  - sd2:fb
  - sd3:fb
  - sdXL:fb
  - flux:fb
  - realFFHQ:tl
  - realFORLAB:tl
  - gan1:tl
  - gan2:tl
  - gan3:tl
  - sd15:tl
  - sd2:tl
  - sd3:tl
  - sdXL:tl
  - flux:tl
  - realFFHQ:tw
  - realFORLAB:tw
  - gan1:tw
  - gan2:tw
  - gan3:tw
  - sd15:tw
  - sd2:tw
  - sd3:tw
  - sdXL:tw
  - flux:tw
  - realFFHQ:pre
  - realFORLAB:pre
  - gan1:pre
  - gan2:pre
  - gan3:pre
  - sd15:pre
  - sd2:pre
  - sd3:pre
  - sdXL:pre
  - flux:pre
```
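The `detector_args` entries above are CLI-style tokens handed to the detector. The repository ships its own `detectors/CLIP-D/parser.py` for this; the snippet below is only an illustrative stand-in showing how such tokens could be consumed with `argparse`, with the option names taken from the config above.

```python
# Illustrative only: consuming CLI-style detector_args like those in configs/CLIP-D.yaml.
# The repository has its own detectors/CLIP-D/parser.py; this is not that file.
import argparse

detector_args = [
    "--arch", "opencliplinearnext_clipL14commonpool",
    "--norm_type", "clip",
    "--resize_size", "200",
    "--resize_ratio", "1",
    "--resize_prob", "0.2",
    "--cmp_qual", "65,100",
    "--cmp_prob", "0.5",
    "--resizeSize", "224",
]

parser = argparse.ArgumentParser()
parser.add_argument("--arch", type=str)
parser.add_argument("--norm_type", type=str)
parser.add_argument("--resize_size", type=int)
parser.add_argument("--resize_ratio", type=float)
parser.add_argument("--resize_prob", type=float)
parser.add_argument("--cmp_qual", type=str)       # "min,max" compression-quality range
parser.add_argument("--cmp_prob", type=float)
parser.add_argument("--resizeSize", type=int)

opts = parser.parse_args(detector_args)
print(opts.arch, opts.resize_prob, opts.cmp_qual)
```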
configs/NPR.yaml
ADDED (+56 lines)

```yaml
global:
  dataset_path: <PATH_TO_DATASET>
  device_override: cpu  # null for auto-selection
  min_vram: 16000
  split_file: ./split.json
  num_threads: 8
  dry_run: false
  only_list: false

detector_args: []

training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre

testing:
  - realFFHQ:fb
  - realFORLAB:fb
  - gan1:fb
  - gan2:fb
  - gan3:fb
  - sd15:fb
  - sd2:fb
  - sd3:fb
  - sdXL:fb
  - flux:fb
  - realFFHQ:tl
  - realFORLAB:tl
  - gan1:tl
  - gan2:tl
  - gan3:tl
  - sd15:tl
  - sd2:tl
  - sd3:tl
  - sdXL:tl
  - flux:tl
  - realFFHQ:tw
  - realFORLAB:tw
  - gan1:tw
  - gan2:tw
  - gan3:tw
  - sd15:tw
  - sd2:tw
  - sd3:tw
  - sdXL:tw
  - flux:tw
  - realFFHQ:pre
  - realFORLAB:pre
  - gan1:pre
  - gan2:pre
  - gan3:pre
  - sd15:pre
  - sd2:pre
  - sd3:pre
  - sdXL:pre
  - flux:pre
```
configs/P2G.yaml
ADDED (+56 lines)

```yaml
global:
  dataset_path: <PATH_TO_DATASET>
  device_override: cpu  # null for auto-selection
  min_vram: 16000
  split_file: ./split.json
  num_threads: 8
  dry_run: false
  only_list: false

detector_args: []

training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre

testing:
  - realFFHQ:fb
  - realFORLAB:fb
  - gan1:fb
  - gan2:fb
  - gan3:fb
  - sd15:fb
  - sd2:fb
  - sd3:fb
  - sdXL:fb
  - flux:fb
  - realFFHQ:tl
  - realFORLAB:tl
  - gan1:tl
  - gan2:tl
  - gan3:tl
  - sd15:tl
  - sd2:tl
  - sd3:tl
  - sdXL:tl
  - flux:tl
  - realFFHQ:tw
  - realFORLAB:tw
  - gan1:tw
  - gan2:tw
  - gan3:tw
  - sd15:tw
  - sd2:tw
  - sd3:tw
  - sdXL:tw
  - flux:tw
  - realFFHQ:pre
  - realFORLAB:pre
  - gan1:pre
  - gan2:pre
  - gan3:pre
  - sd15:pre
  - sd2:pre
  - sd3:pre
  - sdXL:pre
  - flux:pre
```
configs/R50_TF.yaml
ADDED (+61 lines)

```yaml
global:
  dataset_path: <PATH_TO_DATASET>
  device_override: cpu  # null for auto-selection
  min_vram: 16000
  split_file: ./split.json
  num_threads: 8
  dry_run: false
  only_list: false

detector_args:
  - "--arch"
  - "nodown"
  - "--prototype"
  - "--freeze"

training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre

testing:
  - realFFHQ:fb
  - realFORLAB:fb
  - gan1:fb
  - gan2:fb
  - gan3:fb
  - sd15:fb
  - sd2:fb
  - sd3:fb
  - sdXL:fb
  - flux:fb
  - realFFHQ:tl
  - realFORLAB:tl
  - gan1:tl
  - gan2:tl
  - gan3:tl
  - sd15:tl
  - sd2:tl
  - sd3:tl
  - sdXL:tl
  - flux:tl
  - realFFHQ:tw
  - realFORLAB:tw
  - gan1:tw
  - gan2:tw
  - gan3:tw
  - sd15:tw
  - sd2:tw
  - sd3:tw
  - sdXL:tw
  - flux:tw
  - realFFHQ:pre
  - realFORLAB:pre
  - gan1:pre
  - gan2:pre
  - gan3:pre
  - sd15:pre
  - sd2:pre
  - sd3:pre
  - sdXL:pre
  - flux:pre
```
configs/R50_nodown.yaml
ADDED (+84 lines)

```yaml
global:
  dataset_path: <PATH_TO_DATASET>
  device_override: cpu  # null for auto-selection
  min_vram: 16000
  split_file: ./split.json
  num_threads: 8
  dry_run: false
  only_list: false

detector_args:
  - "--arch"
  - "res50nodown"
  - "--norm_type"
  - "resnet"
  - "--resize_size"
  - "256"
  - "--resize_ratio"
  - "0.75"
  - "--resize_prob"
  - "0.2"
  - "--cmp_qual"
  - "30,100"
  - "--cmp_prob"
  - "0.5"
  - "--cropSize"
  - "96"
  - "--blur_sig"
  - "0.1,3.0"
  - "--blur_prob"
  - "0.5"
  - "--jitter_prob"
  - "0.8"
  - "--colordist_prob"
  - "0.2"
  - "--cutout_prob"
  - "0.2"
  - "--noise_prob"
  - "0.2"

training:
  - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre

testing:
  - realFFHQ:fb
  - realFORLAB:fb
  - gan1:fb
  - gan2:fb
  - gan3:fb
  - sd15:fb
  - sd2:fb
  - sd3:fb
  - sdXL:fb
  - flux:fb
  - realFFHQ:tl
  - realFORLAB:tl
  - gan1:tl
  - gan2:tl
  - gan3:tl
  - sd15:tl
  - sd2:tl
  - sd3:tl
  - sdXL:tl
  - flux:tl
  - realFFHQ:tw
  - realFORLAB:tw
  - gan1:tw
  - gan2:tw
  - gan3:tw
  - sd15:tw
  - sd2:tw
  - sd3:tw
  - sdXL:tw
  - flux:tw
  - realFFHQ:pre
  - realFORLAB:pre
  - gan1:pre
  - gan2:pre
  - gan3:pre
  - sd15:pre
  - sd2:pre
  - sd3:pre
  - sdXL:pre
  - flux:pre
```
demo_images/README.md
ADDED (+21 lines)

# Download Demo Dataset

You can download the demo dataset from this [link](https://drive.google.com/file/d/134Bw8l9tEC7oZJpTAeMO80QRqgdJfJS9/view?usp=sharing). The demo dataset contains 200 images randomly sampled from the DeepShield Dataset.

Place sample images for quick testing in `demo_images/`, organized by platform and label:

```
demo_images/
  Facebook/
    Fake/
    Real/
  PreSocial/
    Fake/
    Real/
  Telegram/
    Fake/
    Real/
  X/
    Fake/
    Real/
```
detectors/.DS_Store
ADDED · Binary file (10.2 kB)
detectors/.gitattributes
ADDED (+1 line)

*.pkl filter=lfs diff=lfs merge=lfs -text
detectors/CLIP-D/.DS_Store
ADDED · Binary file (8.2 kB)
detectors/CLIP-D/LICENSE
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Apache License
|
| 3 |
+
Version 2.0, January 2004
|
| 4 |
+
http://www.apache.org/licenses/
|
| 5 |
+
|
| 6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 7 |
+
|
| 8 |
+
1. Definitions.
|
| 9 |
+
|
| 10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 12 |
+
|
| 13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 14 |
+
the copyright owner that is granting the License.
|
| 15 |
+
|
| 16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 17 |
+
other entities that control, are controlled by, or are under common
|
| 18 |
+
control with that entity. For the purposes of this definition,
|
| 19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 20 |
+
direction or management of such entity, whether by contract or
|
| 21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 23 |
+
|
| 24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 25 |
+
exercising permissions granted by this License.
|
| 26 |
+
|
| 27 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 28 |
+
including but not limited to software source code, documentation
|
| 29 |
+
source, and configuration files.
|
| 30 |
+
|
| 31 |
+
"Object" form shall mean any form resulting from mechanical
|
| 32 |
+
transformation or translation of a Source form, including but
|
| 33 |
+
not limited to compiled object code, generated documentation,
|
| 34 |
+
and conversions to other media types.
|
| 35 |
+
|
| 36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 37 |
+
Object form, made available under the License, as indicated by a
|
| 38 |
+
copyright notice that is included in or attached to the work
|
| 39 |
+
(an example is provided in the Appendix below).
|
| 40 |
+
|
| 41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 42 |
+
form, that is based on (or derived from) the Work and for which the
|
| 43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 45 |
+
of this License, Derivative Works shall not include works that remain
|
| 46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 47 |
+
the Work and Derivative Works thereof.
|
| 48 |
+
|
| 49 |
+
"Contribution" shall mean any work of authorship, including
|
| 50 |
+
the original version of the Work and any modifications or additions
|
| 51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 55 |
+
means any form of electronic, verbal, or written communication sent
|
| 56 |
+
to the Licensor or its representatives, including but not limited to
|
| 57 |
+
communication on electronic mailing lists, source code control systems,
|
| 58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 60 |
+
excluding communication that is conspicuously marked or otherwise
|
| 61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 62 |
+
|
| 63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 65 |
+
subsequently incorporated within the Work.
|
| 66 |
+
|
| 67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 72 |
+
Work and such Derivative Works in Source or Object form.
|
| 73 |
+
|
| 74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 77 |
+
(except as stated in this section) patent license to make, have made,
|
| 78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 79 |
+
where such license applies only to those patent claims licensable
|
| 80 |
+
by such Contributor that are necessarily infringed by their
|
| 81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 83 |
+
institute patent litigation against any entity (including a
|
| 84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 85 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 86 |
+
or contributory patent infringement, then any patent licenses
|
| 87 |
+
granted to You under this License for that Work shall terminate
|
| 88 |
+
as of the date such litigation is filed.
|
| 89 |
+
|
| 90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 91 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 92 |
+
modifications, and in Source or Object form, provided that You
|
| 93 |
+
meet the following conditions:
|
| 94 |
+
|
| 95 |
+
(a) You must give any other recipients of the Work or
|
| 96 |
+
Derivative Works a copy of this License; and
|
| 97 |
+
|
| 98 |
+
(b) You must cause any modified files to carry prominent notices
|
| 99 |
+
stating that You changed the files; and
|
| 100 |
+
|
| 101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 102 |
+
that You distribute, all copyright, patent, trademark, and
|
| 103 |
+
attribution notices from the Source form of the Work,
|
| 104 |
+
excluding those notices that do not pertain to any part of
|
| 105 |
+
the Derivative Works; and
|
| 106 |
+
|
| 107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 108 |
+
distribution, then any Derivative Works that You distribute must
|
| 109 |
+
include a readable copy of the attribution notices contained
|
| 110 |
+
within such NOTICE file, excluding those notices that do not
|
| 111 |
+
pertain to any part of the Derivative Works, in at least one
|
| 112 |
+
of the following places: within a NOTICE text file distributed
|
| 113 |
+
as part of the Derivative Works; within the Source form or
|
| 114 |
+
documentation, if provided along with the Derivative Works; or,
|
| 115 |
+
within a display generated by the Derivative Works, if and
|
| 116 |
+
wherever such third-party notices normally appear. The contents
|
| 117 |
+
of the NOTICE file are for informational purposes only and
|
| 118 |
+
do not modify the License. You may add Your own attribution
|
| 119 |
+
notices within Derivative Works that You distribute, alongside
|
| 120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 121 |
+
that such additional attribution notices cannot be construed
|
| 122 |
+
as modifying the License.
|
| 123 |
+
|
| 124 |
+
You may add Your own copyright statement to Your modifications and
|
| 125 |
+
may provide additional or different license terms and conditions
|
| 126 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 127 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 128 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 129 |
+
the conditions stated in this License.
|
| 130 |
+
|
| 131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 133 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 134 |
+
this License, without any additional terms or conditions.
|
| 135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 136 |
+
the terms of any separate license agreement you may have executed
|
| 137 |
+
with Licensor regarding such Contributions.
|
| 138 |
+
|
| 139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 141 |
+
except as required for reasonable and customary use in describing the
|
| 142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 143 |
+
|
| 144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 145 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 148 |
+
implied, including, without limitation, any warranties or conditions
|
| 149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 151 |
+
appropriateness of using or redistributing the Work and assume any
|
| 152 |
+
risks associated with Your exercise of permissions under this License.
|
| 153 |
+
|
| 154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 155 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 156 |
+
unless required by applicable law (such as deliberate and grossly
|
| 157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 158 |
+
liable to You for damages, including any direct, indirect, special,
|
| 159 |
+
incidental, or consequential damages of any character arising as a
|
| 160 |
+
result of this License or out of the use or inability to use the
|
| 161 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 162 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 163 |
+
other commercial damages or losses), even if such Contributor
|
| 164 |
+
has been advised of the possibility of such damages.
|
| 165 |
+
|
| 166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 169 |
+
or other liability obligations and/or rights consistent with this
|
| 170 |
+
License. However, in accepting such obligations, You may act only
|
| 171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 172 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 173 |
+
defend, and hold each Contributor harmless for any liability
|
| 174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 175 |
+
of your accepting any such warranty or additional liability.
|
| 176 |
+
|
| 177 |
+
END OF TERMS AND CONDITIONS
|
| 178 |
+
|
| 179 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 180 |
+
|
| 181 |
+
To apply the Apache License to your work, attach the following
|
| 182 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 183 |
+
replaced with your own identifying information. (Don't include
|
| 184 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 185 |
+
comment syntax for the file format. We also recommend that a
|
| 186 |
+
file or class name and description of purpose be included on the
|
| 187 |
+
same "printed page" as the copyright notice for easier
|
| 188 |
+
identification within third-party archives.
|
| 189 |
+
|
| 190 |
+
Copyright [yyyy] [name of copyright owner]
|
| 191 |
+
|
| 192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 193 |
+
you may not use this file except in compliance with the License.
|
| 194 |
+
You may obtain a copy of the License at
|
| 195 |
+
|
| 196 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 197 |
+
|
| 198 |
+
Unless required by applicable law or agreed to in writing, software
|
| 199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 201 |
+
See the License for the specific language governing permissions and
|
| 202 |
+
limitations under the License.
|
detectors/CLIP-D/README.md
ADDED
@@ -0,0 +1,29 @@
# ClipBased-SyntheticImageDetection

[](https://grip-unina.github.io/ClipBased-SyntheticImageDetection/)
[](https://arxiv.org/abs/2312.00195v2)
[](https://www.grip.unina.it)

Original Paper:
[Raising the Bar of AI-generated Image Detection with CLIP](https://arxiv.org/abs/2312.00195v2).

Authors: Davide Cozzolino, Giovanni Poggi, Riccardo Corvi, Matthias Nießner, and Luisa Verdoliva.

## Abstract

The aim of this work is to explore the potential of pre-trained vision-language models (VLMs) for universal detection of AI-generated images. We develop a lightweight detection strategy based on CLIP features and study its performance in a wide variety of challenging scenarios. We find that, contrary to previous beliefs, it is neither necessary nor convenient to use a large domain-specific dataset for training. On the contrary, by using only a handful of example images from a single generative model, a CLIP-based detector exhibits surprising generalization ability and high robustness across different architectures, including recent commercial tools such as Dalle-3, Midjourney v5, and Firefly. We match the state-of-the-art (SoTA) on in-distribution data and significantly improve upon it in terms of generalization to out-of-distribution data (+6% AUC) and robustness to impaired/laundered data (+13%).

## Please Cite

```
@inproceedings{cozzolino2023raising,
  author={Davide Cozzolino and Giovanni Poggi and
          Riccardo Corvi and Matthias Nießner and Luisa
          Verdoliva},
  title={{Raising the Bar of AI-generated Image
          Detection with CLIP}},
  booktitle={IEEE/CVF Conference on Computer Vision
             and Pattern Recognition Workshops (CVPRW)},
  year={2024},
}
```

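The abstract above describes a lightweight strategy: keep a pre-trained CLIP image encoder frozen and train only a small linear classifier on its features. The sketch below illustrates that idea only; it is not the repository's training code. The backbone choice, feature normalization, and classifier setup here are assumptions for illustration, while the actual implementation lives in `networks/openclipnet.py`, `train.py`, and `detect.py` further down in this commit.

```python
# Illustrative sketch (not the repo's code): a linear probe on frozen CLIP features.
import torch
import torch.nn as nn
import open_clip

# Assumption: a ViT-L/14 backbone; the repo selects the exact variant via its --arch string.
backbone, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')
backbone.eval()

classifier = nn.Linear(backbone.visual.output_dim, 1)  # single real-vs-fake logit

def score_images(images: torch.Tensor) -> torch.Tensor:
    """Return one logit per image; a positive logit is read as 'fake' downstream."""
    with torch.no_grad():                                # backbone stays frozen
        feats = backbone.encode_image(images)
        feats = feats / feats.norm(dim=-1, keepdim=True)  # L2-normalized features
    return classifier(feats).squeeze(1)
```

Only the small `classifier` would be trained in this setup, which is why the checkpoint shipped with this Space is only a few kilobytes.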
detectors/CLIP-D/__pycache__/parser.cpython-310.pyc
ADDED
Binary file (1.3 kB). View file

detectors/CLIP-D/checkpoint/.DS_Store
ADDED
Binary file (6.15 kB). View file

detectors/CLIP-D/checkpoint/pretrained/.DS_Store
ADDED
Binary file (6.15 kB). View file

detectors/CLIP-D/checkpoint/pretrained/weights/best.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:34315ca92adbf17921f5aa1e28f8fe5bf7d56dd8126205c9e264cbfb26582d12
size 15452

detectors/CLIP-D/detect.py
ADDED
@@ -0,0 +1,109 @@
# ----------------------------------------------------------------------------
# IMPORTS
# ----------------------------------------------------------------------------
import os
import sys
import time
import torch
import argparse
from PIL import Image
import torchvision.transforms as transforms

# Add project root to path for imports
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(project_root)
from support.detect_utils import format_result, save_result, get_device

from networks import create_architecture

# ----------------------------------------------------------------------------
# IMAGE PREPROCESSING
# ----------------------------------------------------------------------------
def preprocess_image(image_path, size=224):
    """Load and preprocess a single image for model input."""
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform(image).unsqueeze(0)  # Add batch dimension

# ----------------------------------------------------------------------------
# ARGUMENT PARSING
# ----------------------------------------------------------------------------
def parse_args():
    parser = argparse.ArgumentParser(description='CLIP-D single image detector')
    parser.add_argument('--image', type=str, required=True, help='Path to input image')
    parser.add_argument('--model', type=str, default='pretrained', help='Name of the model checkpoint directory')
    parser.add_argument('--output', type=str, help='Path to save detection result JSON')
    parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)')
    return parser.parse_args()

def main():
    args = parse_args()

    # Setup device
    device = torch.device(args.device) if args.device else get_device()

    # Load model
    try:
        load_path = f'./detectors/CLIP-D/checkpoint/{args.model}/weights/best.pt'

        if not os.path.exists(load_path):
            raise FileNotFoundError(f"Model weights not found at: {load_path}")

        checkpoint = torch.load(load_path, map_location=device)
        # Initialize model and load state
        model = create_architecture("opencliplinearnext_clipL14commonpool", pretrained=False, num_classes=1).to(device)
        if 'model' in checkpoint:
            model.load_state_dict(checkpoint['model'])
        else:
            model.load_state_dict(checkpoint)
        model.eval()
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # Load and preprocess image
    try:
        image_tensor = preprocess_image(args.image)
        image_tensor = image_tensor.to(device)
    except Exception as e:
        print(f"Error loading image: {e}")
        return

    # Run detection
    start_time = time.time()
    with torch.no_grad():
        try:
            score = model(image_tensor)
            prediction = torch.sigmoid(score)

            confidence = prediction.item()

            result = format_result(
                'fake' if confidence > 0.5 else 'real',
                confidence,
                time.time() - start_time
            )

            # Print result
            print(f"Prediction: {result['prediction']}")
            print(f"Confidence: {result['confidence']:.4f}")
            print(f"Time: {result['elapsed_time']:.3f}s")

            # Save result if output path provided
            if args.output:
                save_result(result, args.output)

        except Exception as e:
            print(f"Error during detection: {e}")
            return

if __name__ == '__main__':
    main()

detectors/CLIP-D/networks/__init__.py
ADDED
@@ -0,0 +1,70 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

def create_architecture(name_arch, pretrained=False, num_classes=1):
    if name_arch == "res50nodown":
        from .resnet_mod import resnet50

        if pretrained:
            model = resnet50(pretrained=True, stride0=1, dropout=0.5).change_output(num_classes)
        else:
            model = resnet50(num_classes=num_classes, stride0=1, dropout=0.5)
    elif name_arch == "res50":
        from .resnet_mod import resnet50

        if pretrained:
            model = resnet50(pretrained=True, stride0=2).change_output(num_classes)
        else:
            model = resnet50(num_classes=num_classes, stride0=2)
    elif name_arch.startswith('opencliplinear_'):
        from .openclipnet import OpenClipLinear
        model = OpenClipLinear(num_classes=num_classes, pretrain=name_arch[15:], normalize=True)
    elif name_arch.startswith('opencliplinearnext_'):
        from .openclipnet import OpenClipLinear
        model = OpenClipLinear(num_classes=num_classes, pretrain=name_arch[19:], normalize=True, next_to_last=True)
    else:
        assert False
    return model

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def load_weights(model, model_path):
    from torch import load
    dat = load(model_path, map_location='cpu')
    if 'model' in dat:
        if ('module._conv_stem.weight' in dat['model']) or \
           ('module.fc.fc1.weight' in dat['model']) or \
           ('module.fc.weight' in dat['model']):
            model.load_state_dict(
                {key[7:]: dat['model'][key] for key in dat['model']})
        else:
            model.load_state_dict(dat['model'])
    elif 'state_dict' in dat:
        model.load_state_dict(dat['state_dict'])
    elif 'net' in dat:
        model.load_state_dict(dat['net'])
    elif 'main.0.weight' in dat:
        model.load_state_dict(dat)
    elif '_fc.weight' in dat:
        model.load_state_dict(dat)
    elif 'conv1.weight' in dat:
        model.load_state_dict(dat)
    else:
        print(list(dat.keys()))
        assert False
    return model

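For reference, the two helpers above are typically combined to rebuild the detector from a checkpoint. The sketch below is illustrative only: the architecture string and the checkpoint path are taken from `detect.py` and `test.py` in this commit, while the working-directory assumption (running from `detectors/CLIP-D`) and the dummy input are mine.

```python
# Minimal sketch: rebuild the CLIP-D detector from its checkpoint (illustrative, not part of the repo).
import torch
from networks import create_architecture, load_weights  # assumes CWD is detectors/CLIP-D

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Same architecture string as detect.py; only the linear head is stored in best.pt.
model = create_architecture("opencliplinearnext_clipL14commonpool", pretrained=False, num_classes=1)
model = load_weights(model, './checkpoint/pretrained/weights/best.pt').to(device).eval()

with torch.no_grad():
    logit = model(torch.randn(1, 3, 224, 224, device=device))  # dummy batch, one logit per image
    print('fake' if torch.sigmoid(logit).item() > 0.5 else 'real')
```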
detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (2.96 kB). View file

detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc
ADDED
Binary file (3.81 kB). View file

detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc
ADDED
Binary file (9.57 kB). View file

detectors/CLIP-D/networks/openclipnet.py
ADDED
@@ -0,0 +1,85 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
import open_clip
from .resnet_mod import ChannelLinear

dict_pretrain = {
    'clipL14openai'     : ('ViT-L-14', 'openai'),
    'clipL14laion400m'  : ('ViT-L-14', 'laion400m_e32'),
    'clipL14laion2B'    : ('ViT-L-14', 'laion2b_s32b_b82k'),
    'clipL14datacomp'   : ('ViT-L-14', 'laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K', 'open_clip_pytorch_model.bin'),
    'clipL14commonpool' : ('ViT-L-14', "laion/CLIP-ViT-L-14-CommonPool.XL-s13B-b90K", 'open_clip_pytorch_model.bin'),
    'clipaL14datacomp'  : ('ViT-L-14-CLIPA', 'datacomp1b'),
    'cocaL14laion2B'    : ('coca_ViT-L-14', 'laion2b_s13b_b90k'),
    'clipg14laion2B'    : ('ViT-g-14', 'laion2b_s34b_b88k'),
    'eva2L14merged2b'   : ('EVA02-L-14', 'merged2b_s4b_b131k'),
    'clipB16laion2B'    : ('ViT-B-16', 'laion2b_s34b_b88k'),
}


class OpenClipLinear(nn.Module):
    def __init__(self, num_classes=1, pretrain='clipL14commonpool', normalize=True, next_to_last=False):
        super(OpenClipLinear, self).__init__()

        # Modified to handle download failures gracefully
        # The checkpoint only contains fc weights, so we need the pretrained backbone
        if len(dict_pretrain[pretrain]) == 2:
            try:
                backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=dict_pretrain[pretrain][1])
            except Exception as e:
                print(f"WARNING: Could not download pretrained weights ({e}). Using random initialization.")
                backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=None)
        else:
            try:
                from huggingface_hub import hf_hub_download
                backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=hf_hub_download(*dict_pretrain[pretrain][1:]))
            except Exception as e:
                print(f"WARNING: Could not download pretrained weights ({e}). Using random initialization.")
                backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=None)

        if next_to_last:
            self.num_features = backbone.visual.proj.shape[0]
            backbone.visual.proj = None
        else:
            self.num_features = backbone.visual.output_dim

        self.bb = [backbone, ]
        self.normalize = normalize

        self.fc = ChannelLinear(self.num_features, num_classes)
        torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02)

    def to(self, *args, **kwargs):
        self.bb[0].to(*args, **kwargs)
        super(OpenClipLinear, self).to(*args, **kwargs)
        return self

    def forward_features(self, x):
        with torch.no_grad():
            self.bb[0].eval()
            features = self.bb[0].encode_image(x, normalize=self.normalize)
        return features

    def forward_head(self, x):
        return self.fc(x)

    def forward(self, x):
        return self.forward_head(self.forward_features(x))

detectors/CLIP-D/networks/resnet_mod.py
ADDED
@@ -0,0 +1,335 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ["ResNet", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152"]


model_urls = {
    "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
    "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
    "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
    "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
    "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
}

class ChannelLinear(nn.Linear):
    def __init__(
        self, in_features: int, out_features: int, bias: bool = True, pool=None
    ) -> None:
        super(ChannelLinear, self).__init__(in_features, out_features, bias)
        self.compute_axis = 1
        self.pool = pool

    def forward(self, x):
        axis_ref = len(x.shape) - 1
        x = torch.transpose(x, self.compute_axis, axis_ref)
        out_shape = list(x.shape)
        out_shape[-1] = self.out_features
        x = x.reshape(-1, x.shape[-1])
        x = x.matmul(self.weight.t())
        if self.bias is not None:
            x = x + self.bias[None, :]
        x = torch.transpose(x.view(out_shape), axis_ref, self.compute_axis)
        if self.pool is not None:
            x = self.pool(x)
        return x


def conv3x3(in_planes, out_planes, stride=1, padding=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(
        in_planes, out_planes, kernel_size=3, stride=stride, padding=padding, bias=False
    )


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride, padding=padding)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, padding=padding)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.padding == 0:
            identity = identity[..., 1:-1, 1:-1]
        if self.downsample is not None:
            identity = self.downsample(identity)
        if self.padding == 0:
            identity = identity[..., 1:-1, 1:-1]

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride, padding=padding)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.padding == 0:
            identity = identity[..., 1:-1, 1:-1]
        if self.downsample is not None:
            identity = self.downsample(identity)

        out += identity
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(
        self,
        block,
        layers,
        num_classes=1000,
        zero_init_residual=False,
        stride0=2,
        padding=1,
        dropout=0.0,
        gap_size=None,
    ):
        super(ResNet, self).__init__()
        self.inplanes = 64

        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=stride0, padding=3 * padding, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)
        if dropout > 0:
            self.dropout = nn.Dropout(dropout)
        else:
            self.dropout = None
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=stride0, padding=padding)
        self.layer1 = self._make_layer(block, 64, layers[0], padding=padding)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, padding=padding)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, padding=padding)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, padding=padding)

        if gap_size is None:
            self.gap_size = None
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        elif gap_size < 0:
            with torch.no_grad():
                y = self.forward_features(
                    torch.zeros((1, 3, -gap_size, -gap_size), dtype=torch.float32)
                ).shape
            print("gap_size:", -gap_size, ">>", y[-1])
            self.gap_size = y[-1]
            self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0)
        elif gap_size == 1:
            self.gap_size = gap_size
            self.avgpool = None
        else:
            self.gap_size = gap_size
            self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0)
        self.num_features = 512 * block.expansion
        self.fc = ChannelLinear(self.num_features, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, padding=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(
            block(
                self.inplanes,
                planes,
                stride=stride,
                downsample=downsample,
                padding=padding,
            )
        )
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, padding=padding))

        return nn.Sequential(*layers)

    def change_output(self, num_classes):
        self.fc = ChannelLinear(self.num_features, num_classes)
        torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02)
        return self

    def change_input(self, num_inputs):
        data = self.conv1.weight.data
        old_num_inputs = int(data.shape[1])
        if num_inputs > old_num_inputs:
            times = num_inputs // old_num_inputs
            if (times * old_num_inputs) < num_inputs:
                times = times + 1
            data = data.repeat(1, times, 1, 1) / times
        elif num_inputs == old_num_inputs:
            return self

        data = data[:, :num_inputs, :, :]
        print(self.conv1.weight.data.shape, "->", data.shape)
        self.conv1.weight.data = data

        return self

    def forward_features(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def forward_head(self, x):
        if self.avgpool is not None:
            x = self.avgpool(x)
        if self.dropout is not None:
            x = self.dropout(x)
        y = self.fc(x)
        if self.gap_size is None:
            y = torch.squeeze(torch.squeeze(y, -1), -1)
        return y

    def forward(self, x):
        x = self.forward_features(x)
        x = self.forward_head(x)
        return x


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls["resnet18"]))
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls["resnet34"]))
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls["resnet50"]))
    return model


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls["resnet101"]))
    return model


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls["resnet152"]))
    return model

detectors/CLIP-D/parser.py
ADDED
@@ -0,0 +1,25 @@
import argparse

def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", type=str, default="test", help="run name")
    parser.add_argument("--arch", type=str, default="opencliplinearnext_clipL14commonpool", help="architecture name")

    parser.add_argument("--task", type=str, help="Task: train/test")
    parser.add_argument("--device", type=str, default="cuda:0", help="cuda device to use")

    parser.add_argument("--split_file", type=str, help="Path to split json")
    parser.add_argument("--data_root", type=str, help="Path to dataset")
    parser.add_argument("--data_keys", type=str, help="Dataset specifications")

    parser.add_argument("--batch_size", type=int, default=64, help='Dataloader batch size')
    parser.add_argument("--num_threads", type=int, default=14, help='# threads for loading data')

    parser.add_argument("--lr", type=float, default=0.0001, help="initial learning rate")
    parser.add_argument("--weight_decay", type=float, default=0.0, help="weight decay")
    parser.add_argument("--beta1", type=float, default=0.9, help="momentum term of adam")

    parser.add_argument("--num_epoches", type=int, default=1000, help="# of epochs at the starting learning rate")
    parser.add_argument("--earlystop_epoch", type=int, default=5, help="Number of epochs without loss reduction before lowering the learning rate")

    return parser

detectors/CLIP-D/test.py
ADDED
@@ -0,0 +1,167 @@
import os
from tqdm import tqdm
import torch
import pandas as pd
import json
import time
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score
from networks import create_architecture, count_parameters
from utils.dataset import create_dataloader
from utils.processing import add_processing_arguments
from parser import get_parser

def test(loader, model, settings, device):
    model.eval()

    start_time = time.time()

    # File paths
    output_dir = f'./results/{settings.name}/data/{settings.data_keys}'
    os.makedirs(output_dir, exist_ok=True)

    csv_filename = os.path.join(output_dir, 'results.csv')
    metrics_filename = os.path.join(output_dir, 'metrics.json')
    image_results_filename = os.path.join(output_dir, 'image_results.json')

    # Collect all results
    all_scores = []
    all_labels = []
    all_paths = []
    image_results = []

    # Extract training dataset keys from model name (format: "training_keys_freeze_down" or "training_keys")
    training_dataset_keys = []
    model_name = settings.name
    if '_freeze_down' in model_name:
        training_name = model_name.replace('_freeze_down', '')
    else:
        training_name = model_name
    if '&' in training_name:
        training_dataset_keys = training_name.split('&')
    else:
        training_dataset_keys = [training_name]

    # Write CSV header
    with open(csv_filename, 'w') as f:
        f.write(f"{','.join(['name', 'pro', 'flag'])}\n")

    with torch.no_grad():
        with tqdm(loader, unit='batch', mininterval=0.5) as tbatch:
            tbatch.set_description('Validation')
            for data_dict in tbatch:
                data = data_dict['img'].to(device)
                labels = data_dict['target'].to(device)
                paths = data_dict['path']

                scores = model(data).squeeze(1)

                # Collect results
                for score, label, path in zip(scores, labels, paths):
                    score_val = score.item()
                    label_val = label.item()

                    all_scores.append(score_val)
                    all_labels.append(label_val)
                    all_paths.append(path)

                    image_results.append({
                        'path': path,
                        'score': score_val,
                        'label': label_val
                    })

                # Write to CSV (maintain backward compatibility)
                with open(csv_filename, 'a') as f:
                    for score, label, path in zip(scores, labels, paths):
                        f.write(f"{path}, {score.item()}, {label.item()}\n")

    # Calculate metrics
    all_scores = np.array(all_scores)
    all_labels = np.array(all_labels)

    # Convert scores to predictions (threshold at 0, as used in train.py: y_pred > 0.0)
    predictions = (all_scores > 0).astype(int)

    # Calculate overall metrics
    total_accuracy = accuracy_score(all_labels, predictions)

    # TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1)
    fake_mask = all_labels == 1
    if fake_mask.sum() > 0:
        tpr = accuracy_score(all_labels[fake_mask], predictions[fake_mask])
    else:
        tpr = 0.0

    # TNR per dataset key (True Negative Rate) = TN / (TN + FP) = accuracy on real images (label==0)
    tnr_per_dataset = {}

    # Calculate TNR on real images (label==0) in the test set
    real_mask = all_labels == 0
    if real_mask.sum() > 0:
        # Overall TNR calculated on all real images in the test set
        tnr = accuracy_score(all_labels[real_mask], predictions[real_mask])
    else:
        tnr = 0.0

    # Map TNR to training dataset keys (as shown in the example JSON structure)
    # The TNR is calculated on the test set, but organized by training dataset keys
    # for training_key in training_dataset_keys:
    #     tnr_per_dataset[training_key] = overall_tnr

    # AUC calculation (needs probabilities, so we'll use sigmoid on scores)
    if len(np.unique(all_labels)) > 1:  # Need both classes for AUC
        # Apply sigmoid to convert scores to probabilities
        probabilities = torch.sigmoid(torch.tensor(all_scores)).numpy()
        auc = roc_auc_score(all_labels, probabilities)
    else:
        auc = 0.0

    execution_time = time.time() - start_time

    # Prepare metrics JSON
    metrics = {
        'TPR': float(tpr),
        'TNR': float(tnr),
        'Acc total': float(total_accuracy),
        'AUC': float(auc),
        'execution time': float(execution_time)
    }

    # Write metrics JSON
    with open(metrics_filename, 'w') as f:
        json.dump(metrics, f, indent=2)

    # Write individual image results JSON
    with open(image_results_filename, 'w') as f:
        json.dump(image_results, f, indent=2)

    print(f'\nMetrics saved to {metrics_filename}')
    print(f'Image results saved to {image_results_filename}')
    print('\nMetrics:')
    print(f'  TPR: {tpr:.4f}')
    print(f'  TNR: {tnr:.4f}')
    print(f'  Accuracy: {total_accuracy:.4f}')
    print(f'  AUC: {auc:.4f}')
    print(f'  Execution time: {execution_time:.2f} seconds')

if __name__ == '__main__':
    parser = get_parser()
    parser = add_processing_arguments(parser)
    settings = parser.parse_args()

    device = torch.device(settings.device if torch.cuda.is_available() else 'cpu')

    test_dataloader = create_dataloader(settings, split='test')

    model = create_architecture(settings.arch, pretrained=True, num_classes=1).to(device)
    num_parameters = count_parameters(model)
    print(f"Arch: {settings.arch} with #parameters {num_parameters}")

    load_path = f'./checkpoint/{settings.name}/weights/best.pt'

    print('loading the model from %s' % load_path)
    model.load_state_dict(torch.load(load_path, map_location=device)['model'])
    model.to(device)

    test(test_dataloader, model, settings, device)

detectors/CLIP-D/train.py
ADDED
@@ -0,0 +1,65 @@
import os
import tqdm
from utils import TrainingModel, create_dataloader, EarlyStopping
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from utils.processing import add_processing_arguments
from parser import get_parser

if __name__ == "__main__":
    parser = get_parser()
    parser = add_processing_arguments(parser)

    opt = parser.parse_args()

    os.makedirs(os.path.join('checkpoint', opt.name, 'weights'), exist_ok=True)

    valid_data_loader = create_dataloader(opt, split="val")
    train_data_loader = create_dataloader(opt, split="train")
    print()
    print("# validation batches = %d" % len(valid_data_loader))
    print("# training batches = %d" % len(train_data_loader))
    model = TrainingModel(opt)
    early_stopping = None
    start_epoch = model.total_steps // len(train_data_loader)
    print()

    for epoch in range(start_epoch, opt.num_epoches + 1):
        if epoch > start_epoch:
            # Training
            pbar = tqdm.tqdm(train_data_loader)
            for data in pbar:
                loss = model.train_on_batch(data).item()
                total_steps = model.total_steps
                pbar.set_description(f"Train loss: {loss:.4f}")

            # Save model
            model.save_networks(epoch)

        # Validation
        print("Validation ...", flush=True)
        y_true, y_pred, y_path = model.predict(valid_data_loader)
        acc = balanced_accuracy_score(y_true, y_pred > 0.0)
        auc = roc_auc_score(y_true, y_pred)
        lr = model.get_learning_rate()
        print("After {} epochs: val acc = {}; val auc = {}".format(epoch, acc, auc), flush=True)

        # Early Stopping
        if early_stopping is None:
            early_stopping = EarlyStopping(
                init_score=acc, patience=opt.earlystop_epoch,
                delta=0.001, verbose=True,
            )
            print('Save best model', flush=True)
            model.save_networks('best')
        else:
            if early_stopping(acc):
                print('Save best model', flush=True)
                model.save_networks('best')
            if early_stopping.early_stop:
                cont_train = model.adjust_learning_rate()
                if cont_train:
                    print("Learning rate dropped by 10, continue training ...", flush=True)
                    early_stopping.reset_counter()
                else:
                    print("Early stopping.", flush=True)
                    break

detectors/CLIP-D/utils/__init__.py
ADDED
@@ -0,0 +1,52 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

from .dataset import create_dataloader
from .training import TrainingModel

class EarlyStopping:
    def __init__(self, init_score=None, patience=1, verbose=False, delta=0):
        self.best_score = init_score
        self.patience = patience
        self.delta = delta
        self.verbose = verbose
        self.count_down = self.patience
        self.early_stop = False

    def __call__(self, score):
        if self.best_score is None:
            if self.verbose:
                print(f'Score set to {score:.6f}.')
            self.best_score = score
            self.count_down = self.patience
            return True
        elif score <= self.best_score + self.delta:
            self.count_down -= 1
            if self.verbose:
                print(f'EarlyStopping count_down: {self.count_down} on {self.patience}')
            if self.count_down <= 0:
                self.early_stop = True
            return False
        else:
            if self.verbose:
                print(f'Score increased from ({self.best_score:.6f} to {score:.6f}).')
            self.best_score = score
            self.count_down = self.patience
            return True

    def reset_counter(self):
        self.count_down = self.patience
        self.early_stop = False

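A quick illustration of how the `EarlyStopping` class above behaves may help: it returns True when the score improves by more than `delta` (the caller then saves a new "best" checkpoint) and flips `early_stop` once `patience` non-improving calls have accumulated, after which `train.py` lowers the learning rate and resets the counter. The values and the driving loop below are purely illustrative.

```python
# Minimal sketch of EarlyStopping behaviour (illustrative scores, not real training output).
from utils import EarlyStopping  # assumes CWD is detectors/CLIP-D

stopper = EarlyStopping(init_score=0.80, patience=2, delta=0.001, verbose=True)

for val_acc in [0.79, 0.85, 0.84, 0.84]:    # pretend per-epoch validation accuracies
    improved = stopper(val_acc)             # True -> caller would save a new 'best' checkpoint
    if stopper.early_stop:
        print("patience exhausted; train.py would now drop the LR or stop")
        stopper.reset_counter()             # what train.py does after lowering the LR
```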
detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (2.26 kB). View file

detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc
ADDED
Binary file (5.22 kB). View file

detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc
ADDED
Binary file (3.5 kB). View file

detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc
ADDED
Binary file (4.21 kB). View file

detectors/CLIP-D/utils/dataset.py
ADDED
@@ -0,0 +1,144 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import os
import json
import torch
import bisect
import numpy as np
from torch.utils.data.sampler import WeightedRandomSampler, RandomSampler
from torchvision import datasets
from .processing import make_processing

from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

def create_dataloader(opt, split=None):
    if split == "train":
        opt.split = 'train'
        is_train = True

    elif split == "val":
        opt.split = 'val'
        is_train = False

    elif split == "test":
        opt.split = 'test'
        is_train = False

    else:
        raise ValueError(f"Unknown split {split}")

    dataset = TrueFake_dataset(opt)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batch_size,
        shuffle=is_train,
        num_workers=int(opt.num_threads),
    )
    return data_loader

def parse_dataset(settings):
    gen_keys = {
        'gan1': ['StyleGAN'],
        'gan2': ['StyleGAN2'],
        'gan3': ['StyleGAN3'],
        'sd15': ['StableDiffusion1.5'],
        'sd2': ['StableDiffusion2'],
        'sd3': ['StableDiffusion3'],
        'sdXL': ['StableDiffusionXL'],
        'flux': ['FLUX.1'],
        'realFFHQ': ['FFHQ'],
        'realFORLAB': ['FORLAB']
    }

    gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()]
    # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key]
    # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key]
    gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key]

    mod_keys = {
        'pre': ['PreSocial'],
        'fb': ['Facebook'],
        'tl': ['Telegram'],
        'tw': ['X'],
    }

    mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()]
    mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']]

    need_real = (settings.split in ['train', 'val'] and not len([data for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]]))

    assert not need_real, 'Train task without real data; this will not be handled automatically, terminating'

    dataset_list = []
    for data in settings.data_keys.split('&'):
        gen, mod = data.split(':')
        dataset_list.append({'gen': gen_keys[gen], 'mod': mod_keys[mod]})

    return dataset_list

class TrueFake_dataset(datasets.DatasetFolder):
    def __init__(self, settings):
        self.data_root = settings.data_root
        self.split = settings.split

        with open(settings.split_file, "r") as f:
            split_list = sorted(json.load(f)[self.split])

        dataset_list = parse_dataset(settings)

        self.samples = []
        self.info = []
        for dict in dataset_list:
            generators = dict['gen']
            modifiers = dict['mod']

            for mod in modifiers:
                for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True):
                    if len(dataset_dirs):
                        continue
                    (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3]

                    if gen in generators:
                        for filename in sorted(dataset_files):
                            if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']:
                                if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])):
                                    self.samples.append(os.path.join(dataset_root, filename))
                                    self.info.append((mod, label, gen, sub))

        self.transform = make_processing(settings)
        print(self.transform)

    def _in_list(self, split, elem):
        i = bisect.bisect_left(split, elem)
        return i != len(split) and split[i] == elem

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path = self.samples[index]
        mod, label, gen, sub = self.info[index]

        sample = Image.open(path).convert('RGB')
        sample = self.transform(sample)

        target = 1.0 if label == 'Fake' else 0.0

        return {'img': sample, 'target': target, 'path': path}

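The `--data_keys` string parsed by `parse_dataset` above encodes one or more `generator:modifier` pairs joined by `&`, where the short keys map to the folder names listed in `gen_keys` and `mod_keys`. A small illustrative check is shown below; the specific key combination and the `Namespace` stand-in for the parsed CLI settings are assumptions for the example.

```python
# Illustrative only: how a --data_keys string is expanded by parse_dataset above.
# Assumes the working directory is detectors/CLIP-D so `utils` is importable.
from argparse import Namespace
from utils.dataset import parse_dataset

settings = Namespace(split='test', data_keys='sd15:pre&realFFHQ:pre')
print(parse_dataset(settings))
# -> [{'gen': ['StableDiffusion1.5'], 'mod': ['PreSocial']},
#     {'gen': ['FFHQ'], 'mod': ['PreSocial']}]
```

Note that for the `train` and `val` splits the function asserts that at least one `real*` generator key is present, since real images cannot be added automatically.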
detectors/CLIP-D/utils/processing.py
ADDED
@@ -0,0 +1,133 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import torchvision.transforms.v2 as Tv2

def make_processing(opt):
    opt = parse_arguments(opt)
    transforms_list = list()  # list of transforms

    if opt.task == 'train':
        transforms_aug = make_aug(opt)  # make data-augmentation transforms
        if transforms_aug is not None:
            transforms_list.append(transforms_aug)

    transforms_post = make_post(opt)  # make post-data-augmentation transforms
    if transforms_post is not None:
        transforms_list.append(transforms_post)

    transforms_list.append(make_normalize(opt))  # make normalization

    t = Tv2.Compose(transforms_list)

    return t


def add_processing_arguments(parser):
    # parser is an argparse.ArgumentParser
    #
    # ICASSP2023: --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.2 --jitter_prob 0.8 --colordist_prob 0.2 --cutout_prob 0.2 --noise_prob 0.2 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100 --resize_size 256 --resize_ratio 0.75
    # ICME2021  : --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.0 --jitter_prob 0.0 --colordist_prob 0.0 --cutout_prob 0.0 --noise_prob 0.0 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100
    #

    parser.add_argument("--resizeSize", type=int, default=224, help="scale images to this size post augmentation")

    # data-augmentation probabilities
    parser.add_argument("--resize_prob", type=float, default=0.0)
    parser.add_argument("--cmp_prob", type=float, default=0.0)

    # data-augmentation parameters
    parser.add_argument("--cmp_qual", default="75")
    parser.add_argument("--resize_size", type=int, default=256)
    parser.add_argument("--resize_ratio", type=float, default=1.0)

    # other
    parser.add_argument("--norm_type", type=str, default="clip")

    return parser


def parse_arguments(opt):
    if not isinstance(opt.cmp_qual, list):
        opt.cmp_qual = [int(s) for s in opt.cmp_qual.split(",")]
    return opt


def make_post(opt):
    transforms_list = list()
    if opt.resizeSize > 0:
        print("\nUsing Post Resizing\n")
        transforms_list.append(Tv2.Resize(opt.resizeSize, interpolation=Tv2.InterpolationMode.BICUBIC))
        transforms_list.append(Tv2.CenterCrop((opt.resizeSize, opt.resizeSize)))

    if len(transforms_list) == 0:
        return None
    else:
        return Tv2.Compose(transforms_list)


def make_aug(opt):
    # AUG
    transforms_list_aug = list()

    if (opt.resize_size > 0) and (opt.resize_prob > 0):  # opt.resized_ratio
        transforms_list_aug.append(
            Tv2.RandomApply(
                [
                    Tv2.RandomResizedCrop(
                        size=opt.resize_size,
                        scale=(5/8, 1.0),
                        ratio=(opt.resize_ratio, 1.0 / opt.resize_ratio),
                    )
                ],
                opt.resize_prob,
            )
        )

    if opt.cmp_prob > 0:
        transforms_list_aug.append(
            Tv2.RandomApply(
                [
                    Tv2.JPEG(
                        opt.cmp_qual
                    )
                ],
                opt.cmp_prob,
            )
        )

    if len(transforms_list_aug) > 0:
        return Tv2.Compose(transforms_list_aug)
    else:
        return None


def make_normalize(opt):
    transforms_list = list()

    if opt.norm_type == "clip":
        print("normalize CLIP")
        transforms_list.append(Tv2.ToTensor())
        transforms_list.append(
            Tv2.Normalize(
                mean=(0.48145466, 0.4578275, 0.40821073),
                std=(0.26862954, 0.26130258, 0.27577711),
            )
        )
    else:
        assert False

    return Tv2.Compose(transforms_list)
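
A minimal sketch of how these helpers fit together at inference time; the import path and the `opt.task` attribute are assumptions (the task flag is normally set by the detector's own option parser), and the remaining values are the defaults registered above:

```python
import argparse
from utils.processing import add_processing_arguments, make_processing  # import path assumed

parser = argparse.ArgumentParser()
parser = add_processing_arguments(parser)
opt = parser.parse_args([])   # defaults: resizeSize=224, norm_type="clip", no augmentation
opt.task = 'test'             # anything other than 'train' skips make_aug()

transform = make_processing(opt)  # Resize(224, bicubic) + CenterCrop + ToTensor + CLIP normalization
# tensor = transform(pil_image)   # apply to a PIL.Image before feeding the CLIP-D network
```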
detectors/CLIP-D/utils/training.py
ADDED
@@ -0,0 +1,105 @@
'''
Copyright 2024 Image Processing Research Group of University Federico
II of Naples ('GRIP-UNINA'). All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import os
import torch
import numpy as np
import tqdm
from networks import create_architecture, count_parameters

class TrainingModel(torch.nn.Module):

    def __init__(self, opt):
        super(TrainingModel, self).__init__()

        self.opt = opt
        self.total_steps = 0
        self.save_dir = os.path.join('checkpoint', opt.name, 'weights')
        self.device = torch.device(opt.device if torch.cuda.is_available() else 'cpu')

        self.model = create_architecture(opt.arch, pretrained=True, num_classes=1)
        num_parameters = count_parameters(self.model)
        print(f"Arch: {opt.arch} with #trainable {num_parameters}")

        self.loss_fn = torch.nn.BCEWithLogitsLoss().to(self.device)
        parameters = filter(lambda p: p.requires_grad, self.model.parameters())
        self.optimizer = torch.optim.Adam(parameters, lr=opt.lr, betas=(opt.beta1, 0.999), weight_decay=opt.weight_decay)

        self.model.to(self.device)

    def adjust_learning_rate(self, min_lr=1e-6):
        for param_group in self.optimizer.param_groups:
            param_group["lr"] /= 10.0
            if param_group["lr"] < min_lr:
                return False
        return True

    def get_learning_rate(self):
        for param_group in self.optimizer.param_groups:
            return param_group["lr"]

    def train_on_batch(self, data):
        self.total_steps += 1
        self.model.train()
        input = data['img'].to(self.device)
        label = data['target'].to(self.device).float()
        output = self.model(input)
        if len(output.shape) == 4:
            ss = output.shape
            loss = self.loss_fn(
                output,
                label[:, None, None, None].repeat(
                    (1, int(ss[1]), int(ss[2]), int(ss[3]))
                ),
            )
        else:
            loss = self.loss_fn(output.squeeze(1), label)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.cpu()

    def save_networks(self, epoch):
        save_filename = f'{epoch}.pt'
        save_path = os.path.join(self.save_dir, save_filename)

        # serialize model and optimizer to dict
        state_dict = {
            'model': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'total_steps': self.total_steps,
        }

        torch.save(state_dict, save_path)

    def predict(self, data_loader):
        model = self.model.eval()
        with torch.no_grad():
            y_true, y_pred, y_path = [], [], []
            for data in tqdm.tqdm(data_loader):
                img = data['img']
                label = data['target'].cpu().numpy()
                paths = list(data['path'])
                out_tens = model(img.to(self.device)).cpu().numpy()[:, -1]
                assert label.shape == out_tens.shape

                y_pred.extend(out_tens.tolist())
                y_true.extend(label.tolist())
                y_path.extend(paths)

        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return y_true, y_pred, y_path
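
A rough usage sketch of `TrainingModel`; the attribute names mirror what `__init__` reads, but the concrete values are placeholders and the architecture string must be one accepted by `networks.create_architecture` (run from the detector's root so the `networks` package resolves):

```python
from types import SimpleNamespace
from utils.training import TrainingModel  # import path assumed

opt = SimpleNamespace(
    name='my_run', device='cuda:0',
    arch='clip_vit_l14',   # placeholder: must be a key known to networks.create_architecture
    lr=1e-4, beta1=0.9, weight_decay=0.0,
)
tm = TrainingModel(opt)

# for data in train_loader:                 # batches shaped like {'img': ..., 'target': ..., 'path': ...}
#     loss = tm.train_on_batch(data)
# y_true, y_pred, y_path = tm.predict(val_loader)
# tm.save_networks('best')                  # writes checkpoint/my_run/weights/best.pt
```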
detectors/NPR/README.md
ADDED
@@ -0,0 +1,49 @@
# Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection

[](https://github.com/chuangchuangtan/NPR-DeepfakeDetection)
[](https://arxiv.org/abs/2312.10461)

Original Paper:
[Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection](https://arxiv.org/abs/2312.10461).

Authors: Chuangchuang Tan, Huan Liu, Yao Zhao, Shikui Wei, Guanghua Gu, Ping Liu, Yunchao Wei.

## Abstract

Recently, the proliferation of highly realistic synthetic images, facilitated through a variety of GANs and Diffusions, has significantly heightened the susceptibility to misuse. While the primary focus of deepfake detection has traditionally centered on the design of detection algorithms, an investigative inquiry into the generator architectures has remained conspicuously absent in recent years. This paper contributes to this lacuna by rethinking the architectures of CNN-based generators, thereby establishing a generalized representation of synthetic artifacts. Our findings illuminate that the up-sampling operator can, beyond frequency-based artifacts, produce generalized forgery artifacts. In particular, the local interdependence among image pixels caused by upsampling operators is significantly demonstrated in synthetic images generated by GAN or diffusion. Building upon this observation, we introduce the concept of Neighboring Pixel Relationships (NPR) as a means to capture and characterize the generalized structural artifacts stemming from up-sampling operations. A comprehensive analysis is conducted on an open-world dataset, comprising samples generated by 28 distinct generative models. This analysis culminates in the establishment of a novel state-of-the-art performance, showcasing a remarkable 11.6% improvement over existing methods.


## Please Cite

```
@inproceedings{tan2024rethinking,
  title={Rethinking the up-sampling operations in cnn-based generative network for generalizable deepfake detection},
  author={Tan, Chuangchuang and Zhao, Yao and Wei, Shikui and Gu, Guanghua and Liu, Ping and Wei, Yunchao},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={28130--28139},
  year={2024}
}
```
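
To make the abstract's core idea concrete, here is an illustrative sketch (not the authors' reference implementation) of exposing up-sampling traces: compare an image with a copy that has been nearest-neighbour down- and up-sampled, so that only the local residual structure imposed by up-sampling remains.

```python
import torch
import torch.nn.functional as F

def upsampling_residual(img: torch.Tensor, factor: float = 0.5) -> torch.Tensor:
    """Illustrative neighboring-pixel residual: an image minus its down/up-sampled copy.

    `img` is a (B, C, H, W) float tensor. This is a simplified stand-in for the
    NPR feature described in the paper, not the official formulation.
    """
    down = F.interpolate(img, scale_factor=factor, mode='nearest')
    up = F.interpolate(down, size=img.shape[-2:], mode='nearest')  # back to the original size
    return img - up
```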
detectors/NPR/__pycache__/util.cpython-310.pyc
ADDED
Binary file (1.73 kB).
detectors/NPR/__pycache__/validate.cpython-310.pyc
ADDED
Binary file (1.7 kB).
detectors/NPR/checkpoint/pretrained/weights/best.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fb76594b7304c23249206ab2ac434dbd153c4114660ecc11a23eb82848f0721d
size 5831180
detectors/NPR/data/__init__.py
ADDED
@@ -0,0 +1,201 @@
import torch
import numpy as np
from torch.utils.data.sampler import WeightedRandomSampler

from .datasets import dataset_folder
from torchvision.datasets import DatasetFolder
import json
import bisect
from PIL import Image
import torchvision.transforms.v2 as Tv2
'''
def get_dataset(opt):
    dset_lst = []
    for cls in opt.classes:
        root = opt.dataroot + '/' + cls
        dset = dataset_folder(opt, root)
        dset_lst.append(dset)
    return torch.utils.data.ConcatDataset(dset_lst)
'''

import os
# def get_dataset(opt):
#     classes = os.listdir(opt.dataroot) if len(opt.classes) == 0 else opt.classes
#     if '0_real' not in classes or '1_fake' not in classes:
#         dset_lst = []
#         for cls in classes:
#             root = opt.dataroot + '/' + cls
#             dset = dataset_folder(opt, root)
#             dset_lst.append(dset)
#         return torch.utils.data.ConcatDataset(dset_lst)
#     return dataset_folder(opt, opt.dataroot)

# def get_bal_sampler(dataset):
#     targets = []
#     for d in dataset.datasets:
#         targets.extend(d.targets)

#     ratio = np.bincount(targets)
#     w = 1. / torch.tensor(ratio, dtype=torch.float)
#     sample_weights = w[targets]
#     sampler = WeightedRandomSampler(weights=sample_weights,
#                                     num_samples=len(sample_weights))
#     return sampler


# def create_dataloader(opt):
#     shuffle = not opt.serial_batches if (opt.isTrain and not opt.class_bal) else False
#     dataset = get_dataset(opt)
#     sampler = get_bal_sampler(dataset) if opt.class_bal else None

#     data_loader = torch.utils.data.DataLoader(dataset,
#                                               batch_size=opt.batch_size,
#                                               shuffle=shuffle,
#                                               sampler=sampler,
#                                               num_workers=int(opt.num_threads))
#     return data_loader


def parse_dataset(settings):
    gen_keys = {
        'gan1': ['StyleGAN'],
        'gan2': ['StyleGAN2'],
        'gan3': ['StyleGAN3'],
        'sd15': ['StableDiffusion1.5'],
        'sd2': ['StableDiffusion2'],
        'sd3': ['StableDiffusion3'],
        'sdXL': ['StableDiffusionXL'],
        'flux': ['FLUX.1'],
        'realFFHQ': ['FFHQ'],
        'realFORLAB': ['FORLAB']
    }

    gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()]
    # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key]
    # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key]
    gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key]

    mod_keys = {
        'pre': ['PreSocial'],
        'fb': ['Facebook'],
        'tl': ['Telegram'],
        'tw': ['X'],
    }

    mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()]
    mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']]

    need_real = (settings.task == 'train' and not len([data.split(':')[0] for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]]))

    assert not need_real, 'Train task without real data, this will not get handled automatically, terminating'

    dataset_list = []
    for data in settings.data_keys.split('&'):
        gen, mod = data.split(':')
        dataset_list.append({'gen': gen_keys[gen], 'mod': mod_keys[mod]})

    return dataset_list

class TrueFake_dataset(DatasetFolder):
    def __init__(self, settings):
        self.data_root = settings.data_root
        self.split = settings.split

        with open(settings.split_file, "r") as f:
            split_list = sorted(json.load(f)[self.split])

        dataset_list = parse_dataset(settings)

        self.samples = []
        self.info = []
        for dict in dataset_list:
            generators = dict['gen']
            modifiers = dict['mod']

            for mod in modifiers:
                for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True):
                    if len(dataset_dirs):
                        continue

                    (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3]

                    if gen in generators:
                        for filename in sorted(dataset_files):
                            if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']:
                                if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])):
                                    self.samples.append(os.path.join(dataset_root, filename))
                                    self.info.append((mod, label, gen, sub))

        if settings.isTrain:
            crop_func = Tv2.RandomCrop(settings.cropSize)
        elif settings.no_crop:
            crop_func = Tv2.Identity()
        else:
            crop_func = Tv2.CenterCrop(settings.cropSize)

        if settings.isTrain and not settings.no_flip:
            flip_func = Tv2.RandomHorizontalFlip()
        else:
            flip_func = Tv2.Identity()

        if not settings.isTrain and settings.no_resize:
            rz_func = Tv2.Identity()
        else:
            rz_func = Tv2.Resize((settings.loadSize, settings.loadSize))

        self.transform = Tv2.Compose([
            rz_func,
            crop_func,
            flip_func,
            Tv2.ToTensor(),
            Tv2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])


    def _in_list(self, split, elem):
        i = bisect.bisect_left(split, elem)
        return i != len(split) and split[i] == elem

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path = self.samples[index]
        mod, label, gen, sub = self.info[index]

        image = Image.open(path).convert('RGB')
        sample = self.transform(image)

        target = 1.0 if label == 'Fake' else 0.0

        return sample, target, path


def create_dataloader(settings, split=None):
    if split == "train":
        settings.split = 'train'
        is_train = True

    elif split == "val":
        settings.split = 'val'
        settings.batch_size = settings.batch_size // 4
        is_train = False

    elif split == "test":
        settings.split = 'test'
        settings.batch_size = settings.batch_size // 4
        is_train = False

    else:
        raise ValueError(f"Unknown split {split}")

    dataset = TrueFake_dataset(settings)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=settings.batch_size,
        num_workers=int(settings.num_threads),
        shuffle=is_train,
        collate_fn=None,
    )
    return data_loader
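
The `data_keys` string selects which subsets to load: `&`-separated `generator:modifier` pairs using the short keys defined in `parse_dataset` (for example `sdXL:pre` for pre-social StableDiffusionXL images). A hedged usage sketch; the attribute names follow what `TrueFake_dataset` and `create_dataloader` read, while the paths and sizes are placeholders:

```python
from types import SimpleNamespace
from data import create_dataloader  # import path assumed (run from detectors/NPR)

settings = SimpleNamespace(
    data_root='/path/to/TrueFake',       # placeholder dataset root
    split_file='/path/to/split.json',    # placeholder split definition
    data_keys='sdXL:pre&realFFHQ:pre',   # fake SDXL + real FFHQ, pre-social versions
    task='test', isTrain=False,
    no_crop=True, no_flip=True, no_resize=True,
    cropSize=224, loadSize=256,
    batch_size=32, num_threads=4,        # batch size is divided by 4 for val/test
)
loader = create_dataloader(settings, split='test')  # yields (img, target, path) batches
```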
detectors/NPR/data/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (4.78 kB).
detectors/NPR/data/__pycache__/datasets.cpython-310.pyc
ADDED
Binary file (4.65 kB).
detectors/NPR/data/datasets.py
ADDED
@@ -0,0 +1,139 @@
import cv2
import numpy as np
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
from random import random, choice
from io import BytesIO
from PIL import Image
from PIL import ImageFile
from scipy.ndimage.filters import gaussian_filter
from torchvision.transforms import InterpolationMode

ImageFile.LOAD_TRUNCATED_IMAGES = True

def dataset_folder(opt, root):
    if opt.mode == 'binary':
        return binary_dataset(opt, root)
    if opt.mode == 'filename':
        return FileNameDataset(opt, root)
    raise ValueError('opt.mode needs to be binary or filename.')


def binary_dataset(opt, root):
    if opt.isTrain:
        crop_func = transforms.RandomCrop(opt.cropSize)
    elif opt.no_crop:
        crop_func = transforms.Lambda(lambda img: img)
    else:
        crop_func = transforms.CenterCrop(opt.cropSize)

    if opt.isTrain and not opt.no_flip:
        flip_func = transforms.RandomHorizontalFlip()
    else:
        flip_func = transforms.Lambda(lambda img: img)
    if not opt.isTrain and opt.no_resize:
        rz_func = transforms.Lambda(lambda img: img)
    else:
        # rz_func = transforms.Lambda(lambda img: custom_resize(img, opt))
        rz_func = transforms.Resize((opt.loadSize, opt.loadSize))

    dset = datasets.ImageFolder(
        root,
        transforms.Compose([
            rz_func,
            # transforms.Lambda(lambda img: data_augment(img, opt)),
            crop_func,
            flip_func,
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]))
    return dset


class FileNameDataset(datasets.ImageFolder):
    def name(self):
        return 'FileNameDataset'

    def __init__(self, opt, root):
        self.opt = opt
        super().__init__(root)

    def __getitem__(self, index):
        # Loading sample
        path, target = self.samples[index]
        return path


def data_augment(img, opt):
    img = np.array(img)

    if random() < opt.blur_prob:
        sig = sample_continuous(opt.blur_sig)
        gaussian_blur(img, sig)

    if random() < opt.jpg_prob:
        method = sample_discrete(opt.jpg_method)
        qual = sample_discrete(opt.jpg_qual)
        img = jpeg_from_key(img, qual, method)

    return Image.fromarray(img)


def sample_continuous(s):
    if len(s) == 1:
        return s[0]
    if len(s) == 2:
        rg = s[1] - s[0]
        return random() * rg + s[0]
    raise ValueError("Length of iterable s should be 1 or 2.")


def sample_discrete(s):
    if len(s) == 1:
        return s[0]
    return choice(s)


def gaussian_blur(img, sigma):
    gaussian_filter(img[:,:,0], output=img[:,:,0], sigma=sigma)
    gaussian_filter(img[:,:,1], output=img[:,:,1], sigma=sigma)
    gaussian_filter(img[:,:,2], output=img[:,:,2], sigma=sigma)


def cv2_jpg(img, compress_val):
    img_cv2 = img[:,:,::-1]
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compress_val]
    result, encimg = cv2.imencode('.jpg', img_cv2, encode_param)
    decimg = cv2.imdecode(encimg, 1)
    return decimg[:,:,::-1]


def pil_jpg(img, compress_val):
    out = BytesIO()
    img = Image.fromarray(img)
    img.save(out, format='jpeg', quality=compress_val)
    img = Image.open(out)
    # load from memory before BytesIO closes
    img = np.array(img)
    out.close()
    return img


jpeg_dict = {'cv2': cv2_jpg, 'pil': pil_jpg}
def jpeg_from_key(img, compress_val, key):
    method = jpeg_dict[key]
    return method(img, compress_val)


# rz_dict = {'bilinear': Image.BILINEAR,
#            'bicubic': Image.BICUBIC,
#            'lanczos': Image.LANCZOS,
#            'nearest': Image.NEAREST}
rz_dict = {'bilinear': InterpolationMode.BILINEAR,
           'bicubic': InterpolationMode.BICUBIC,
           'lanczos': InterpolationMode.LANCZOS,
           'nearest': InterpolationMode.NEAREST}
def custom_resize(img, opt):
    interp = sample_discrete(opt.rz_interp)
    return TF.resize(img, (opt.loadSize, opt.loadSize), interpolation=rz_dict[interp])
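
`data_augment` operates on a PIL image and expects an options object carrying the blur and JPEG fields referenced above. A small sketch of that option shape; the values are illustrative and the import path is assumed:

```python
import numpy as np
from types import SimpleNamespace
from PIL import Image
from data.datasets import data_augment  # import path assumed (run from detectors/NPR)

aug_opt = SimpleNamespace(
    blur_prob=0.5, blur_sig=[0.0, 3.0],        # sigma drawn uniformly from [0, 3]
    jpg_prob=0.5, jpg_method=['cv2', 'pil'],   # compression backend picked per image
    jpg_qual=list(range(30, 101)),             # JPEG quality picked from 30..100
)
img = Image.fromarray((np.random.rand(256, 256, 3) * 255).astype(np.uint8))
aug_img = data_augment(img, aug_opt)           # returns an augmented PIL image
```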
detectors/NPR/detect.py
ADDED
@@ -0,0 +1,106 @@
import os
import sys
import time
import yaml
import torch
from PIL import Image
import torchvision.transforms as transforms
import argparse

# Add project root to path for imports
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(project_root)
from support.detect_utils import format_result, save_result, get_device
import networks.resnet as resnet
from networks.resnet import resnet50


def parse_args():
    parser = argparse.ArgumentParser(description='NPR single image detector')
    parser.add_argument('--image', type=str, required=True, help='Path to input image')
    parser.add_argument('--model', type=str, default='checkpoint/best.pt', help='Path to model checkpoint')
    parser.add_argument('--output', type=str, help='Path to save detection result JSON')
    parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)')
    parser.add_argument('--config', type=str, default='configs/npr.yaml', help='Path to config file')
    return parser.parse_args()

def load_config(config_path):
    """Load configuration from YAML file."""
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)

def load_image(image_path, size=224):
    """Load and preprocess image."""
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform(image).unsqueeze(0)

def main():
    args = parse_args()

    # Setup device
    device = torch.device(args.device) if args.device else get_device()

    # Load model
    try:
        # Initialize model
        model = resnet50(num_classes=1).to(device)
        load_path = f'./detectors/NPR/checkpoint/{args.model}/weights/best.pt'

        if not os.path.exists(load_path):
            raise FileNotFoundError(f"Model weights not found at: {load_path}")

        checkpoint = torch.load(load_path, map_location=device)

        model.load_state_dict(checkpoint, strict=True)

        model.eval()
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # Load and preprocess image
    try:
        image_tensor = load_image(args.image).to(device)
    except Exception as e:
        print(f"Error loading image: {e}")
        return

    # Run detection
    start_time = time.time()
    with torch.no_grad():
        try:
            score = model(image_tensor)
            prediction = torch.sigmoid(score)

            confidence = prediction.item()

            result = format_result(
                'fake' if confidence > 0.5 else 'real',
                confidence,
                time.time() - start_time
            )

            # Print result
            print(f"Prediction: {result['prediction']}")
            print(f"Confidence: {result['confidence']:.4f}")
            print(f"Time: {result['elapsed_time']:.3f}s")

            # Save result if output path provided
            if args.output:
                save_result(result, args.output)

        except Exception as e:
            print(f"Error during detection: {e}")
            return

if __name__ == '__main__':
    main()
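
The same pipeline can also be driven programmatically instead of through `parse_args`; a hedged sketch reusing `load_image` and `resnet50` from above (the image path is a placeholder, while the checkpoint path matches the weights added in this commit):

```python
import sys
import torch

sys.path.append('./detectors/NPR')   # assumption: run from the repo root so these modules resolve
from detect import load_image
from networks.resnet import resnet50

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = resnet50(num_classes=1).to(device).eval()
state = torch.load('./detectors/NPR/checkpoint/pretrained/weights/best.pt', map_location=device)
model.load_state_dict(state, strict=True)

with torch.no_grad():
    prob = torch.sigmoid(model(load_image('/path/to/image.png').to(device))).item()
print('fake' if prob > 0.5 else 'real', prob)
```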
detectors/NPR/networks/__init__.py
ADDED
File without changes