diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d78608a7e0701a9bb371a97c865a73551b53bd6e Binary files /dev/null and b/.DS_Store differ diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..044370827ba3e34a0a05941c93b106b50d3ebe56 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,35 +1,9 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.json filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +detectors/P2G/src/models/clip/bpe_simple_vocab_16e6.txt.gz filter=lfs diff=lfs merge=lfs -text diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..2e4a511fb40f3f1248e256aeeee46d57bf9648d8 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,124 @@ +# Hugging Face Spaces Deployment Guide + +## Prerequisites + +1. A Hugging Face account +2. Git LFS installed locally: `git lfs install` +3. Model weights downloaded to the correct directories + +## Deployment Steps + +### 1. Prepare Model Weights + +You have two options: + +#### Option A: Upload weights via Git LFS (Recommended for public spaces) + +```bash +# Initialize Git LFS +git lfs install + +# Track large files +git lfs track "*.pt" +git lfs track "*.pth" +git lfs track "*.pkl" + +# Add weights +git add .gitattributes +git add detectors/*/checkpoint/pretrained/weights/best.pt +git add detectors/P2G/src/utils/classes.pkl +git commit -m "Add model weights" +``` + +#### Option B: Configure automatic download + +1. Upload your model weights to Google Drive or another host +2. Update `download_weights.py` with the correct URLs +3. Weights will download automatically when the Space starts + +### 2. Create Hugging Face Space + +1. Go to https://huggingface.co/spaces +2. Click "Create new Space" +3. 
Choose: + - **Name**: deepfake-detection-library (or your preferred name) + - ** SDK**: Gradio + - **License**: MIT + - **Hardware**: CPU Basic (free) or upgrade to GPU if needed + +### 3. Push to Hugging Face + +```bash +# Add HF remote (replace YOUR_USERNAME and SPACE_NAME) +git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME + +# Rename README for HF +mv README.md README_github.md +mv README_HF.md README.md + +# Push to Hugging Face +git add . +git commit -m "Initial commit for HF Spaces" +git push hf main +``` + +### 4. Configure Space + +In your Space settings on Hugging Face: + +- **Hardware**: Start with CPU Basic (free), upgrade to GPU if needed +- **Secrets**: Add any API keys if needed (none required currently) +- **Variables**: No special environment variables needed + +### 5. Verify Deployment + +1. Wait for the Space to build (may take 5-10 minutes) +2. Test each detector with sample images +3. Check logs for any errors + +## File Size Considerations + +- **Git LFS** is required for files >10MB +- Each model weight file (~100-500MB) will be stored via LFS +- Free HF Spaces have storage limits; consider: + - Upgrading to Pro for more storage + - Using automatic download instead of uploading weights + +## Troubleshooting + +### Space fails to build + +- Check `requirements.txt` for incompatible versions +- Review build logs in the Space interface +- Ensure all dependencies are listed + +### Weights not loading + +- Verify Git LFS tracked the files: `git lfs ls-files` +- Check file sizes: LFS pointer files are ~130 bytes +- Update `download_weights.py` if using automatic download + +### Out of memory errors + +- Upgrade to GPU hardware (T4 small recommended) +- Reduce batch size or model size if possible +- Use CPU inference for deployment (already configured) + +## Cost Optimization + +- **CPU Basic** (free): Works but slower +- **CPU Upgrade** ($0.03/hour): Faster inference +- **T4 Small GPU** ($0.60/hour): Needed for real-time performance + +## Maintenance + +- Monitor Space usage in HF dashboard +- Update models by pushing new weights via Git LFS +- Check Gradio version compatibility: `pip list | grep gradio` + +## Support + +For issues specific to this deployment, check: +- [Gradio Documentation](https://gradio.app/docs/) +- [HF Spaces Documentation](https://huggingface.co/docs/hub/spaces) +- [GitHub Repository](https://github.com/truebees-ai/Image-Deepfake-Detectors-Public-Library) diff --git a/README.md b/README.md index 5cb1a9498e0f8d4e2694e876d3f1193de13a8e98..2dc9ffc7287b48f492a97bcc72703c5f4a44f8db 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,272 @@ +# Image Deepfake Detectors Public Library + +## Overview + +This repository provides a unified framework for training, testing, and benchmarking multiple state-of-the-art (SoA) deepfake detection models. It supports automated benchmarking, training, demo runs, and single-image detection, with modular configuration and extensible detector support. + +### Main Features + +- **Multiple Detectors:** Supports CLIP-D [1], NPR [2], P2G [3], R50_TF [4], and R50_nodown [5] (an overview for each method is provided in its `README.md`: `./detectors//README.md`) +- **Pretrained Weights:** All models have been pretrained on images generated with StyleGAN2 and StableDiffusionXL, and real images from the FFHQ Dataset [6] and the FORLAB Dataset [7]. +- **Automated Training & Testing:** Use `launcher.py` to run experiments across detectors and datasets. 
+- **Demo Mode:** Easily test all detectors on sample images in `demo_images/`. +- **Single Image Detection:** Run detection on individual images via the command line. +- **Flexible Configuration:** All experiment parameters are set via YAML files in `configs/`. +- **Logging & Results:** Logs and results are saved per detector and scenario for easy analysis. + +--- + +## Set-Up + +### Prerequisites + +`Ubuntu>=22.04.3`, `Python>=3.10` and `CUDA:12.0` + +--- + +### Download Weights + +You can download the weights for each model from this [link](https://drive.google.com/file/d/1F60FN2B9skRcb3YrZwhFTZQihbj3ipJQ/view?usp=sharing). + +Then, copy them into the `pretrained` folder for the corresponding model, following this structure: `./detectors//checkpoint/pretrained/weights/best.pt` + --- -title: MDS Demonstrator -emoji: 🐢 -colorFrom: green -colorTo: yellow -sdk: gradio -sdk_version: 6.0.1 -app_file: app.py -pinned: false -license: apache-2.0 + +### Download Benchmarking Dataset + +DeepShield Dataset: [Zenodo link](https://zenodo.org/records/15648378) + +Download the dataset and change the corresponding `dataset_path` in `./configs/.yaml`. + +> The DeepShield dataset is a large-scale benchmark for evaluating the robustness of fake image detection systems. It contains 100,000 images, divided between real and AI-generated content produced using advanced generative models, including StyleGAN, StyleGAN2, StyleGAN3, Stable Diffusion 1.5, 2.1, 3, and XL, as well as Flux 1.0. +> +> To simulate real-world distortions, 30,000 images were shared on Facebook, X (formerly Twitter), and Telegram, then re-collected to include platform-induced compression and artifacts. This approach ensures that the dataset captures authentic distribution noise and artifacts encountered in real-world scenarios. + --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +### VirtualEnv + +Create a virtual environment using: + +```bash +python -m venv IDFD_VENV +source IDFD_VENV/bin/activate +pip install -r requirements.txt +```` + +Or use conda: + +```bash +conda env create -f environment.yml +conda activate IDFD_VENV +``` + +----- + +### Download Demo Dataset + +You can download the demo dataset from this [link](https://drive.google.com/file/d/134Bw8l9tEC7oZJpTAeMO80QRqgdJfJS9/view?usp=sharing). The demo dataset contains 200 images randomly sampled from the DeepShield Dataset. + +Place sample images for quick testing in `demo_images/`, organized by platform and label: + +``` + demo_images/ + Facebook/ + Fake/ + Real/ + PreSocial/ + Fake/ + Real/ + Telegram/ + Fake/ + Real/ + X/ + Fake/ + Real/ +``` + +----- + +## Running Experiments + +**1. Run Demo:** + +Test all detectors on sample images: + +```bash +python launcher.py --demo --demo-detector all +``` + +Test a single detector on sample images: + +```bash +python launcher.py --demo --demo-detector +``` + +**2. Automated Train-Test (Recommended) on DeepShield Dataset:** + +To run both train and test using a specific ``: + +```bash +python launcher.py --detector --phases both +``` + + - ``: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown` + - `--phases`: `train`, `test`, or `both` + +By doing so, the selected detector will be trained on images generated by StyleGAN2 and StableDiffusionXL and real images from the FORLAB and FFHQ Datasets, **not shared on social networks**. + +**3. 
Manual Train-Test on DeepShield Dataset:** + +```bash +python launcher.py --detector --phases --config-dir --weights_name +``` + + - ``: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown` + - `--phases`: `train`, `test`, or `both` + - `--config-dir`: Path to the detector config files (default: `configs/`) + - `--weights_name`: Model weights name. The default is defined in `configs/.yaml` by these lines: + + + +``` +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre +``` + +This corresponds to the training subsets used to train a detector (see the "Train on Different Generators from the DeepShield Dataset" section for more information). + +**4. Test the model using pretrained weights on the DeepShield Dataset:** + +```bash +python launcher.py --detector --phases test --weights_name pretrained +``` + +**5. Train the model using a custom weights name on the DeepShield Dataset:** + +```bash +python launcher.py --detector --phases train --weights_name +``` + +**6. Perform Detection on Single Images:** + +```bash +python launcher.py --detect --detector --image --weights --output +``` + + - ``: One of `CLIP-D`, `NPR`, `P2G`, `R50_TF`, `R50_nodown` + - `--image`: Path to the input image + - `--weights`: Weights name (default: `pretrained`) + - `--output`: Path to save detection results (default: `detection_results`) + +----- + +## Train on Different Generators from the DeepShield Dataset + +To train a detector on generators different from StyleGAN2 and StableDiffusionXL, modify these lines in `configs/.yaml`: + +```json +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre +``` + +Currently supported pairs of `(key, sub-dataset)` are: + +``` +'gan1':['StyleGAN'] +'gan2':['StyleGAN2'] +'gan3':['StyleGAN3'] +'sd15':['StableDiffusion1.5'] +'sd2':['StableDiffusion2'] +'sd3':['StableDiffusion3'] +'sdXL':['StableDiffusionXL'] +'flux':['FLUX.1'] +'realFFHQ':['FFHQ'] +'realFORLAB':['FORLAB'] +``` + +And supported pairs of `(key, social)` are: + +``` +'pre':[Not Shared on Social Networks], +'fb': [Facebook] +'tl': [Telegram] +'tw': [X Social] +``` + +Then, generate the corresponding `split.json` using `python support/json_compile.py` and use it to replace `./split.json`. NOTE: change line 9 `dataset_path=...` in `support/json_compile.py` accordingly. + +## Results & Logs + + - **Results:** Saved in `detectors//results/` + - **Logs:** Saved in `logs/` per run and scenario + +----- + +## Train/Test on a New Dataset + +### Data Organization + +Organize your data by platform and label: + +``` + / + Facebook/ + Fake/ + Real/ + PreSocial/ + Fake/ + Real/ + Telegram/ + Fake/ + Real/ + X/ + Fake/ + Real/ +``` + +Generate the corresponding `split.json` using `python support/json_compile.py` and use it to replace `./split.json`. NOTE: change line 9 `dataset_path=...` in `support/json_compile.py` accordingly. + +### Split Files + + - **`split.json`:** Main split file for experiments. Format: JSON with `train`/`test` keys and lists of sample IDs. + - **`split_demo.json`:** Auto-generated for demo mode, covering all images in `demo_images/`. + +----- + +## Additional Configuration Options + + - **YAML Files:** All detectors have a config file in `configs/` (e.g., `CLIP-D.yaml`, `NPR.yaml`). + - **Config Options:** + - `global`: Dataset path, device, split file, threads, etc. + - `detector_args`: Model-specific arguments. + - `training`: List of training scenarios. + - `testing`: List of test scenarios. + +----- + +## References + +[1] D. Cozzolino, G. Poggi, R. Corvi, M. Nießner, and L. 
Verdoliva, “Raising the Bar of AI-generated Image Detection with CLIP,” in 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 4356–4366, June 2024. ISSN: 2160-7516.
+
+[2] C. Tan, H. Liu, Y. Zhao, S. Wei, G. Gu, P. Liu, and Y. Wei, “Rethinking the Up-Sampling Operations in CNN-Based Generative Network for Generalizable Deepfake Detection,” in 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 28130–28139, June 2024. ISSN: 2575-7075.
+
+[3] F. Laiti, B. Liberatori, T. De Min, and E. Ricci, “Conditioned Prompt-Optimization for Continual Deepfake Detection,” in Pattern Recognition (A. Antonacopoulos, S. Chaudhuri, R. Chellappa, C.-L. Liu, S. Bhattacharya, and U. Pal, eds.), (Cham), pp. 64–79, Springer Nature Switzerland, 2025.
+
+[4] S. Dell'Anna, A. Montibeller, and G. Boato, “TrueFake: A Real World Case Dataset of Last Generation Fake Images also Shared on Social Networks,” arXiv preprint arXiv:2504.20658, 2025.
+
+[5] R. Corvi, D. Cozzolino, G. Zingarini, G. Poggi, K. Nagano, and L. Verdoliva, “On The Detection of Synthetic Images Generated by Diffusion Models,” in ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1–5, June 2023. ISSN: 2379-190X.
+
+[6] NVlabs, “Flickr-Faces-HQ dataset.” https://github.com/NVlabs/ffhq-dataset, n.d. Accessed: 2025-03-04.
+
+[7] M. Iuliani, M. Fontani, and A. Piva, “A leak in PRNU based source identification—questioning fingerprint uniqueness,” IEEE Access, vol. 9, pp. 52455–52463, 2021.
diff --git a/README_HF.md b/README_HF.md
new file mode 100644
index 0000000000000000000000000000000000000000..6425cfe39b97824b9ca6f489fe3164e5b254a274
--- /dev/null
+++ b/README_HF.md
@@ -0,0 +1,46 @@
+---
+title: Deepfake Detection Library
+emoji: 🔍
+colorFrom: red
+colorTo: orange
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+pinned: false
+license: mit
+---
+
+# Deepfake Detection Library
+
+This Space provides a unified interface to test multiple state-of-the-art deepfake detection models on your images.
+
+## Available Detectors
+
+- **R50_TF** - ResNet-50 based detector trained on the TrueFake dataset
+- **R50_nodown** - ResNet-50 without downsampling operations
+- **CLIP-D** - CLIP-based deepfake detector
+- **P2G** - Prompt2Guard: Conditioned prompt-optimization for continual deepfake detection
+- **NPR** - Neighboring Pixel Relationships based detector
+
+## Usage
+
+1. Upload an image
+2. Select a detector from the dropdown
+3. Click "Detect" to get the prediction
+
+The detector will return:
+- **Prediction**: Real or Fake
+- **Confidence**: Model confidence score (0-1)
+- **Elapsed Time**: Processing time
+
+## Models
+
+All models have been pretrained on images generated with StyleGAN2 and StableDiffusionXL, and real images from the FFHQ Dataset and the FORLAB Dataset.
+
+## References
+
+For more information about the implementation and benchmarking, visit the [GitHub repository](https://github.com/truebees-ai/Image-Deepfake-Detectors-Public-Library).
+
+## Note
+
+⚠️ Due to file size limitations, model weights need to be downloaded automatically on first use. This may take a few moments.
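
The note above refers to the automatic download path wired into `app.py`, which calls `download_all_weights()` from `download_weights.py` when the `SPACE_ID` environment variable is set. That script is not included in this diff; the following is a minimal sketch of what it might look like, assuming `gdown` as the downloader and placeholder Google Drive file IDs that must be replaced with the real ones.

```python
# Hypothetical sketch of download_weights.py -- the real script is not shown in
# this diff. The Google Drive file IDs below are placeholders, and gdown is an
# assumed dependency (any HTTP downloader would work).
import os

import gdown

# One best.pt file ID per detector (placeholders to be replaced).
WEIGHT_FILE_IDS = {
    "CLIP-D": "<drive-file-id>",
    "NPR": "<drive-file-id>",
    "P2G": "<drive-file-id>",
    "R50_TF": "<drive-file-id>",
    "R50_nodown": "<drive-file-id>",
}


def download_all_weights(root: str = "detectors") -> None:
    """Fetch missing weights into <root>/<detector>/checkpoint/pretrained/weights/best.pt."""
    for detector, file_id in WEIGHT_FILE_IDS.items():
        dest_dir = os.path.join(root, detector, "checkpoint", "pretrained", "weights")
        dest = os.path.join(dest_dir, "best.pt")
        if os.path.exists(dest):
            # Already present, e.g. shipped via Git LFS (Option A in DEPLOYMENT.md).
            continue
        os.makedirs(dest_dir, exist_ok=True)
        gdown.download(id=file_id, output=dest, quiet=False)


if __name__ == "__main__":
    download_all_weights()
```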
diff --git a/__pycache__/app.cpython-310.pyc b/__pycache__/app.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3de632b73201b56045c4fffa0f9703e40b2721d3 Binary files /dev/null and b/__pycache__/app.cpython-310.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..a7b1d0d240a10350a5b29a999106644938af4209 --- /dev/null +++ b/app.py @@ -0,0 +1,117 @@ +import gradio as gr +import os +import sys +import json +import argparse +from types import SimpleNamespace +from support.detect import run_detect + +# Download weights on first run (for HF Spaces) +if os.environ.get("SPACE_ID"): + try: + from download_weights import download_all_weights + download_all_weights() + except Exception as e: + print(f"Warning: Could not download weights: {e}") + +# Available detectors based on launcher.py +DETECTORS = ['R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR'] + +def predict(image_path, detector_name): + if not image_path: + return {"error": "Please upload an image."} + + # Create a temporary output file path + output_path = "temp_result.json" + + # Mock args object + args = SimpleNamespace( + image=image_path, + detector=detector_name, + config_dir='configs', + output=output_path, + weights='pretrained', # Use default/pretrained + device='cpu', # Force CPU + dry_run=False, + verbose=False + ) + + try: + # Run detection + # We need to capture stdout/stderr or just trust the function + # run_detect might raise FileNotFoundError if weights are missing + run_detect(args) + + # Read results + if os.path.exists(output_path): + with open(output_path, 'r') as f: + result = json.load(f) + + # Format output + prediction = result.get('prediction', 'Unknown') + confidence = result.get('confidence', 0.0) + elapsed_time = result.get('elapsed_time', 0.0) + + return { + "Prediction": prediction, + "Confidence": f"{confidence:.4f}", + "Elapsed Time": f"{elapsed_time:.3f}s" + } + else: + return {"error": "No result file generated. Check console logs for details."} + + except FileNotFoundError as e: + return {"error": str(e), "message": f"Please ensure you have downloaded the weights for {detector_name}."} + except Exception as e: + return {"error": str(e)} + finally: + # Cleanup + if os.path.exists(output_path): + os.remove(output_path) + +# Create Gradio Interface +with gr.Blocks(title="Deepfake Detection", theme=gr.themes.Soft()) as demo: + gr.Markdown("# 🔍 Deepfake Detection Library") + gr.Markdown(""" + Upload an image and select a detector to check if it's real or fake. + + **Available Detectors:** + - **R50_TF**: ResNet-50 based detector + - **R50_nodown**: ResNet-50 without downsampling + - **CLIP-D**: CLIP-based detector + - **P2G**: Prompt2Guard detector + - **NPR**: Neural Posterior Regularization + """) + + with gr.Row(): + with gr.Column(): + image_input = gr.Image(type="filepath", label="Input Image", height=400) + detector_input = gr.Dropdown( + choices=DETECTORS, + value=DETECTORS[0], + label="Select Detector", + info="Choose which deepfake detection model to use" + ) + submit_btn = gr.Button("🔍 Detect", variant="primary") + + with gr.Column(): + output_json = gr.JSON(label="Detection Results") + + gr.Markdown(""" + --- + ### About + This Space provides access to multiple state-of-the-art deepfake detection models. + All models are trained on StyleGAN2, StableDiffusionXL, FFHQ, and FORLAB datasets. + + **Note:** First detection may be slower due to model loading. 
+ """) + + submit_btn.click( + fn=predict, + inputs=[image_input, detector_input], + outputs=output_json + ) + +if __name__ == "__main__": + # For HF Spaces, share is automatically enabled + demo.launch() diff --git a/configs/CLIP-D.yaml b/configs/CLIP-D.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f54e0d5bfd64aeb55b3de61d2be665e00ab1e318 --- /dev/null +++ b/configs/CLIP-D.yaml @@ -0,0 +1,72 @@ +global: + dataset_path: + device_override: cpu # null for auto-selection + min_vram: 16000 + split_file: ./split.json + num_threads: 8 + dry_run: false + only_list: false + +detector_args: + - "--arch" + - "opencliplinearnext_clipL14commonpool" + - "--norm_type" + - "clip" + - "--resize_size" + - "200" + - "--resize_ratio" + - "1" + - "--resize_prob" + - "0.2" + - "--cmp_qual" + - "65,100" + - "--cmp_prob" + - "0.5" + - "--resizeSize" + - "224" + +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre + +testing: + - realFFHQ:fb + - realFORLAB:fb + - gan1:fb + - gan2:fb + - gan3:fb + - sd15:fb + - sd2:fb + - sd3:fb + - sdXL:fb + - flux:fb + - realFFHQ:tl + - realFORLAB:tl + - gan1:tl + - gan2:tl + - gan3:tl + - sd15:tl + - sd2:tl + - sd3:tl + - sdXL:tl + - flux:tl + - realFFHQ:tw + - realFORLAB:tw + - gan1:tw + - gan2:tw + - gan3:tw + - sd15:tw + - sd2:tw + - sd3:tw + - sdXL:tw + - flux:tw + - realFFHQ:pre + - realFORLAB:pre + - gan1:pre + - gan2:pre + - gan3:pre + - sd15:pre + - sd2:pre + - sd3:pre + - sdXL:pre + - flux:pre + diff --git a/configs/NPR.yaml b/configs/NPR.yaml new file mode 100644 index 0000000000000000000000000000000000000000..154ddfd27f73991a9e24a503889b7931a274feaa --- /dev/null +++ b/configs/NPR.yaml @@ -0,0 +1,56 @@ +global: + dataset_path: + device_override: cpu # null for auto-selection + min_vram: 16000 + split_file: ./split.json + num_threads: 8 + dry_run: false + only_list: false + +detector_args: [] + +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre + +testing: + - realFFHQ:fb + - realFORLAB:fb + - gan1:fb + - gan2:fb + - gan3:fb + - sd15:fb + - sd2:fb + - sd3:fb + - sdXL:fb + - flux:fb + - realFFHQ:tl + - realFORLAB:tl + - gan1:tl + - gan2:tl + - gan3:tl + - sd15:tl + - sd2:tl + - sd3:tl + - sdXL:tl + - flux:tl + - realFFHQ:tw + - realFORLAB:tw + - gan1:tw + - gan2:tw + - gan3:tw + - sd15:tw + - sd2:tw + - sd3:tw + - sdXL:tw + - flux:tw + - realFFHQ:pre + - realFORLAB:pre + - gan1:pre + - gan2:pre + - gan3:pre + - sd15:pre + - sd2:pre + - sd3:pre + - sdXL:pre + - flux:pre + diff --git a/configs/P2G.yaml b/configs/P2G.yaml new file mode 100644 index 0000000000000000000000000000000000000000..154ddfd27f73991a9e24a503889b7931a274feaa --- /dev/null +++ b/configs/P2G.yaml @@ -0,0 +1,56 @@ +global: + dataset_path: + device_override: cpu # null for auto-selection + min_vram: 16000 + split_file: ./split.json + num_threads: 8 + dry_run: false + only_list: false + +detector_args: [] + +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre + +testing: + - realFFHQ:fb + - realFORLAB:fb + - gan1:fb + - gan2:fb + - gan3:fb + - sd15:fb + - sd2:fb + - sd3:fb + - sdXL:fb + - flux:fb + - realFFHQ:tl + - realFORLAB:tl + - gan1:tl + - gan2:tl + - gan3:tl + - sd15:tl + - sd2:tl + - sd3:tl + - sdXL:tl + - flux:tl + - realFFHQ:tw + - realFORLAB:tw + - gan1:tw + - gan2:tw + - gan3:tw + - sd15:tw + - sd2:tw + - sd3:tw + - sdXL:tw + - flux:tw + - realFFHQ:pre + - realFORLAB:pre + - gan1:pre + - gan2:pre + - gan3:pre + - sd15:pre + - sd2:pre + - sd3:pre + - sdXL:pre + - flux:pre + diff --git a/configs/R50_TF.yaml 
b/configs/R50_TF.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ea8fd987e2ce5886e49e30f9218884fe512d5d5 --- /dev/null +++ b/configs/R50_TF.yaml @@ -0,0 +1,61 @@ +global: + dataset_path: + device_override: cpu # null for auto-selection + min_vram: 16000 + split_file: ./split.json + num_threads: 8 + dry_run: false + only_list: false + + +detector_args: + - "--arch" + - "nodown" + - "--prototype" + - "--freeze" + +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre + +testing: + - realFFHQ:fb + - realFORLAB:fb + - gan1:fb + - gan2:fb + - gan3:fb + - sd15:fb + - sd2:fb + - sd3:fb + - sdXL:fb + - flux:fb + - realFFHQ:tl + - realFORLAB:tl + - gan1:tl + - gan2:tl + - gan3:tl + - sd15:tl + - sd2:tl + - sd3:tl + - sdXL:tl + - flux:tl + - realFFHQ:tw + - realFORLAB:tw + - gan1:tw + - gan2:tw + - gan3:tw + - sd15:tw + - sd2:tw + - sd3:tw + - sdXL:tw + - flux:tw + - realFFHQ:pre + - realFORLAB:pre + - gan1:pre + - gan2:pre + - gan3:pre + - sd15:pre + - sd2:pre + - sd3:pre + - sdXL:pre + - flux:pre + diff --git a/configs/R50_nodown.yaml b/configs/R50_nodown.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eef97041c377b28ee6354a323584d710e379c3d5 --- /dev/null +++ b/configs/R50_nodown.yaml @@ -0,0 +1,84 @@ +global: + dataset_path: + device_override: cpu # null for auto-selection + min_vram: 16000 + split_file: ./split.json + num_threads: 8 + dry_run: false + only_list: false + +detector_args: + - "--arch" + - "res50nodown" + - "--norm_type" + - "resnet" + - "--resize_size" + - "256" + - "--resize_ratio" + - "0.75" + - "--resize_prob" + - "0.2" + - "--cmp_qual" + - "30,100" + - "--cmp_prob" + - "0.5" + - "--cropSize" + - "96" + - "--blur_sig" + - "0.1,3.0" + - "--blur_prob" + - "0.5" + - "--jitter_prob" + - "0.8" + - "--colordist_prob" + - "0.2" + - "--cutout_prob" + - "0.2" + - "--noise_prob" + - "0.2" + +training: + - data: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre + +testing: + - realFFHQ:fb + - realFORLAB:fb + - gan1:fb + - gan2:fb + - gan3:fb + - sd15:fb + - sd2:fb + - sd3:fb + - sdXL:fb + - flux:fb + - realFFHQ:tl + - realFORLAB:tl + - gan1:tl + - gan2:tl + - gan3:tl + - sd15:tl + - sd2:tl + - sd3:tl + - sdXL:tl + - flux:tl + - realFFHQ:tw + - realFORLAB:tw + - gan1:tw + - gan2:tw + - gan3:tw + - sd15:tw + - sd2:tw + - sd3:tw + - sdXL:tw + - flux:tw + - realFFHQ:pre + - realFORLAB:pre + - gan1:pre + - gan2:pre + - gan3:pre + - sd15:pre + - sd2:pre + - sd3:pre + - sdXL:pre + - flux:pre + diff --git a/demo_images/README.md b/demo_images/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6fc49583bb764ef80751d956330fc749409a43ec --- /dev/null +++ b/demo_images/README.md @@ -0,0 +1,21 @@ +# Download Demo Dataset + +You can download of demo dataset from this [link](https://drive.google.com/file/d/134Bw8l9tEC7oZJpTAeMO80QRqgdJfJS9/view?usp=sharing). The demo dataset contains 200 images randomly samples from the DeepShield Dataset. 
\ + +Place sample images for quick testing in `demo_images/`, organized by platform and label: + +``` + demo_images/ + Facebook/ + Fake/ + Real/ + PreSocial/ + Fake/ + Real/ + Telegram/ + Fake/ + Real/ + X/ + Fake/ + Real/ +``` diff --git a/detectors/.DS_Store b/detectors/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c616f44252aaa88e305d6db4c2b359ae7fcfe8e6 Binary files /dev/null and b/detectors/.DS_Store differ diff --git a/detectors/.gitattributes b/detectors/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..ed09e9e9de9df18ee882602fbd8f0fd7d4accf02 --- /dev/null +++ b/detectors/.gitattributes @@ -0,0 +1 @@ +*.pkl filter=lfs diff=lfs merge=lfs -text diff --git a/detectors/CLIP-D/.DS_Store b/detectors/CLIP-D/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..ba2ba54552856738e8ff38b7b088dae571278631 Binary files /dev/null and b/detectors/CLIP-D/.DS_Store differ diff --git a/detectors/CLIP-D/LICENSE b/detectors/CLIP-D/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d645695673349e3947e8e5ae42332d0ac3164cd7 --- /dev/null +++ b/detectors/CLIP-D/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/detectors/CLIP-D/README.md b/detectors/CLIP-D/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c7f2b24fb24d5cf851ca9757deabbca9668be0a6 --- /dev/null +++ b/detectors/CLIP-D/README.md @@ -0,0 +1,29 @@ +# ClipBased-SyntheticImageDetection + +[![Official Github Repo](https://img.shields.io/badge/Github%20page-222222.svg?style=for-the-badge&logo=github)](https://grip-unina.github.io/ClipBased-SyntheticImageDetection/) +[![Paper](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/abs/2312.00195v2) +[![GRIP Research Group Website](https://img.shields.io/badge/-GRIP-0888ef.svg?style=for-the-badge)](https://www.grip.unina.it) + +Original Paper: +[Raising the Bar of AI-generated Image Detection with CLIP](https://arxiv.org/abs/2312.00195v2). + +Authors: Davide Cozzolino, Giovanni Poggi, Riccardo Corvi, Matthias Nießner, and Luisa Verdoliva. 
+ +## Abstract + +The aim of this work is to explore the potential of pre-trained vision-language models (VLMs) for universal detection of AI-generated images. We develop a lightweight detection strategy based on CLIP features and study its performance in a wide variety of challenging scenarios. We find that, contrary to previous beliefs, it is neither necessary nor convenient to use a large domain-specific dataset for training. On the contrary, by using only a handful of example images from a single generative model, a CLIP-based detector exhibits surprising generalization ability and high robustness across different architectures, including recent commercial tools such as Dalle-3, Midjourney v5, and Firefly. We match the state-of-the-art (SoTA) on in-distribution data and significantly improve upon it in terms of generalization to out-of-distribution data (+6% AUC) and robustness to impaired/laundered data (+13%). + +## Please Cite + +``` +@inproceedings{cozzolino2023raising, + author={Davide Cozzolino and Giovanni Poggi and + Riccardo Corvi and Matthias Nießner and Luisa + Verdoliva}, + title={{Raising the Bar of AI-generated Image + Detection with CLIP}}, + booktitle={IEEE/CVF Conference on Computer Vision + and Pattern Recognition Workshops (CVPRW)}, + year={2024}, +} +``` \ No newline at end of file diff --git a/detectors/CLIP-D/__pycache__/parser.cpython-310.pyc b/detectors/CLIP-D/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ed86c21aad9c989282c84884489b0f1e77bf269 Binary files /dev/null and b/detectors/CLIP-D/__pycache__/parser.cpython-310.pyc differ diff --git a/detectors/CLIP-D/checkpoint/.DS_Store b/detectors/CLIP-D/checkpoint/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d48b163ea33eab6a41d80f5dcd117dd784a52cd2 Binary files /dev/null and b/detectors/CLIP-D/checkpoint/.DS_Store differ diff --git a/detectors/CLIP-D/checkpoint/pretrained/.DS_Store b/detectors/CLIP-D/checkpoint/pretrained/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f47c3dab2fe94113811bb1af58fd11660e56024a Binary files /dev/null and b/detectors/CLIP-D/checkpoint/pretrained/.DS_Store differ diff --git a/detectors/CLIP-D/checkpoint/pretrained/weights/best.pt b/detectors/CLIP-D/checkpoint/pretrained/weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa5119050fdd73e792f3de6b2e202558c7fd8e43 --- /dev/null +++ b/detectors/CLIP-D/checkpoint/pretrained/weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34315ca92adbf17921f5aa1e28f8fe5bf7d56dd8126205c9e264cbfb26582d12 +size 15452 diff --git a/detectors/CLIP-D/detect.py b/detectors/CLIP-D/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc85da514bfcdbee4616fd3d9de7366276df6b1 --- /dev/null +++ b/detectors/CLIP-D/detect.py @@ -0,0 +1,109 @@ +# ---------------------------------------------------------------------------- +# IMPORTS +# ---------------------------------------------------------------------------- +import os +import sys +import time +import torch +import argparse +from PIL import Image +import torchvision.transforms as transforms + +# Add project root to path for imports +project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(project_root) +from support.detect_utils import format_result, save_result, get_device + +from networks import create_architecture + +# 
---------------------------------------------------------------------------- +# IMAGE PREPROCESSING +# ---------------------------------------------------------------------------- +def preprocess_image(image_path, size=224): + """Load and preprocess a single image for model input.""" + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found: {image_path}") + + image = Image.open(image_path).convert('RGB') + transform = transforms.Compose([ + transforms.Resize(size), + transforms.CenterCrop(size), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + return transform(image).unsqueeze(0) # Add batch dimension + +# ---------------------------------------------------------------------------- +# ARGUMENT PARSING +# ---------------------------------------------------------------------------- +def parse_args(): + parser = argparse.ArgumentParser(description='CLIP-D single image detector') + parser.add_argument('--image', type=str, required=True, help='Path to input image') + parser.add_argument('--model', type=str, default='pretrained', help='Name of the model checkpoint directory') + parser.add_argument('--output', type=str, help='Path to save detection result JSON') + parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)') + return parser.parse_args() + +def main(): + args = parse_args() + + # Setup device + device = torch.device(args.device) if args.device else get_device() + + # Load model + try: + load_path = f'./detectors/CLIP-D/checkpoint/{args.model}/weights/best.pt' + + if not os.path.exists(load_path): + raise FileNotFoundError(f"Model weights not found at: {load_path}") + + checkpoint = torch.load(load_path, map_location=device) + # Initialize model and load state + model = create_architecture("opencliplinearnext_clipL14commonpool", pretrained=False, num_classes=1).to(device) + if 'model' in checkpoint: + model.load_state_dict(checkpoint['model']) + else: + model.load_state_dict(checkpoint) + model.eval() + except Exception as e: + print(f"Error loading model: {e}") + return + + # Load and preprocess image + try: + image_tensor = preprocess_image(args.image) + image_tensor = image_tensor.to(device) + except Exception as e: + print(f"Error loading image: {e}") + return + + # Run detection + start_time = time.time() + with torch.no_grad(): + try: + score = model(image_tensor) + prediction = torch.sigmoid(score) + + confidence = prediction.item() + + result = format_result( + 'fake' if confidence>0.5 else 'real', + confidence, + time.time() - start_time + ) + + # Print result + print(f"Prediction: {result['prediction']}") + print(f"Confidence: {result['confidence']:.4f}") + print(f"Time: {result['elapsed_time']:.3f}s") + + # Save result if output path provided + if args.output: + save_result(result, args.output) + + except Exception as e: + print(f"Error during detection: {e}") + return + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/detectors/CLIP-D/networks/__init__.py b/detectors/CLIP-D/networks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0593d6110daeb9b04f558800a13109f045740519 --- /dev/null +++ b/detectors/CLIP-D/networks/__init__.py @@ -0,0 +1,70 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +def create_architecture(name_arch, pretrained=False, num_classes=1): + if name_arch == "res50nodown": + from .resnet_mod import resnet50 + + if pretrained: + model = resnet50(pretrained=True, stride0=1, dropout=0.5).change_output(num_classes) + else: + model = resnet50(num_classes=num_classes, stride0=1, dropout=0.5) + elif name_arch == "res50": + from .resnet_mod import resnet50 + + if pretrained: + model = resnet50(pretrained=True, stride0=2).change_output(num_classes) + else: + model = resnet50(num_classes=num_classes, stride0=2) + elif name_arch.startswith('opencliplinear_'): + from .openclipnet import OpenClipLinear + model = OpenClipLinear(num_classes=num_classes, pretrain=name_arch[15:], normalize=True) + elif name_arch.startswith('opencliplinearnext_'): + from .openclipnet import OpenClipLinear + model = OpenClipLinear(num_classes=num_classes, pretrain=name_arch[19:], normalize=True, next_to_last=True) + else: + assert False + return model + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + +def load_weights(model, model_path): + from torch import load + dat = load(model_path, map_location='cpu') + if 'model' in dat: + if ('module._conv_stem.weight' in dat['model']) or \ + ('module.fc.fc1.weight' in dat['model']) or \ + ('module.fc.weight' in dat['model']): + model.load_state_dict( + {key[7:]: dat['model'][key] for key in dat['model']}) + else: + model.load_state_dict(dat['model']) + elif 'state_dict' in dat: + model.load_state_dict(dat['state_dict']) + elif 'net' in dat: + model.load_state_dict(dat['net']) + elif 'main.0.weight' in dat: + model.load_state_dict(dat) + elif '_fc.weight' in dat: + model.load_state_dict(dat) + elif 'conv1.weight' in dat: + model.load_state_dict(dat) + else: + print(list(dat.keys())) + assert False + return model diff --git a/detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc b/detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ad7b8260231eba512d1004ec39dce5453127830 Binary files /dev/null and b/detectors/CLIP-D/networks/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc b/detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1f95c01470d5e95b8575fdafd4ca22b31049bcc Binary files /dev/null and b/detectors/CLIP-D/networks/__pycache__/openclipnet.cpython-310.pyc differ diff --git a/detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc b/detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..772e7bb738f3512969584bee09f2d4ee7c01e6e5 Binary files /dev/null and b/detectors/CLIP-D/networks/__pycache__/resnet_mod.cpython-310.pyc differ diff --git a/detectors/CLIP-D/networks/openclipnet.py b/detectors/CLIP-D/networks/openclipnet.py new file mode 
100644 index 0000000000000000000000000000000000000000..f1b13ca0825305382e2b5a55694dbe80f36aad7e --- /dev/null +++ b/detectors/CLIP-D/networks/openclipnet.py @@ -0,0 +1,85 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import torch +import torch.nn as nn +import torch.nn.functional as F +import open_clip +from .resnet_mod import ChannelLinear + +dict_pretrain = { + 'clipL14openai' : ('ViT-L-14', 'openai'), + 'clipL14laion400m' : ('ViT-L-14', 'laion400m_e32'), + 'clipL14laion2B' : ('ViT-L-14', 'laion2b_s32b_b82k'), + 'clipL14datacomp' : ('ViT-L-14', 'laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K', 'open_clip_pytorch_model.bin'), + 'clipL14commonpool' : ('ViT-L-14', "laion/CLIP-ViT-L-14-CommonPool.XL-s13B-b90K", 'open_clip_pytorch_model.bin'), + 'clipaL14datacomp' : ('ViT-L-14-CLIPA', 'datacomp1b'), + 'cocaL14laion2B' : ('coca_ViT-L-14', 'laion2b_s13b_b90k'), + 'clipg14laion2B' : ('ViT-g-14', 'laion2b_s34b_b88k'), + 'eva2L14merged2b' : ('EVA02-L-14', 'merged2b_s4b_b131k'), + 'clipB16laion2B' : ('ViT-B-16', 'laion2b_s34b_b88k'), +} + + +class OpenClipLinear(nn.Module): + def __init__(self, num_classes=1, pretrain='clipL14commonpool', normalize=True, next_to_last=False): + super(OpenClipLinear, self).__init__() + + # Modified to handle download failures gracefully + # The checkpoint only contains fc weights, so we need the pretrained backbone + if len(dict_pretrain[pretrain])==2: + try: + backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=dict_pretrain[pretrain][1]) + except Exception as e: + print(f"WARNING: Could not download pretrained weights ({e}). Using random initialization.") + backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=None) + else: + try: + from huggingface_hub import hf_hub_download + backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=hf_hub_download(*dict_pretrain[pretrain][1:])) + except Exception as e: + print(f"WARNING: Could not download pretrained weights ({e}). 
Using random initialization.") + backbone = open_clip.create_model(dict_pretrain[pretrain][0], pretrained=None) + + if next_to_last: + self.num_features = backbone.visual.proj.shape[0] + backbone.visual.proj = None + else: + self.num_features = backbone.visual.output_dim + + self.bb = [backbone, ] + self.normalize = normalize + + self.fc = ChannelLinear(self.num_features, num_classes) + torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02) + + def to(self, *args, **kwargs): + self.bb[0].to(*args, **kwargs) + super(OpenClipLinear, self).to(*args, **kwargs) + return self + + def forward_features(self, x): + with torch.no_grad(): + self.bb[0].eval() + features = self.bb[0].encode_image(x, normalize=self.normalize) + return features + + def forward_head(self, x): + return self.fc(x) + + def forward(self, x): + return self.forward_head(self.forward_features(x)) diff --git a/detectors/CLIP-D/networks/resnet_mod.py b/detectors/CLIP-D/networks/resnet_mod.py new file mode 100644 index 0000000000000000000000000000000000000000..8b71c0108c985f58548f40ef1eb680c99fd1908d --- /dev/null +++ b/detectors/CLIP-D/networks/resnet_mod.py @@ -0,0 +1,335 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +__all__ = ["ResNet", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152"] + + +model_urls = { + "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth", + "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", + "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth", + "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", + "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth", +} + +class ChannelLinear(nn.Linear): + def __init__( + self, in_features: int, out_features: int, bias: bool = True, pool=None + ) -> None: + super(ChannelLinear, self).__init__(in_features, out_features, bias) + self.compute_axis = 1 + self.pool = pool + + def forward(self, x): + axis_ref = len(x.shape) - 1 + x = torch.transpose(x, self.compute_axis, axis_ref) + out_shape = list(x.shape) + out_shape[-1] = self.out_features + x = x.reshape(-1, x.shape[-1]) + x = x.matmul(self.weight.t()) + if self.bias is not None: + x = x + self.bias[None, :] + x = torch.transpose(x.view(out_shape), axis_ref, self.compute_axis) + if self.pool is not None: + x = self.pool(x) + return x + + +def conv3x3(in_planes, out_planes, stride=1, padding=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=padding, bias=False + ) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, padding=padding) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, padding=padding) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.padding = padding + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.padding == 0: + identity = identity[..., 1:-1, 1:-1] + if self.downsample is not None: + identity = self.downsample(identity) + if self.padding == 0: + identity = identity[..., 1:-1, 1:-1] + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride, padding=padding) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.padding = padding + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.padding == 0: + identity = identity[..., 1:-1, 1:-1] + if self.downsample is not None: + identity = self.downsample(identity) + + out += identity + out = self.relu(out) + + return out + +class ResNet(nn.Module): + def __init__( + self, 
+ block, + layers, + num_classes=1000, + zero_init_residual=False, + stride0=2, + padding=1, + dropout=0.0, + gap_size=None, + ): + super(ResNet, self).__init__() + self.inplanes = 64 + + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=stride0, padding=3 * padding, bias=False + ) + self.bn1 = nn.BatchNorm2d(64) + if dropout > 0: + self.dropout = nn.Dropout(dropout) + else: + self.dropout = None + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=stride0, padding=padding) + self.layer1 = self._make_layer(block, 64, layers[0], padding=padding) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, padding=padding) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, padding=padding) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, padding=padding) + + if gap_size is None: + self.gap_size = None + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + elif gap_size < 0: + with torch.no_grad(): + y = self.forward_features( + torch.zeros((1, 3, -gap_size, -gap_size), dtype=torch.float32) + ).shape + print("gap_size:", -gap_size, ">>", y[-1]) + self.gap_size = y[-1] + self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0) + elif gap_size == 1: + self.gap_size = gap_size + self.avgpool = None + else: + self.gap_size = gap_size + self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0) + self.num_features = 512 * block.expansion + self.fc = ChannelLinear(self.num_features, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, padding=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride=stride, + downsample=downsample, + padding=padding, + ) + ) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, padding=padding)) + + return nn.Sequential(*layers) + + def change_output(self, num_classes): + self.fc = ChannelLinear(self.num_features, num_classes) + torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02) + return self + + def change_input(self, num_inputs): + data = self.conv1.weight.data + old_num_inputs = int(data.shape[1]) + if num_inputs > old_num_inputs: + times = num_inputs // old_num_inputs + if (times * old_num_inputs) < num_inputs: + times = times + 1 + data = data.repeat(1, times, 1, 1) / times + elif num_inputs == old_num_inputs: + return self + + data = data[:, :num_inputs, :, :] + print(self.conv1.weight.data.shape, "->", data.shape) + self.conv1.weight.data = data + + return self + + def forward_features(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + return x + + def forward_head(self, x): + if self.avgpool is not None: + x = self.avgpool(x) + if self.dropout is not None: + x = self.dropout(x) + y = 
self.fc(x) + if self.gap_size is None: + y = torch.squeeze(torch.squeeze(y, -1), -1) + return y + + def forward(self, x): + x = self.forward_features(x) + x = self.forward_head(x) + return x + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet18"])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet34"])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet50"])) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet101"])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet152"])) + return model diff --git a/detectors/CLIP-D/parser.py b/detectors/CLIP-D/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..94a514f1c98d2fe9247d5787ab9cb40bf2fec978 --- /dev/null +++ b/detectors/CLIP-D/parser.py @@ -0,0 +1,25 @@ +import argparse + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--name", type=str, default="test", help="run name") + parser.add_argument("--arch", type=str, default="opencliplinearnext_clipL14commonpresool", help="architecture name") + + parser.add_argument("--task", type=str, help="Task: train/test") + parser.add_argument("--device", type=str, default="cuda:0", help="cuda device to use") + + parser.add_argument("--split_file", type=str, help="Path to split json") + parser.add_argument("--data_root", type=str, help="Path to dataset") + parser.add_argument("--data_keys", type=str, help="Dataset specifications") + + parser.add_argument("--batch_size", type=int, default=64, help='Dataloader batch size') + parser.add_argument("--num_threads", type=int, default=14, help='# threads for loading data') + + parser.add_argument("--lr", type=float, default=0.0001, help="initial learning rate") + parser.add_argument("--weight_decay", type=float, default=0.0, help="weight decay") + parser.add_argument("--beta1", type=float, default=0.9, help="momentum term of adam") + + parser.add_argument("--num_epoches", type=int, default=1000, help="# of epoches at starting learning rate") + parser.add_argument("--earlystop_epoch", type=int, default=5, help="Number of epochs without loss reduction before lowering the learning rate") + + return parser \ No newline at end of file diff --git a/detectors/CLIP-D/test.py b/detectors/CLIP-D/test.py new file mode 100644 index 
0000000000000000000000000000000000000000..77d989aaeaf9d7d2bbfcb6ae510451466cf01ca5 --- /dev/null +++ b/detectors/CLIP-D/test.py @@ -0,0 +1,167 @@ +import os +from tqdm import tqdm +import torch +import pandas as pd +import json +import time +import numpy as np +from sklearn.metrics import roc_auc_score, accuracy_score +from networks import create_architecture, count_parameters +from utils.dataset import create_dataloader +from utils.processing import add_processing_arguments +from parser import get_parser + +def test(loader, model, settings, device): + model.eval() + + start_time = time.time() + + # File paths + output_dir = f'./results/{settings.name}/data/{settings.data_keys}' + os.makedirs(output_dir, exist_ok=True) + + csv_filename = os.path.join(output_dir, 'results.csv') + metrics_filename = os.path.join(output_dir, 'metrics.json') + image_results_filename = os.path.join(output_dir, 'image_results.json') + + # Collect all results + all_scores = [] + all_labels = [] + all_paths = [] + image_results = [] + + # Extract training dataset keys from model name (format: "training_keys_freeze_down" or "training_keys") + training_dataset_keys = [] + model_name = settings.name + if '_freeze_down' in model_name: + training_name = model_name.replace('_freeze_down', '') + else: + training_name = model_name + if '&' in training_name: + training_dataset_keys = training_name.split('&') + else: + training_dataset_keys = [training_name] + + # Write CSV header + with open(csv_filename, 'w') as f: + f.write(f"{','.join(['name', 'pro', 'flag'])}\n") + + with torch.no_grad(): + with tqdm(loader, unit='batch', mininterval=0.5) as tbatch: + tbatch.set_description(f'Validation') + for data_dict in tbatch: + data = data_dict['img'].to(device) + labels = data_dict['target'].to(device) + paths = data_dict['path'] + + scores = model(data).squeeze(1) + + # Collect results + for score, label, path in zip(scores, labels, paths): + score_val = score.item() + label_val = label.item() + + all_scores.append(score_val) + all_labels.append(label_val) + all_paths.append(path) + + image_results.append({ + 'path': path, + 'score': score_val, + 'label': label_val + }) + + # Write to CSV (maintain backward compatibility) + with open(csv_filename, 'a') as f: + for score, label, path in zip(scores, labels, paths): + f.write(f"{path}, {score.item()}, {label.item()}\n") + + # Calculate metrics + all_scores = np.array(all_scores) + all_labels = np.array(all_labels) + + # Convert scores to predictions (threshold at 0, as used in train.py: y_pred > 0.0) + predictions = (all_scores > 0).astype(int) + + # Calculate overall metrics + total_accuracy = accuracy_score(all_labels, predictions) + + # TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1) + fake_mask = all_labels == 1 + if fake_mask.sum() > 0: + tpr = accuracy_score(all_labels[fake_mask], predictions[fake_mask]) + else: + tpr = 0.0 + + # TNR per dataset key (True Negative Rate) = TN / (TN + FP) = accuracy on real images (label==0) + tnr_per_dataset = {} + + # Calculate TNR on real images (label==0) in the test set + real_mask = all_labels == 0 + if real_mask.sum() > 0: + # Overall TNR calculated on all real images in the test set + tnr = accuracy_score(all_labels[real_mask], predictions[real_mask]) + else: + tnr = 0.0 + + # Map TNR to training dataset keys (as shown in the example JSON structure) + # The TNR is calculated on the test set, but organized by training dataset keys + #for training_key in training_dataset_keys: + # 
tnr_per_dataset[training_key] = overall_tnr + + # AUC calculation (needs probabilities, so we'll use sigmoid on scores) + if len(np.unique(all_labels)) > 1: # Need both classes for AUC + # Apply sigmoid to convert scores to probabilities + probabilities = torch.sigmoid(torch.tensor(all_scores)).numpy() + auc = roc_auc_score(all_labels, probabilities) + else: + auc = 0.0 + + execution_time = time.time() - start_time + + # Prepare metrics JSON + metrics = { + 'TPR': float(tpr), + 'TNR': float(tnr), + 'Acc total': float(total_accuracy), + 'AUC': float(auc), + 'execution time': float(execution_time) + } + + # Write metrics JSON + with open(metrics_filename, 'w') as f: + json.dump(metrics, f, indent=2) + + # Write individual image results JSON + with open(image_results_filename, 'w') as f: + json.dump(image_results, f, indent=2) + + print(f'\nMetrics saved to {metrics_filename}') + print(f'Image results saved to {image_results_filename}') + print(f'\nMetrics:') + print(f' TPR: {tpr:.4f}') + print(f' TNR: {tnr:.4f}') + print(f' Accuracy: {total_accuracy:.4f}') + print(f' AUC: {auc:.4f}') + print(f' Execution time: {execution_time:.2f} seconds') + +if __name__ == '__main__': + parser = get_parser() + parser = add_processing_arguments(parser) + settings = parser.parse_args() + + device = torch.device(settings.device if torch.cuda.is_available() else 'cpu') + + test_dataloader = create_dataloader(settings, split='test') + + model = create_architecture(settings.arch, pretrained=True, num_classes=1).to(device) + num_parameters = count_parameters(model) + print(f"Arch: {settings.arch} with #parameters {num_parameters}") + + load_path = f'./checkpoint/{settings.name}/weights/best.pt' + + print('loading the model from %s' % load_path) + model.load_state_dict(torch.load(load_path, map_location=device)['model']) + model.to(device) + + test(test_dataloader, model, settings, device) diff --git a/detectors/CLIP-D/train.py b/detectors/CLIP-D/train.py new file mode 100644 index 0000000000000000000000000000000000000000..3e7e7dba4a5aabe4e217f1a0b5f553443f80799f --- /dev/null +++ b/detectors/CLIP-D/train.py @@ -0,0 +1,65 @@ +import os +import tqdm +from utils import TrainingModel, create_dataloader, EarlyStopping +from sklearn.metrics import balanced_accuracy_score, roc_auc_score +from utils.processing import add_processing_arguments +from parser import get_parser + +if __name__ == "__main__": + parser = get_parser() + parser = add_processing_arguments(parser) + + opt = parser.parse_args() + + os.makedirs(os.path.join('checkpoint', opt.name,'weights'), exist_ok=True) + + valid_data_loader = create_dataloader(opt, split="val") + train_data_loader = create_dataloader(opt, split="train") + print() + print("# validation batches = %d" % len(valid_data_loader)) + print("# training batches = %d" % len(train_data_loader)) + model = TrainingModel(opt) + early_stopping = None + start_epoch = model.total_steps // len(train_data_loader) + print() + + for epoch in range(start_epoch, opt.num_epoches+1): + if epoch > start_epoch: + # Training + pbar = tqdm.tqdm(train_data_loader) + for data in pbar: + loss = model.train_on_batch(data).item() + total_steps = model.total_steps + pbar.set_description(f"Train loss: {loss:.4f}") + + # Save model + model.save_networks(epoch) + + # Validation + print("Validation ...", flush=True) + y_true, y_pred, y_path = model.predict(valid_data_loader) + acc = balanced_accuracy_score(y_true, y_pred > 0.0) + auc = roc_auc_score(y_true, y_pred) + lr = model.get_learning_rate() + print("After {} 
epoches: val acc = {}; val auc = {}".format(epoch, acc, auc), flush=True) + + # Early Stopping + if early_stopping is None: + early_stopping = EarlyStopping( + init_score=acc, patience=opt.earlystop_epoch, + delta=0.001, verbose=True, + ) + print('Save best model', flush=True) + model.save_networks('best') + else: + if early_stopping(acc): + print('Save best model', flush=True) + model.save_networks('best') + if early_stopping.early_stop: + cont_train = model.adjust_learning_rate() + if cont_train: + print("Learning rate dropped by 10, continue training ...", flush=True) + early_stopping.reset_counter() + else: + print("Early stopping.", flush=True) + break diff --git a/detectors/CLIP-D/utils/__init__.py b/detectors/CLIP-D/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b1bde9fb6193c6cb36e3ebab15c7f81ccf3730d --- /dev/null +++ b/detectors/CLIP-D/utils/__init__.py @@ -0,0 +1,52 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +from .dataset import create_dataloader +from .training import TrainingModel +class EarlyStopping: + def __init__(self, init_score=None, patience=1, verbose=False, delta=0): + self.best_score = init_score + self.patience = patience + self.delta = delta + self.verbose = verbose + self.count_down = self.patience + self.early_stop = False + + def __call__(self, score): + if self.best_score is None: + if self.verbose: + print(f'Score set to {score:.6f}.') + self.best_score = score + self.count_down = self.patience + return True + elif score <= self.best_score + self.delta: + self.count_down -= 1 + if self.verbose: + print(f'EarlyStopping count_down: {self.count_down} on {self.patience}') + if self.count_down <= 0: + self.early_stop = True + return False + else: + if self.verbose: + print(f'Score increased from ({self.best_score:.6f} to {score:.6f}).') + self.best_score = score + self.count_down = self.patience + return True + + def reset_counter(self): + self.count_down = self.patience + self.early_stop = False diff --git a/detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc b/detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3867ae427fd090a2e4038f9d31ebf4c94e3ebc32 Binary files /dev/null and b/detectors/CLIP-D/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc b/detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0612836a5bc63fed605244079dc4b48ca1443e7 Binary files /dev/null and b/detectors/CLIP-D/utils/__pycache__/dataset.cpython-310.pyc differ diff --git a/detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc b/detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..340eeab41435f3d78b65dc460fde821356d1298d 
Binary files /dev/null and b/detectors/CLIP-D/utils/__pycache__/processing.cpython-310.pyc differ diff --git a/detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc b/detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44bb4a8ba994a8ac4a158486540317239de56c76 Binary files /dev/null and b/detectors/CLIP-D/utils/__pycache__/training.cpython-310.pyc differ diff --git a/detectors/CLIP-D/utils/dataset.py b/detectors/CLIP-D/utils/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..fa65849e2c287d763bdee8ab6a40e77a75bccc77 --- /dev/null +++ b/detectors/CLIP-D/utils/dataset.py @@ -0,0 +1,144 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import os +import json +import torch +import bisect +import numpy as np +from torch.utils.data.sampler import WeightedRandomSampler, RandomSampler +from torchvision import datasets +from .processing import make_processing + +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + +def create_dataloader(opt, split=None): + if split == "train": + opt.split = 'train' + is_train=True + + elif split == "val": + opt.split = 'val' + is_train=False + + elif split == "test": + opt.split = 'test' + is_train=False + + else: + raise ValueError(f"Unknown split {split}") + + dataset = TrueFake_dataset(opt) + + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=opt.batch_size, + shuffle=is_train, + num_workers=int(opt.num_threads), + ) + return data_loader + +def parse_dataset(settings): + gen_keys = { + 'gan1':['StyleGAN'], + 'gan2':['StyleGAN2'], + 'gan3':['StyleGAN3'], + 'sd15':['StableDiffusion1.5'], + 'sd2':['StableDiffusion2'], + 'sd3':['StableDiffusion3'], + 'sdXL':['StableDiffusionXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + + gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()] + # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key] + # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key] + gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key] + + mod_keys = { + 'pre': ['PreSocial'], + 'fb': ['Facebook'], + 'tl': ['Telegram'], + 'tw': ['X'], + } + + mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()] + mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']] + + need_real = (settings.split in ['train', 'val'] and not len([data for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]])) + + assert not need_real, 'Train task without real data, this will not get handeled automatically, terminating' + + dataset_list = [] + for data in settings.data_keys.split('&'): + gen, mod = data.split(':') + dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]}) + + return dataset_list + +class 
TrueFake_dataset(datasets.DatasetFolder): + def __init__(self, settings): + self.data_root = settings.data_root + self.split = settings.split + + with open(settings.split_file, "r") as f: + split_list = sorted(json.load(f)[self.split]) + + dataset_list = parse_dataset(settings) + + self.samples = [] + self.info = [] + for dict in dataset_list: + generators = dict['gen'] + modifiers = dict['mod'] + + for mod in modifiers: + for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True): + if len(dataset_dirs): + continue + (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3][:3] + + if gen in generators: + for filename in sorted(dataset_files): + if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']: + if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])): + self.samples.append(os.path.join(dataset_root, filename)) + self.info.append((mod, label, gen, sub)) + + self.transform = make_processing(settings) + print(self.transform) + + def _in_list(self, split, elem): + i = bisect.bisect_left(split, elem) + return i != len(split) and split[i] == elem + + def __len__(self): + return len(self.samples) + + def __getitem__(self, index): + path = self.samples[index] + mod, label, gen, sub = self.info[index] + + sample = Image.open(path).convert('RGB') + sample = self.transform(sample) + + target = 1.0 if label == 'Fake' else 0.0 + + return {'img':sample, 'target':target, 'path':path} \ No newline at end of file diff --git a/detectors/CLIP-D/utils/processing.py b/detectors/CLIP-D/utils/processing.py new file mode 100644 index 0000000000000000000000000000000000000000..2587bf40dfab42bdf16b66f9e5473efa8f11924a --- /dev/null +++ b/detectors/CLIP-D/utils/processing.py @@ -0,0 +1,133 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' +import torchvision.transforms.v2 as Tv2 + +def make_processing(opt): + opt = parse_arguments(opt) + transforms_list = list() # list of transforms + + if opt.task == 'train': + transforms_aug = make_aug(opt) # make data-augmentation transforms + if transforms_aug is not None: + transforms_list.append(transforms_aug) + + transforms_post = make_post(opt) # make post-data-augmentation transforms + if transforms_post is not None: + transforms_list.append(transforms_post) + + transforms_list.append(make_normalize(opt)) # make normalization + + t = Tv2.Compose(transforms_list) + + return t + + +def add_processing_arguments(parser): + # parser is an argparse.ArgumentParser + # + # ICASSP2023: --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.2 --jitter_prob 0.8 --colordist_prob 0.2 --cutout_prob 0.2 --noise_prob 0.2 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100 --resize_size 256 --resize_ratio 0.75 + # ICME2021 : --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.0 --jitter_prob 0.0 --colordist_prob 0.0 --cutout_prob 0.0 --noise_prob 0.0 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100 + # + + parser.add_argument("--resizeSize", type=int, default=224, help="scale images to this size post augumentation") + + # data-augmentation probabilities + parser.add_argument("--resize_prob", type=float, default=0.0) + parser.add_argument("--cmp_prob", type=float, default=0.0) + + # data-augmentation parameters + parser.add_argument("--cmp_qual", default="75") + parser.add_argument("--resize_size", type=int, default=256) + parser.add_argument("--resize_ratio", type=float, default=1.0) + + # other + parser.add_argument("--norm_type", type=str, default="clip") + + return parser + + +def parse_arguments(opt): + if not isinstance(opt.cmp_qual, list): + opt.cmp_qual = [int(s) for s in opt.cmp_qual.split(",")] + return opt + + +def make_post(opt): + transforms_list = list() + if opt.resizeSize > 0: + print("\nUsing Post Resizing\n") + transforms_list.append(Tv2.Resize(opt.resizeSize, interpolation=Tv2.InterpolationMode.BICUBIC)) + transforms_list.append(Tv2.CenterCrop((opt.resizeSize, opt.resizeSize))) + + if len(transforms_list) == 0: + return None + else: + return Tv2.Compose(transforms_list) + + +def make_aug(opt): + # AUG + transforms_list_aug = list() + + if (opt.resize_size > 0) and (opt.resize_prob > 0): # opt.resized_ratio + transforms_list_aug.append( + Tv2.RandomApply( + [ + Tv2.RandomResizedCrop( + size=opt.resize_size, + scale=(5/8, 1.0), + ratio=(opt.resize_ratio, 1.0 / opt.resize_ratio), + ) + ], + opt.resize_prob, + ) + ) + + if opt.cmp_prob > 0: + transforms_list_aug.append( + Tv2.RandomApply( + [ + Tv2.JPEG( + opt.cmp_qual + ) + ], + opt.cmp_prob, + ) + ) + + if len(transforms_list_aug) > 0: + return Tv2.Compose(transforms_list_aug) + else: + return None + + +def make_normalize(opt): + transforms_list = list() + + if opt.norm_type == "clip": + print("normalize CLIP") + transforms_list.append(Tv2.ToTensor()) + transforms_list.append( + Tv2.Normalize( + mean=(0.48145466, 0.4578275, 0.40821073), + std=(0.26862954, 0.26130258, 0.27577711), + ) + ) + else: + assert False + + return Tv2.Compose(transforms_list) diff --git a/detectors/CLIP-D/utils/training.py b/detectors/CLIP-D/utils/training.py new file mode 100644 index 
0000000000000000000000000000000000000000..72cc5466fbdb80fad744707c74f8bc35f676d80a --- /dev/null +++ b/detectors/CLIP-D/utils/training.py @@ -0,0 +1,105 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import os +import torch +import numpy as np +import tqdm +from networks import create_architecture, count_parameters + +class TrainingModel(torch.nn.Module): + + def __init__(self, opt): + super(TrainingModel, self).__init__() + + self.opt = opt + self.total_steps = 0 + self.save_dir = os.path.join('checkpoint', opt.name,'weights') + self.device = torch.device(opt.device if torch.cuda.is_available() else 'cpu') + + self.model = create_architecture(opt.arch, pretrained=True, num_classes=1) + num_parameters = count_parameters(self.model) + print(f"Arch: {opt.arch} with #trainable {num_parameters}") + + self.loss_fn = torch.nn.BCEWithLogitsLoss().to(self.device) + parameters = filter(lambda p: p.requires_grad, self.model.parameters()) + self.optimizer = torch.optim.Adam(parameters, lr=opt.lr, betas=(opt.beta1, 0.999), weight_decay=opt.weight_decay) + + self.model.to(self.device) + + def adjust_learning_rate(self, min_lr=1e-6): + for param_group in self.optimizer.param_groups: + param_group["lr"] /= 10.0 + if param_group["lr"] < min_lr: + return False + return True + + def get_learning_rate(self): + for param_group in self.optimizer.param_groups: + return param_group["lr"] + + def train_on_batch(self, data): + self.total_steps += 1 + self.model.train() + input = data['img'].to(self.device) + label = data['target'].to(self.device).float() + output = self.model(input) + if len(output.shape) == 4: + ss = output.shape + loss = self.loss_fn( + output, + label[:, None, None, None].repeat( + (1, int(ss[1]), int(ss[2]), int(ss[3])) + ), + ) + else: + loss = self.loss_fn(output.squeeze(1), label) + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + return loss.cpu() + + def save_networks(self, epoch): + save_filename = f'{epoch}.pt' + save_path = os.path.join(self.save_dir, save_filename) + + # serialize model and optimizer to dict + state_dict = { + 'model': self.model.state_dict(), + 'optimizer': self.optimizer.state_dict(), + 'total_steps': self.total_steps, + } + + torch.save(state_dict, save_path) + + def predict(self, data_loader): + model = self.model.eval() + with torch.no_grad(): + y_true, y_pred, y_path = [], [], [] + for data in tqdm.tqdm(data_loader): + img = data['img'] + label = data['target'].cpu().numpy() + paths = list(data['path']) + out_tens = model(img.to(self.device)).cpu().numpy()[:, -1] + assert label.shape == out_tens.shape + + y_pred.extend(out_tens.tolist()) + y_true.extend(label.tolist()) + y_path.extend(paths) + + y_true, y_pred = np.array(y_true), np.array(y_pred) + return y_true, y_pred, y_path diff --git a/detectors/NPR/README.md b/detectors/NPR/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..e12bb398edd8f8a499201a0edaa692232fad00f8 --- /dev/null +++ b/detectors/NPR/README.md @@ -0,0 +1,49 @@ +# Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection + +[![Official Github Repo](https://img.shields.io/badge/Github%20page-222222.svg?style=for-the-badge&logo=github)](https://github.com/chuangchuangtan/NPR-DeepfakeDetection) +[![Paper](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/abs/2312.10461) + +Original Paper: +[Rethinking the Up-Sampling Operations in CNN-based Generative Network for Generalizable Deepfake Detection](https://arxiv.org/abs/2312.10461). + +Authors: Chuangchuang Tan, Huan Liu, Yao Zhao, Shikui Wei, Guanghua Gu, Ping Liu, Yunchao Wei. + +## Abstract + +Recently, the proliferation of highly realistic synthetic +images, facilitated through a variety of GANs and Diffu- +sions, has significantly heightened the susceptibility to mis- +use. While the primary focus of deepfake detection has tra- +ditionally centered on the design of detection algorithms, +an investigative inquiry into the generator architectures has +remained conspicuously absent in recent years. This paper +contributes to this lacuna by rethinking the architectures of +CNN-based generator, thereby establishing a generalized +representation of synthetic artifacts. Our findings illumi- +nate that the up-sampling operator can, beyond frequency- +based artifacts, produce generalized forgery artifacts. In +particular, the local interdependence among image pixels +caused by upsampling operators is significantly demon- +strated in synthetic images generated by GAN or diffusion. +Building upon this observation, we introduce the concept of +Neighboring Pixel Relationships(NPR) as a means to cap- +ture and characterize the generalized structural artifacts +stemming from up-sampling operations. A comprehensive +analysis is conducted on an open-world dataset, comprising +samples generated by 28 distinct generative models. 
This +analysis culminates in the establishment of a novel state-of- +the-art performance, showcasing a remarkable 11.6% im- +provement over existing methods + + +## Please Cite + +``` +@inproceedings{tan2024rethinking, +title={Rethinking the up-sampling operations in cnn-based generative network for generalizable deepfake detection}, +author={Tan, Chuangchuang and Zhao, Yao and Wei, Shikui and Gu, Guanghua and Liu, Ping and Wei, Yunchao}, +booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, +pages={28130--28139}, +year={2024} +} +``` \ No newline at end of file diff --git a/detectors/NPR/__pycache__/util.cpython-310.pyc b/detectors/NPR/__pycache__/util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..154443e1f337ad766f69b880c617741f184ae6c6 Binary files /dev/null and b/detectors/NPR/__pycache__/util.cpython-310.pyc differ diff --git a/detectors/NPR/__pycache__/validate.cpython-310.pyc b/detectors/NPR/__pycache__/validate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..140666367248604756b8b5aa93fe21bd6c60ed29 Binary files /dev/null and b/detectors/NPR/__pycache__/validate.cpython-310.pyc differ diff --git a/detectors/NPR/checkpoint/pretrained/weights/best.pt b/detectors/NPR/checkpoint/pretrained/weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..3541bf4d7a84252a75ba5d4540d089b7aa2ef32e --- /dev/null +++ b/detectors/NPR/checkpoint/pretrained/weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb76594b7304c23249206ab2ac434dbd153c4114660ecc11a23eb82848f0721d +size 5831180 diff --git a/detectors/NPR/data/__init__.py b/detectors/NPR/data/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..ff170d8a5b197013118b5b16656ad061f3674330 --- /dev/null +++ b/detectors/NPR/data/__init__.py @@ -0,0 +1,201 @@ +import torch +import numpy as np +from torch.utils.data.sampler import WeightedRandomSampler + +from .datasets import dataset_folder +from torchvision.datasets import DatasetFolder +import json +import bisect +from PIL import Image +import torchvision.transforms.v2 as Tv2 +''' +def get_dataset(opt): + dset_lst = [] + for cls in opt.classes: + root = opt.dataroot + '/' + cls + dset = dataset_folder(opt, root) + dset_lst.append(dset) + return torch.utils.data.ConcatDataset(dset_lst) +''' + +import os +# def get_dataset(opt): +# classes = os.listdir(opt.dataroot) if len(opt.classes) == 0 else opt.classes +# if '0_real' not in classes or '1_fake' not in classes: +# dset_lst = [] +# for cls in classes: +# root = opt.dataroot + '/' + cls +# dset = dataset_folder(opt, root) +# dset_lst.append(dset) +# return torch.utils.data.ConcatDataset(dset_lst) +# return dataset_folder(opt, opt.dataroot) + +# def get_bal_sampler(dataset): +# targets = [] +# for d in dataset.datasets: +# targets.extend(d.targets) + +# ratio = np.bincount(targets) +# w = 1. 
/ torch.tensor(ratio, dtype=torch.float) +# sample_weights = w[targets] +# sampler = WeightedRandomSampler(weights=sample_weights, +# num_samples=len(sample_weights)) +# return sampler + + +# def create_dataloader(opt): +# shuffle = not opt.serial_batches if (opt.isTrain and not opt.class_bal) else False +# dataset = get_dataset(opt) +# sampler = get_bal_sampler(dataset) if opt.class_bal else None + +# data_loader = torch.utils.data.DataLoader(dataset, +# batch_size=opt.batch_size, +# shuffle=shuffle, +# sampler=sampler, +# num_workers=int(opt.num_threads)) +# return data_loader + + +def parse_dataset(settings): + gen_keys = { + 'gan1':['StyleGAN'], + 'gan2':['StyleGAN2'], + 'gan3':['StyleGAN3'], + 'sd15':['StableDiffusion1.5'], + 'sd2':['StableDiffusion2'], + 'sd3':['StableDiffusion3'], + 'sdXL':['StableDiffusionXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + + gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()] + # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key] + # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key] + gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key] + + mod_keys = { + 'pre': ['PreSocial'], + 'fb': ['Facebook'], + 'tl': ['Telegram'], + 'tw': ['X'], + } + + mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()] + mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']] + + need_real = (settings.task == 'train' and not len([data.split(':')[0] for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]])) + + assert not need_real, 'Train task without real data, this will not get handeled automatically, terminating' + + dataset_list = [] + for data in settings.data_keys.split('&'): + gen, mod = data.split(':') + dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]}) + + return dataset_list + +class TrueFake_dataset(DatasetFolder): + def __init__(self, settings): + self.data_root = settings.data_root + self.split = settings.split + + with open(settings.split_file, "r") as f: + split_list = sorted(json.load(f)[self.split]) + + dataset_list = parse_dataset(settings) + + self.samples = [] + self.info = [] + for dict in dataset_list: + generators = dict['gen'] + modifiers = dict['mod'] + + for mod in modifiers: + for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True): + if len(dataset_dirs): + continue + + (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3] + + if gen in generators: + for filename in sorted(dataset_files): + if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']: + if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])): + self.samples.append(os.path.join(dataset_root, filename)) + self.info.append((mod, label, gen, sub)) + + if settings.isTrain: + crop_func = Tv2.RandomCrop(settings.cropSize) + elif settings.no_crop: + crop_func = Tv2.Identity() + else: + crop_func = Tv2.CenterCrop(settings.cropSize) + + if settings.isTrain and not settings.no_flip: + flip_func = Tv2.RandomHorizontalFlip() + else: + flip_func = Tv2.Identity() + + if not settings.isTrain and settings.no_resize: + rz_func = Tv2.Identity() + else: + rz_func = Tv2.Resize((settings.loadSize, settings.loadSize)) + + self.transform = Tv2.Compose([ + rz_func, + crop_func, + flip_func, + Tv2.ToTensor(), + Tv2.Normalize(mean=[0.485, 
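+            # ImageNet channel statistics (standard torchvision preprocessing for the ResNet backbone)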
0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + + def _in_list(self, split, elem): + i = bisect.bisect_left(split, elem) + return i != len(split) and split[i] == elem + + def __len__(self): + return len(self.samples) + + def __getitem__(self, index): + path = self.samples[index] + mod, label, gen, sub = self.info[index] + + image = Image.open(path).convert('RGB') + sample = self.transform(image) + + target = 1.0 if label == 'Fake' else 0.0 + + return sample, target, path + + +def create_dataloader(settings, split=None): + if split == "train": + settings.split = 'train' + is_train=True + + elif split == "val": + settings.split = 'val' + settings.batch_size = settings.batch_size//4 + is_train=False + + elif split == "test": + settings.split = 'test' + settings.batch_size = settings.batch_size//4 + is_train=False + + else: + raise ValueError(f"Unknown split {split}") + + dataset = TrueFake_dataset(settings) + + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=settings.batch_size, + num_workers=int(settings.num_threads), + shuffle = is_train, + collate_fn=None, + ) + return data_loader diff --git a/detectors/NPR/data/__pycache__/__init__.cpython-310.pyc b/detectors/NPR/data/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16d67175135848a0607fba9f8dff50e4c08da1f1 Binary files /dev/null and b/detectors/NPR/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/NPR/data/__pycache__/datasets.cpython-310.pyc b/detectors/NPR/data/__pycache__/datasets.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c4cf1a65f94d2a9340b494445e326b358beb76a Binary files /dev/null and b/detectors/NPR/data/__pycache__/datasets.cpython-310.pyc differ diff --git a/detectors/NPR/data/datasets.py b/detectors/NPR/data/datasets.py new file mode 100755 index 0000000000000000000000000000000000000000..d2d8a9b603abed4287e4d0dd1a7d29f6624d8d74 --- /dev/null +++ b/detectors/NPR/data/datasets.py @@ -0,0 +1,139 @@ +import cv2 +import numpy as np +import torchvision.datasets as datasets +import torchvision.transforms as transforms +import torchvision.transforms.functional as TF +from random import random, choice +from io import BytesIO +from PIL import Image +from PIL import ImageFile +from scipy.ndimage.filters import gaussian_filter +from torchvision.transforms import InterpolationMode + +ImageFile.LOAD_TRUNCATED_IMAGES = True + +def dataset_folder(opt, root): + if opt.mode == 'binary': + return binary_dataset(opt, root) + if opt.mode == 'filename': + return FileNameDataset(opt, root) + raise ValueError('opt.mode needs to be binary or filename.') + + +def binary_dataset(opt, root): + if opt.isTrain: + crop_func = transforms.RandomCrop(opt.cropSize) + elif opt.no_crop: + crop_func = transforms.Lambda(lambda img: img) + else: + crop_func = transforms.CenterCrop(opt.cropSize) + + if opt.isTrain and not opt.no_flip: + flip_func = transforms.RandomHorizontalFlip() + else: + flip_func = transforms.Lambda(lambda img: img) + if not opt.isTrain and opt.no_resize: + rz_func = transforms.Lambda(lambda img: img) + else: + # rz_func = transforms.Lambda(lambda img: custom_resize(img, opt)) + rz_func = transforms.Resize((opt.loadSize, opt.loadSize)) + + dset = datasets.ImageFolder( + root, + transforms.Compose([ + rz_func, + # transforms.Lambda(lambda img: data_augment(img, opt)), + crop_func, + flip_func, + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ])) + 
return dset + + +class FileNameDataset(datasets.ImageFolder): + def name(self): + return 'FileNameDataset' + + def __init__(self, opt, root): + self.opt = opt + super().__init__(root) + + def __getitem__(self, index): + # Loading sample + path, target = self.samples[index] + return path + + +def data_augment(img, opt): + img = np.array(img) + + if random() < opt.blur_prob: + sig = sample_continuous(opt.blur_sig) + gaussian_blur(img, sig) + + if random() < opt.jpg_prob: + method = sample_discrete(opt.jpg_method) + qual = sample_discrete(opt.jpg_qual) + img = jpeg_from_key(img, qual, method) + + return Image.fromarray(img) + + +def sample_continuous(s): + if len(s) == 1: + return s[0] + if len(s) == 2: + rg = s[1] - s[0] + return random() * rg + s[0] + raise ValueError("Length of iterable s should be 1 or 2.") + + +def sample_discrete(s): + if len(s) == 1: + return s[0] + return choice(s) + + +def gaussian_blur(img, sigma): + gaussian_filter(img[:,:,0], output=img[:,:,0], sigma=sigma) + gaussian_filter(img[:,:,1], output=img[:,:,1], sigma=sigma) + gaussian_filter(img[:,:,2], output=img[:,:,2], sigma=sigma) + + +def cv2_jpg(img, compress_val): + img_cv2 = img[:,:,::-1] + encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), compress_val] + result, encimg = cv2.imencode('.jpg', img_cv2, encode_param) + decimg = cv2.imdecode(encimg, 1) + return decimg[:,:,::-1] + + +def pil_jpg(img, compress_val): + out = BytesIO() + img = Image.fromarray(img) + img.save(out, format='jpeg', quality=compress_val) + img = Image.open(out) + # load from memory before ByteIO closes + img = np.array(img) + out.close() + return img + + +jpeg_dict = {'cv2': cv2_jpg, 'pil': pil_jpg} +def jpeg_from_key(img, compress_val, key): + method = jpeg_dict[key] + return method(img, compress_val) + + +# rz_dict = {'bilinear': Image.BILINEAR, + # 'bicubic': Image.BICUBIC, + # 'lanczos': Image.LANCZOS, + # 'nearest': Image.NEAREST} +rz_dict = {'bilinear': InterpolationMode.BILINEAR, + 'bicubic': InterpolationMode.BICUBIC, + 'lanczos': InterpolationMode.LANCZOS, + 'nearest': InterpolationMode.NEAREST} +def custom_resize(img, opt): + interp = sample_discrete(opt.rz_interp) + return TF.resize(img, (opt.loadSize,opt.loadSize), interpolation=rz_dict[interp]) diff --git a/detectors/NPR/detect.py b/detectors/NPR/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..7b61e2109d4489be3e0cf96eede4b52f445877e8 --- /dev/null +++ b/detectors/NPR/detect.py @@ -0,0 +1,106 @@ +import os +import sys +import time +import yaml +import torch +from PIL import Image +import torchvision.transforms as transforms +import argparse + +# Add project root to path for imports +project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(project_root) +from support.detect_utils import format_result, save_result, get_device +import networks.resnet as resnet +from networks.resnet import resnet50 + + +def parse_args(): + parser = argparse.ArgumentParser(description='NPR single image detector') + parser.add_argument('--image', type=str, required=True, help='Path to input image') + parser.add_argument('--model', type=str, default='checkpoint/best.pt', help='Path to model checkpoint') + parser.add_argument('--output', type=str, help='Path to save detection result JSON') + parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)') + parser.add_argument('--config', type=str, default='configs/npr.yaml', help='Path to config file') + return parser.parse_args() + +def 
load_config(config_path): + """Load configuration from YAML file.""" + with open(config_path, 'r') as f: + return yaml.safe_load(f) + +def load_image(image_path, size=224): + """Load and preprocess image.""" + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found: {image_path}") + + image = Image.open(image_path).convert('RGB') + transform = transforms.Compose([ + transforms.Resize(size), + transforms.CenterCrop(size), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + return transform(image).unsqueeze(0) + +def main(): + args = parse_args() + + # Setup device + device = torch.device(args.device) if args.device else get_device() + + # Load model + try: + # Initialize model + model = resnet50(num_classes=1).to(device) + load_path = f'./detectors/NPR/checkpoint/{args.model}/weights/best.pt' + + if not os.path.exists(load_path): + raise FileNotFoundError(f"Model weights not found at: {load_path}") + + checkpoint = torch.load(load_path, map_location=device) + + model.load_state_dict(checkpoint, strict=True) + + model.eval() + except Exception as e: + print(f"Error loading model: {e}") + return + + # Load and preprocess image + try: + image_tensor = load_image(args.image).to(device) + except Exception as e: + print(f"Error loading image: {e}") + return + + # Run detection + start_time = time.time() + with torch.no_grad(): + try: + score = model(image_tensor) + prediction = torch.sigmoid(score) + + confidence = prediction.item() + + result = format_result( + 'fake' if confidence>0.5 else 'real', + confidence, + time.time() - start_time + ) + + # Print result + print(f"Prediction: {result['prediction']}") + print(f"Confidence: {result['confidence']:.4f}") + print(f"Time: {result['elapsed_time']:.3f}s") + + # Save result if output path provided + if args.output: + save_result(result, args.output) + + except Exception as e: + print(f"Error during detection: {e}") + return + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/detectors/NPR/networks/__init__.py b/detectors/NPR/networks/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectors/NPR/networks/__pycache__/__init__.cpython-310.pyc b/detectors/NPR/networks/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc88b87ae8652559c64795a888a720eaff9fd0da Binary files /dev/null and b/detectors/NPR/networks/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/NPR/networks/__pycache__/base_model.cpython-310.pyc b/detectors/NPR/networks/__pycache__/base_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fffcac5b5fed21a037a04ef6ad5237952348559f Binary files /dev/null and b/detectors/NPR/networks/__pycache__/base_model.cpython-310.pyc differ diff --git a/detectors/NPR/networks/__pycache__/resnet.cpython-310.pyc b/detectors/NPR/networks/__pycache__/resnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34c82b218bc94bba421ae5b6fa6abd885d1daade Binary files /dev/null and b/detectors/NPR/networks/__pycache__/resnet.cpython-310.pyc differ diff --git a/detectors/NPR/networks/__pycache__/trainer.cpython-310.pyc b/detectors/NPR/networks/__pycache__/trainer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a55acec3c7d8290b5fb425a7bd463179a3bad9a Binary files /dev/null and 
b/detectors/NPR/networks/__pycache__/trainer.cpython-310.pyc differ diff --git a/detectors/NPR/networks/base_model.py b/detectors/NPR/networks/base_model.py new file mode 100755 index 0000000000000000000000000000000000000000..5c09359167b8c0d3f23476f494b3c227519f6035 --- /dev/null +++ b/detectors/NPR/networks/base_model.py @@ -0,0 +1,97 @@ +# from pix2pix +import os +import torch +import torch.nn as nn +from torch.nn import init +from torch.optim import lr_scheduler + + +class BaseModel(nn.Module): + def __init__(self, opt): + super(BaseModel, self).__init__() + self.opt = opt + self.total_steps = 0 + self.isTrain = opt.isTrain + self.lr = opt.lr + # self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) + # self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) + self.save_dir = os.path.join(f'./checkpoint/{opt.name}/weights/') + os.makedirs(self.save_dir, exist_ok=True) + #self.device = torch.device('cuda:{}'.format(opt.gpu_ids[0])) if opt.gpu_ids else torch.device('cpu') + self.device = torch.device(opt.device if torch.cuda.is_available() else 'cpu') + + def save_networks(self, epoch): + # save_filename = 'model_epoch_%s.pth' % epoch + save_filename = f'{epoch}.pt' + save_path = os.path.join(self.save_dir, save_filename) + + # serialize model and optimizer to dict + # state_dict = { + # 'model': self.model.state_dict(), + # 'optimizer' : self.optimizer.state_dict(), + # 'total_steps' : self.total_steps, + # } + + torch.save(self.model.state_dict(), save_path) + print(f'Saving model {save_path}') + + # load models from the disk + def load_networks(self, epoch): + # load_filename = 'model_epoch_%s.pth' % epoch + load_filename = f'{epoch}.pt' + load_path = os.path.join(self.save_dir, load_filename) + + print('loading the model from %s' % load_path) + # if you are using PyTorch newer than 0.4 (e.g., built from + # GitHub source), you can remove str() on self.device + state_dict = torch.load(load_path, map_location=self.device) + if hasattr(state_dict, '_metadata'): + del state_dict._metadata + + self.model.load_state_dict(state_dict['model']) + self.total_steps = state_dict['total_steps'] + + if self.isTrain and not self.opt.new_optim: + self.optimizer.load_state_dict(state_dict['optimizer']) + ### move optimizer state to GPU + for state in self.optimizer.state.values(): + for k, v in state.items(): + if torch.is_tensor(v): + state[k] = v.to(self.device) + + for g in self.optimizer.param_groups: + g['lr'] = self.opt.lr + + def eval(self): + self.model.eval() + + def train(self): + self.model.train() + + def test(self): + with torch.no_grad(): + self.forward() + + +def init_weights(net, init_type='normal', gain=0.02): + def init_func(m): + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: + init.normal_(m.weight.data, 1.0, gain) + init.constant_(m.bias.data, 0.0) + + print('initialize network with %s' % init_type) + net.apply(init_func) diff --git 
a/detectors/NPR/networks/resnet.py b/detectors/NPR/networks/resnet.py new file mode 100755 index 0000000000000000000000000000000000000000..2a5f40ae344bb6e292c89cfb265dbf059829e5b4 --- /dev/null +++ b/detectors/NPR/networks/resnet.py @@ -0,0 +1,235 @@ +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +from torch.nn import functional as F +from typing import Any, cast, Dict, List, Optional, Union +import numpy as np + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1, zero_init_residual=False): + super(ResNet, self).__init__() + + self.unfoldSize = 2 + self.unfoldIndex = 0 + assert self.unfoldSize > 1 + assert -1 < self.unfoldIndex and self.unfoldIndex < self.unfoldSize*self.unfoldSize + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64 , layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + # self.fc1 = 
nn.Linear(512 * block.expansion, 1) + self.fc1 = nn.Linear(512, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + def interpolate(self, img, factor): + return F.interpolate(F.interpolate(img, scale_factor=factor, mode='nearest', recompute_scale_factor=True), scale_factor=1/factor, mode='nearest', recompute_scale_factor=True) + def forward(self, x): + # n,c,w,h = x.shape + # if -1*w%2 != 0: x = x[:,:,:w%2*-1,: ] + # if -1*h%2 != 0: x = x[:,:,: ,:h%2*-1] + # factor = 0.5 + # x_half = F.interpolate(x, scale_factor=factor, mode='nearest', recompute_scale_factor=True) + # x_re = F.interpolate(x_half, scale_factor=1/factor, mode='nearest', recompute_scale_factor=True) + # NPR = x - x_re + # n,c,w,h = x.shape + # if w%2 == 1 : x = x[:,:,:-1,:] + # if h%2 == 1 : x = x[:,:,:,:-1] + NPR = x - self.interpolate(x, 0.5) + + x = self.conv1(NPR*2.0/3.0) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc1(x) + + return x + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model diff --git a/detectors/NPR/networks/trainer.py b/detectors/NPR/networks/trainer.py new file mode 100755 index 0000000000000000000000000000000000000000..b902c41d777eee8293d5405f4d3e2964b10539c5 --- /dev/null +++ b/detectors/NPR/networks/trainer.py @@ -0,0 +1,67 @@ +import functools +import torch +import torch.nn as nn +from networks.resnet import resnet50 +from networks.base_model import BaseModel, init_weights + + +class Trainer(BaseModel): + def name(self): + return 'Trainer' + + def __init__(self, opt): + super(Trainer, self).__init__(opt) + + if self.isTrain and not opt.continue_train: + self.model = resnet50(pretrained=False, num_classes=1) + + if not self.isTrain or opt.continue_train: + self.model = resnet50(num_classes=1) + + if self.isTrain: + self.loss_fn = nn.BCEWithLogitsLoss() + # initialize optimizers + if opt.optim == 'adam': + self.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, self.model.parameters()), + lr=opt.lr, betas=(opt.beta1, 0.999)) + elif opt.optim == 'sgd': + self.optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()), + lr=opt.lr, momentum=0.0, weight_decay=0) + else: + raise ValueError("optim should be [adam, sgd]") + + # if not self.isTrain or opt.continue_train: + # self.load_networks(opt.epoch) + # self.model.to(opt.gpu_ids[0]) + self.model.to(opt.device) + + + def adjust_learning_rate(self, min_lr=1e-6): + for param_group in self.optimizer.param_groups: + param_group['lr'] *= 0.9 + if param_group['lr'] < min_lr: + return False + self.lr = param_group['lr'] + print('*'*25) + print(f'Changing lr from {param_group["lr"]/0.9} to {param_group["lr"]}') + print('*'*25) + return True + + def set_input(self, input): + self.input = input[0].to(self.device) + self.label = input[1].to(self.device).float() + + + def forward(self): + self.output = self.model(self.input) + + def get_loss(self): + return self.loss_fn(self.output.squeeze(1), self.label) + + def optimize_parameters(self): + self.forward() + self.loss = self.loss_fn(self.output.squeeze(1), self.label) + self.optimizer.zero_grad() + self.loss.backward() + self.optimizer.step() + diff --git a/detectors/NPR/opt.txt b/detectors/NPR/opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..d48b44b1e95d12b363490c474fdedba59fdd80bd --- /dev/null +++ b/detectors/NPR/opt.txt @@ -0,0 +1,43 @@ +----------------- Options --------------- + arch: res50 + batch_size: 64 + beta1: 0.9 + blur_prob: 0 + blur_sig: 0.5 + continue_train: False + cropSize: 224 + data_aug: False + data_keys: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre [default: None] + data_root: /media/mmlab/Datasets_4TB/TrueFake [default: None] + delr_freq: 10 + device: cuda:1 [default: cpu] + earlystop_epoch: 15 + epoch_count: 1 + init_gain: 0.02 + init_type: normal + isTrain: True [default: None] + jpg_method: cv2 + jpg_prob: 0 + jpg_qual: 75 + last_epoch: -1 + loadSize: 256 + loss_freq: 20 + lr: 0.0002 + mode: binary + name: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre [default: experiment_name] + new_optim: False + niter: 50 + no_flip: False + num_threads: 8 + optim: adam + resize_or_crop: scale_and_crop + rz_interp: bilinear + save_epoch_freq: 20 + save_latest_freq: 2000 + serial_batches: False + split_file: /home/stefano.dellanna/02_TrueFake/split.json 
[default: None] + suffix: + task: train [default: None] + train_split: train + val_split: val +----------------- End ------------------- diff --git a/detectors/NPR/options/__init__.py b/detectors/NPR/options/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectors/NPR/options/__pycache__/__init__.cpython-310.pyc b/detectors/NPR/options/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7187b27e05ffc886384b01962a6e47abaf06076b Binary files /dev/null and b/detectors/NPR/options/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/NPR/options/__pycache__/base_options.cpython-310.pyc b/detectors/NPR/options/__pycache__/base_options.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca0a48908f4f30910d77356cbba79999cac8b901 Binary files /dev/null and b/detectors/NPR/options/__pycache__/base_options.cpython-310.pyc differ diff --git a/detectors/NPR/options/__pycache__/test_options.cpython-310.pyc b/detectors/NPR/options/__pycache__/test_options.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7340d1d92332039986676e9ea1dea65c8bd98526 Binary files /dev/null and b/detectors/NPR/options/__pycache__/test_options.cpython-310.pyc differ diff --git a/detectors/NPR/options/__pycache__/train_options.cpython-310.pyc b/detectors/NPR/options/__pycache__/train_options.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b670d5e3676eef5d74fc7483faed77700be4fca4 Binary files /dev/null and b/detectors/NPR/options/__pycache__/train_options.cpython-310.pyc differ diff --git a/detectors/NPR/options/base_options.py b/detectors/NPR/options/base_options.py new file mode 100755 index 0000000000000000000000000000000000000000..82498c6582b3262082c8e35172b487f21ff1e5c5 --- /dev/null +++ b/detectors/NPR/options/base_options.py @@ -0,0 +1,125 @@ +import argparse +import os +import time +import util +import torch +#import models +#import data + + +class BaseOptions(): + def __init__(self): + self.initialized = False + + def initialize(self, parser): + parser.add_argument('--mode', default='binary') + parser.add_argument('--arch', type=str, default='res50', help='architecture for binary classification') + + parser.add_argument("--task", type=str, help="Task: train/test") + # data augmentation + parser.add_argument('--rz_interp', default='bilinear') + parser.add_argument('--blur_prob', type=float, default=0) + parser.add_argument('--blur_sig', default='0.5') + parser.add_argument('--jpg_prob', type=float, default=0) + parser.add_argument('--jpg_method', default='cv2') + parser.add_argument('--jpg_qual', default='75') + + # parser.add_argument('--dataroot', default='./dataset/', help='path to images (should have subfolders trainA, trainB, valA, valB, etc)') + # parser.add_argument('--classes', default='', help='image classes to train on') + parser.add_argument("--split_file", type=str, help="Path to split json") + parser.add_argument("--data_root", type=str, help="Path to dataset") + parser.add_argument("--data_keys", type=str, help="Dataset specifications") + + # parser.add_argument('--class_bal', action='store_true') + parser.add_argument('--batch_size', type=int, default=64, help='input batch size') + parser.add_argument('--loadSize', type=int, default=256, help='scale images to this size') + parser.add_argument('--cropSize', type=int, default=224, help='then crop to this 
size') + # parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') + parser.add_argument('--device', type=str, default='cpu', help='') + parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models') + # parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') + parser.add_argument('--num_threads', default=8, type=int, help='# threads for loading data') + # parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') + parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') + parser.add_argument('--resize_or_crop', type=str, default='scale_and_crop', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop|none]') + parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation') + parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal|xavier|kaiming|orthogonal]') + parser.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal.') + parser.add_argument('--suffix', default='', type=str, help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{loadSize}') + parser.add_argument('--delr_freq', type=int, default=10, help='frequency of changing lr') + + + self.initialized = True + return parser + + def gather_options(self): + # initialize parser with basic options + if not self.initialized: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = self.initialize(parser) + + # get the basic options + opt, _ = parser.parse_known_args() + self.parser = parser + + return opt #parser.parse_args() + + def print_options(self, opt): + message = '' + message += '----------------- Options ---------------\n' + for k, v in sorted(vars(opt).items()): + comment = '' + default = self.parser.get_default(k) + if v != default: + comment = '\t[default: %s]' % str(default) + message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) + message += '----------------- End -------------------' + print(message) + + # save to the disk + + # expr_dir = os.path.join(opt.checkpoints_dir, opt.name) + expr_dir = os.path.join('checkpoint', opt.name) + util.mkdirs(expr_dir) + file_name = os.path.join(expr_dir, 'opt.txt') + with open(file_name, 'wt') as opt_file: + opt_file.write(message) + opt_file.write('\n') + + def parse(self, print_options=True): + + opt = self.gather_options() + opt.isTrain = self.isTrain # train or test + #opt.name = opt.name + time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + # process opt.suffix + if opt.suffix: + suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else '' + opt.name = opt.name + suffix + + if print_options: + self.print_options(opt) + + # set gpu ids + # str_ids = opt.gpu_ids.split(',') + # opt.gpu_ids = [] + # for str_id in str_ids: + # id = int(str_id) + # if id >= 0: + # opt.gpu_ids.append(id) + # if len(opt.gpu_ids) > 0: + # torch.cuda.set_device(opt.gpu_ids[0]) + + # additional + # opt.classes = opt.classes.split(',') + opt.rz_interp = opt.rz_interp.split(',') + opt.blur_sig = [float(s) for s in opt.blur_sig.split(',')] + opt.jpg_method = 
opt.jpg_method.split(',') + opt.jpg_qual = [int(s) for s in opt.jpg_qual.split(',')] + if len(opt.jpg_qual) == 2: + opt.jpg_qual = list(range(opt.jpg_qual[0], opt.jpg_qual[1] + 1)) + elif len(opt.jpg_qual) > 2: + raise ValueError("Shouldn't have more than 2 values for --jpg_qual.") + + self.opt = opt + return self.opt diff --git a/detectors/NPR/options/test_options.py b/detectors/NPR/options/test_options.py new file mode 100755 index 0000000000000000000000000000000000000000..1dba350e3cadf758c2dc39e99a55b4e844e7b16d --- /dev/null +++ b/detectors/NPR/options/test_options.py @@ -0,0 +1,17 @@ +from .base_options import BaseOptions + + +class TestOptions(BaseOptions): + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) + # parser.add_argument('--dataroot') + parser.add_argument('--model_path') + parser.add_argument('--no_resize', action='store_true') + parser.add_argument('--no_crop', action='store_true') + parser.add_argument('--eval', action='store_true', help='use eval mode during test time.') + parser.add_argument('--earlystop_epoch', type=int, default=15) + parser.add_argument('--lr', type=float, default=0.00002, help='initial learning rate for adam') + parser.add_argument('--niter', type=int, default=0, help='# of iter at starting learning rate') + + self.isTrain = False + return parser diff --git a/detectors/NPR/options/train_options.py b/detectors/NPR/options/train_options.py new file mode 100755 index 0000000000000000000000000000000000000000..57290604ce6b30905c8716094e277a0541cb39ee --- /dev/null +++ b/detectors/NPR/options/train_options.py @@ -0,0 +1,30 @@ +from .base_options import BaseOptions + + +class TrainOptions(BaseOptions): + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) + parser.add_argument('--earlystop_epoch', type=int, default=15) + parser.add_argument('--data_aug', action='store_true', help='if specified, perform additional data augmentation (photometric, blurring, jpegging)') + parser.add_argument('--optim', type=str, default='adam', help='optim to use [sgd, adam]') + parser.add_argument('--new_optim', action='store_true', help='new optimizer instead of loading the optim state') + parser.add_argument('--loss_freq', type=int, default=20, help='frequency of showing loss on tensorboard') + parser.add_argument('--save_latest_freq', type=int, default=2000, help='frequency of saving the latest results') + parser.add_argument('--save_epoch_freq', type=int, default=20, help='frequency of saving checkpoints at the end of epochs') + parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') + parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') + parser.add_argument('--last_epoch', type=int, default=-1, help='starting epoch count for scheduler intialization') + parser.add_argument('--niter', type=int, default=50, help='# of iter at starting learning rate') + # parser.add_argument('--niter', type=int, default=50, help='# of iter at starting learning rate') + parser.add_argument('--beta1', type=float, default=0.9, help='momentum term of adam') + parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') + # parser.add_argument('--model_path') + # parser.add_argument('--no_resize', action='store_true') + # parser.add_argument('--no_crop', action='store_true') + + parser.add_argument('--train_split', type=str, default='train', help='train, val, test, etc') + 
parser.add_argument('--val_split', type=str, default='val', help='train, val, test, etc') + + + self.isTrain = True + return parser diff --git a/detectors/NPR/test.py b/detectors/NPR/test.py new file mode 100644 index 0000000000000000000000000000000000000000..dfb81f973b1718e5e5da9c115ead23e5c8897416 --- /dev/null +++ b/detectors/NPR/test.py @@ -0,0 +1,173 @@ +import sys +import time +import os +import csv +import torch +import json +from util import Logger, printSet +from validate import validate +from networks.resnet import resnet50 +from options.test_options import TestOptions +import networks.resnet as resnet +import numpy as np +import random +from data import create_dataloader +from sklearn.metrics import roc_auc_score, accuracy_score + +from tqdm import tqdm +import pandas as pd + +def seed_torch(seed=1029): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.enabled = False +seed_torch(100) + +opt = TestOptions().parse(print_options=False) +opt.model_path = os.path.join(f'./checkpoint/{opt.name}/weights/best.pt') +print(f'Model_path {opt.model_path}') + + +# get model +model = resnet50(num_classes=1) +model.load_state_dict(torch.load(opt.model_path, map_location='cpu'), strict=True) +model.to(opt.device) +model.eval() + +opt.no_resize = False +opt.no_crop = True + +output_dir = f'./results/{opt.name}/data/{opt.data_keys}' +os.makedirs(output_dir, exist_ok=True) + +test_dataloader = create_dataloader(opt, split='test') + +model.eval() + +# File paths +csv_filename = os.path.join(output_dir, 'results.csv') +metrics_filename = os.path.join(output_dir, 'metrics.json') +image_results_filename = os.path.join(output_dir, 'image_results.json') + +# Extract training dataset keys from model name (format: "training_keys_freeze_down" or "training_keys") +training_dataset_keys = [] +model_name = opt.name +if '_freeze_down' in model_name: + training_name = model_name.replace('_freeze_down', '') +else: + training_name = model_name +if '&' in training_name: + training_dataset_keys = training_name.split('&') +else: + training_dataset_keys = [training_name] + +# Collect all results +all_scores = [] +all_labels = [] +all_paths = [] +image_results = [] + +start_time = time.time() + +# Write CSV header +with open(csv_filename, 'w') as f: + f.write(f"{','.join(['name', 'pro', 'flag'])}\n") + +with torch.no_grad(): + with tqdm(test_dataloader, unit='batch', mininterval=0.5) as tbatch: + tbatch.set_description(f'Validation') + for (data, labels, paths) in tbatch: + data = data.to(opt.device) + labels = labels.to(opt.device) + + scores = model(data).squeeze(1) + + # Collect results + for score, label, path in zip(scores, labels, paths): + score_val = score.item() + label_val = label.item() + + all_scores.append(score_val) + all_labels.append(label_val) + all_paths.append(path) + + image_results.append({ + 'path': path, + 'score': score_val, + 'label': label_val + }) + + # Write to CSV (maintain backward compatibility) + with open(csv_filename, 'a') as f: + for score, label, path in zip(scores, labels, paths): + f.write(f"{path}, {score.item()}, {label.item()}\n") + +# Calculate metrics +all_scores = np.array(all_scores) +all_labels = np.array(all_labels) + +# Convert scores to probabilities using sigmoid (as done in validate.py) +probabilities = 
torch.sigmoid(torch.tensor(all_scores)).numpy() + +# Convert probabilities to predictions using threshold 0.5 (as done in validate.py) +predictions = (probabilities > 0.5).astype(int) + +# Calculate overall metrics +total_accuracy = accuracy_score(all_labels, predictions) + +# TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1) +fake_mask = all_labels == 1 +if fake_mask.sum() > 0: + tpr = accuracy_score(all_labels[fake_mask], predictions[fake_mask]) +else: + tpr = 0.0 + +# Calculate TNR on real images (label==0) in the test set +real_mask = all_labels == 0 +if real_mask.sum() > 0: + # Overall TNR calculated on all real images in the test set + tnr = accuracy_score(all_labels[real_mask], predictions[real_mask]) +else: + tnr = 0.0 + +# AUC calculation (using probabilities) +if len(np.unique(all_labels)) > 1: # Need both classes for AUC + auc = roc_auc_score(all_labels, probabilities) +else: + auc = 0.0 + +execution_time = time.time() - start_time + +# Prepare metrics JSON +metrics = { + 'TPR': float(tpr), + 'TNR': float(tnr), + 'Acc total': float(total_accuracy), + 'AUC': float(auc), + 'execution time': float(execution_time) +} + +# Write metrics JSON +with open(metrics_filename, 'w') as f: + json.dump(metrics, f, indent=2) + +# Write individual image results JSON +with open(image_results_filename, 'w') as f: + json.dump(image_results, f, indent=2) + +print(f'\nMetrics saved to {metrics_filename}') +print(f'Image results saved to {image_results_filename}') +print(f'\nMetrics:') +print(f' TPR: {tpr:.4f}') +print(f' TNR: {tnr:.4f}') +print(f' Accuracy: {total_accuracy:.4f}') +print(f' AUC: {auc:.4f}') +print(f' Execution time: {execution_time:.2f} seconds') + + diff --git a/detectors/NPR/train.py b/detectors/NPR/train.py new file mode 100755 index 0000000000000000000000000000000000000000..e0ae87cfcbca94b8ab515052f26410b7443b507d --- /dev/null +++ b/detectors/NPR/train.py @@ -0,0 +1,85 @@ +import os +import sys +import time +import torch +import torch.nn +import argparse +from PIL import Image +import numpy as np +from validate import validate +from data import create_dataloader +from networks.trainer import Trainer +from options.train_options import TrainOptions +from options.test_options import TestOptions +from util import Logger +from tqdm import tqdm +import random +def seed_torch(seed=1029): + random.seed(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
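+    # the cuDNN flags below disable autotuning and non-deterministic kernels, so a fixed seed gives reproducible runs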
+ torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.enabled = False + + +def get_val_opt(): + val_opt = TrainOptions().parse(print_options=False) + val_opt.isTrain = False + val_opt.no_resize = False + val_opt.no_crop = False + val_opt.serial_batches = True + + return val_opt + + +if __name__ == '__main__': + opt_train = TrainOptions().parse() + seed_torch(100) + + print(' '.join(list(sys.argv)) ) + opt_val = get_val_opt() + + train_loader = create_dataloader(opt_train, split='train') + val_loader = create_dataloader(opt_val, split='val') + + + model = Trainer(opt_train) + + model.train() + print(f'cwd: {os.getcwd()}') + for epoch in range(opt_train.niter): + if epoch > 0: + epoch_start_time = time.time() + iter_data_time = time.time() + epoch_iter = 0 + + #for i, data in enumerate(train_loader): + with tqdm(train_loader, unit='batch', mininterval=0.5) as tepoch: + tepoch.set_description(f'Epoch {epoch}', refresh=False) + for i, data in enumerate(tepoch): + model.total_steps += 1 + epoch_iter += opt_train.batch_size + + model.set_input(data) + model.optimize_parameters() + tepoch.set_postfix(loss=model.loss.item()) + + if epoch % opt_train.delr_freq == 0 and epoch != 0: + print('changing lr at the end of epoch %d, iters %d' % (epoch, model.total_steps)) + model.adjust_learning_rate() + + + # Validation + model.eval() + acc, ap = validate(model.model, val_loader)[:2] + print("(Val @ epoch {}) acc: {}; ap: {}".format(epoch, acc, ap)) + model.train() + if epoch == 0: + model.save_networks('best') + elif acc >= model.best_acc: + model.save_networks('best') + diff --git a/detectors/NPR/util.py b/detectors/NPR/util.py new file mode 100755 index 0000000000000000000000000000000000000000..a41c60c8e33f942fe6cf8f09d57b0d538acb378b --- /dev/null +++ b/detectors/NPR/util.py @@ -0,0 +1,48 @@ +import sys +import os +import torch + + +def mkdirs(paths): + if isinstance(paths, list) and not isinstance(paths, str): + for path in paths: + mkdir(path) + else: + mkdir(paths) + + +def mkdir(path): + if not os.path.exists(path): + os.makedirs(path) + + +def unnormalize(tens, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + # assume tensor of shape NxCxHxW + return tens * torch.Tensor(std)[None, :, None, None] + torch.Tensor( + mean)[None, :, None, None] + + + + +class Logger(object): + """Log stdout messages.""" + + def __init__(self, outfile): + self.terminal = sys.stdout + self.log = open(outfile, "a") + sys.stdout = self + + def write(self, message): + self.terminal.write(message) + self.log.write(message) + + def flush(self): + self.terminal.flush() + + +def printSet(set_str): + set_str = str(set_str) + num = len(set_str) + print("="*num*3) + print(" "*num + set_str) + print("="*num*3) \ No newline at end of file diff --git a/detectors/NPR/validate.py b/detectors/NPR/validate.py new file mode 100755 index 0000000000000000000000000000000000000000..bc7028d3a4284464ea716b374720602300d0a21b --- /dev/null +++ b/detectors/NPR/validate.py @@ -0,0 +1,45 @@ +import torch +import numpy as np +from networks.resnet import resnet50 +from sklearn.metrics import average_precision_score, precision_recall_curve, accuracy_score +from options.test_options import TestOptions +from data import create_dataloader +from tqdm import tqdm + + + +def validate(model, data_loader): + with torch.no_grad(): + y_true, y_pred = [], [] + with tqdm(data_loader, unit='batch', mininterval=0.5) as tbatch: + tbatch.set_description(f'Validation') + for (img, label, _) in tbatch: + 
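+                # move each batch to the GPU, score it, and collect sigmoid probabilities alongside the ground-truth labels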
in_tens = img.cuda() + y_pred.extend(model(in_tens).sigmoid().flatten().tolist()) + y_true.extend(label.flatten().tolist()) + + y_true, y_pred = np.array(y_true), np.array(y_pred) + r_acc = accuracy_score(y_true[y_true==0], y_pred[y_true==0] > 0.5) + f_acc = accuracy_score(y_true[y_true==1], y_pred[y_true==1] > 0.5) + acc = accuracy_score(y_true, y_pred > 0.5) + ap = average_precision_score(y_true, y_pred) + print(f'Got accuracy {acc:.2f} \n') + return acc, ap, r_acc, f_acc, y_true, y_pred + + +if __name__ == '__main__': + opt = TestOptions().parse(print_options=False) + + model = resnet50(num_classes=1) + state_dict = torch.load(opt.model_path, map_location='cpu') + model.load_state_dict(state_dict['model']) + model.cuda() + model.eval() + + acc, avg_precision, r_acc, f_acc, y_true, y_pred = validate(model, opt) + + print("accuracy:", acc) + print("average precision:", avg_precision) + + print("accuracy of real images:", r_acc) + print("accuracy of fake images:", f_acc) diff --git a/detectors/P2G/.DS_Store b/detectors/P2G/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c8bda4145932914d9d69d160116416790de52912 Binary files /dev/null and b/detectors/P2G/.DS_Store differ diff --git a/detectors/P2G/LICENSE b/detectors/P2G/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..7187dc8fd9ce64b23b911d35612bfac40c1ea920 --- /dev/null +++ b/detectors/P2G/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Francesco Laiti. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/detectors/P2G/README.md b/detectors/P2G/README.md new file mode 100644 index 0000000000000000000000000000000000000000..adbc75c3eedb7ebc6afaa4363e95403ad62e02bf --- /dev/null +++ b/detectors/P2G/README.md @@ -0,0 +1,34 @@ +# ✋ Prompt2Guard: Conditioned Prompt-Optimization for Continual Deepfake Detection + + +[![Official Github Repo](https://img.shields.io/badge/Github%20page-222222.svg?style=for-the-badge&logo=github)](https://github.com/laitifranz/Prompt2Guard) +[![Paper](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/abs/2407.21554) + + +Original Paper: [Conditioned Prompt-Optimization for Continual Deepfake Detection](https://arxiv.org/abs/2407.21554). + +Authors: Francesco Laiti, Benedetta Liberatori, Thomas De Min, Elisa Ricci + +## Abstract + +The rapid advancement of generative models has significantly enhanced the realism and customization of digital content creation. 
The increasing power of these tools, coupled with their ease of access, fuels the creation of photorealistic fake content, termed deepfakes, that raises substantial concerns about their potential misuse. In response, there has been notable progress in developing detection mechanisms to identify content produced by these advanced systems. + +However, existing methods often struggle to adapt to the continuously evolving landscape of deepfake generation. This paper introduces Prompt2Guard, a novel solution for exemplar-free continual deepfake detection of images, that leverages Vision-Language Models (VLMs) and domain-specific multimodal prompts. Compared to previous VLM-based approaches that are either bounded by prompt selection accuracy or necessitate multiple forward passes, we leverage a prediction ensembling technique with read-only prompts. Read-only prompts do not interact with VLMs internal representation, mitigating the need for multiple forward passes. Thus, we enhance efficiency and accuracy in detecting generated content. Additionally, our method exploits a text-prompt conditioning tailored to deepfake detection, which we demonstrate is beneficial in our setting. + +We evaluate Prompt2Guard on CDDB-Hard, a continual deepfake detection benchmark composed of five deepfake detection datasets spanning multiple domains and generators, achieving a new state-of-the-art. Additionally, our results underscore the effectiveness of our approach in addressing the challenges posed by continual deepfake detection, paving the way for more robust and adaptable solutions in deepfake detection. + +## Please Cite + +``` +@misc{laiti2024conditionedpromptoptimizationcontinualdeepfake, + title={Conditioned Prompt-Optimization for Continual + Deepfake Detection}, + author={Francesco Laiti and Benedetta Liberatori and + Thomas De Min and Elisa Ricci}, + year={2024}, + eprint={2407.21554}, + archivePrefix={arXiv}, + primaryClass={cs.CV}, + url={https://arxiv.org/abs/2407.21554}, +} +``` \ No newline at end of file diff --git a/detectors/P2G/__pycache__/parser.cpython-310.pyc b/detectors/P2G/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a47de68cb879c2cc938cc983b5f37d14b02ad04 Binary files /dev/null and b/detectors/P2G/__pycache__/parser.cpython-310.pyc differ diff --git a/detectors/P2G/checkpoint/.DS_Store b/detectors/P2G/checkpoint/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4ad9585d07d5258000c763c4152a1bf3a530b1bb Binary files /dev/null and b/detectors/P2G/checkpoint/.DS_Store differ diff --git a/detectors/P2G/checkpoint/pretrained/.DS_Store b/detectors/P2G/checkpoint/pretrained/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..558f64c2312a1bbcf5d0fc0ba3ca154a4dc1371d Binary files /dev/null and b/detectors/P2G/checkpoint/pretrained/.DS_Store differ diff --git a/detectors/P2G/checkpoint/pretrained/weights/best.pt b/detectors/P2G/checkpoint/pretrained/weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..741a1347da2bbf57c626ac53b64d639617d17094 --- /dev/null +++ b/detectors/P2G/checkpoint/pretrained/weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f07688d6c7f035fa886da4d03b1883c1738b8b2486336d5400d84475c4b771 +size 28458 diff --git a/detectors/P2G/configs/cddb_inference.json b/detectors/P2G/configs/cddb_inference.json new file mode 100644 index 
0000000000000000000000000000000000000000..8fe2978db0fa0daa6daf6b9688a9d420fe7853cf --- /dev/null +++ b/detectors/P2G/configs/cddb_inference.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1ca89037f003a2376b8db71aa15031c44b11bfe11242405dc4a1061a71fa6a +size 396 diff --git a/detectors/P2G/configs/cddb_training.json b/detectors/P2G/configs/cddb_training.json new file mode 100644 index 0000000000000000000000000000000000000000..e7ae8cf8f94e4b9c85bd804fa85c287ee754fb28 --- /dev/null +++ b/detectors/P2G/configs/cddb_training.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed704925f1b2661047fc50e46976e32aa0d715059df92afd0605c85ef462ead2 +size 978 diff --git a/detectors/P2G/configs/test.json b/detectors/P2G/configs/test.json new file mode 100644 index 0000000000000000000000000000000000000000..2783a6dbde6029ed4a8241669bba249fb9119e24 --- /dev/null +++ b/detectors/P2G/configs/test.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fcb5abbaf1f773357f8564e6ca44787b59d3a9fd69c24db164287d15b79ebe +size 422 diff --git a/detectors/P2G/configs/test_template.json b/detectors/P2G/configs/test_template.json new file mode 100644 index 0000000000000000000000000000000000000000..403bcc4b5c50b43358af482348e104cdec4ec5d3 --- /dev/null +++ b/detectors/P2G/configs/test_template.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1788dfd27f46120a648332bae15b249a33844ca184501ed13c71489915eab4b1 +size 350 diff --git a/detectors/P2G/configs/train.json b/detectors/P2G/configs/train.json new file mode 100644 index 0000000000000000000000000000000000000000..de06ac63906a67936bf0688655a312b1372ad93a --- /dev/null +++ b/detectors/P2G/configs/train.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd707f19a87cf64774684a57a79b892e59cd80952daf104cc108bd8e46715601 +size 1151 diff --git a/detectors/P2G/configs/train_template.json b/detectors/P2G/configs/train_template.json new file mode 100644 index 0000000000000000000000000000000000000000..c06b2e4fcd00603bce7f84b7ca0b8434095a28c4 --- /dev/null +++ b/detectors/P2G/configs/train_template.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b40f5e9ed5c0e36e8c1960a5e379d68ab87f8ad4fe75fde6bd70daa658407d +size 1014 diff --git a/detectors/P2G/detect.py b/detectors/P2G/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..d5443d5b5a1cb1955ca36e6f34ff3c5071c6191e --- /dev/null +++ b/detectors/P2G/detect.py @@ -0,0 +1,219 @@ +import os +import sys +import time +import torch +import argparse +import json +import pickle +from PIL import Image + +# Add project root to path for imports +project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(project_root) +# Also add this detector's src folder so `models` package inside P2G can be imported +this_dir = os.path.dirname(os.path.abspath(__file__)) +src_dir = os.path.join(this_dir, 'src') +if src_dir not in sys.path: + sys.path.insert(0, src_dir) + +from support.detect_utils import format_result, save_result, get_device, load_image + +# Import P2G model (now that src is on sys.path) +from models.slinet_det import SliNet + +def parse_args(): + parser = argparse.ArgumentParser(description='P2G single image detector') + parser.add_argument('--image', type=str, required=True, help='Path to input image') + parser.add_argument('--model', type=str, default=None, help='Path to model checkpoint (can be relative to 
detectors/P2G)') + parser.add_argument('--output', type=str, help='Path to save detection result JSON') + parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)') + parser.add_argument('--config', type=str, default='configs/test.json', help='Path to config file (relative to detectors/P2G)') + return parser.parse_args() + +def load_config(config_path): + """Load configuration from JSON file. Accepts absolute or relative path (relative to detectors/P2G).""" + if not os.path.isabs(config_path): + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), config_path) + with open(config_path, 'r') as f: + return json.load(f) + +def main(): + args = parse_args() + + # Setup device + if args.device: + if args.device.startswith('cuda') and not torch.cuda.is_available(): + print("CUDA is not available, falling back to CPU") + device = torch.device('cpu') + else: + device = torch.device(args.device) + else: + device = get_device() + + # Load config (JSON used by P2G) + try: + config = load_config(args.config) + except Exception as e: + print(f"Error loading config '{args.config}': {e}") + return + + # Resolve model checkpoint path (allow passing relative path or None) + if args.model is None: + model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'checkpoint', 'pretrained', 'weights', 'best.pt') + else: + model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'checkpoint', str(args.model), 'weights', 'best.pt') + + + if not os.path.exists(model_path): + print(f"Model checkpoint not found: {model_path}") + return + # Load checkpoint early so we can populate config/args expected by SliNet + try: + checkpoint = torch.load(model_path, map_location=device) + except Exception as e: + print(f"Error loading checkpoint '{model_path}': {e}") + return + + # populate required fields for SliNet from checkpoint (as prepare_model does in eval.py) + # Use checkpoint values when available, otherwise fall back to config or sensible defaults + try: + config['K'] = checkpoint.get('K', config.get('K', 5)) + config['topk_classes'] = checkpoint.get('topk_classes', config.get('topk_classes', 1)) + # eval.py used 'ensembling_flags' in checkpoint and assigned to args['ensembling'] + config['ensembling'] = checkpoint.get('ensembling_flags', config.get('ensembling', [False, False, False, False])) + # number of tasks + if 'tasks' in checkpoint: + config['num_tasks'] = checkpoint['tasks'] + 1 + config['task_name'] = range(config['num_tasks']) + except Exception: + # If any key is missing we continue with defaults — SliNet will raise more specific errors if needed + pass + + # ensure device in config is a torch.device + config['device'] = device + + # Instantiate model and load state dict + try: + model = SliNet(config) + state_dict = checkpoint.get('model_state_dict', checkpoint.get('model', checkpoint)) + model.load_state_dict(state_dict, strict=False) + model.eval() + model = model.to(device) + except Exception as e: + print(f"Error loading model: {e}") + return + + # Load and preprocess image + try: + image_tensor, _ = load_image(args.image, size=224) + image_tensor = image_tensor.to(device) + except Exception as e: + print(f"Error loading image: {e}") + return + + # Load object labels from classes.pkl + try: + pkl_path = os.path.join(src_dir, 'utils', 'classes.pkl') + with open(pkl_path, 'rb') as f: + object_labels_dict = pickle.load(f) + # Normalize to a relative path key like those stored in classes.pkl + rel_path = 
os.path.relpath(args.image, config.get('data_path', project_root)).replace(os.sep, '/') + candidates = [rel_path, rel_path.lstrip('/'), '/' + rel_path] + found_key = None + for k in candidates: + if k in object_labels_dict: + found_key = k + break + if found_key is None: + basename = os.path.basename(rel_path) + for k in object_labels_dict.keys(): + if k.endswith('/' + basename) or k.endswith(basename): + found_key = k + break + def ensure_topk_tuples(label_list, topk=5): + # Convert to tuples if needed + tuples = [(lbl, 1.0) if isinstance(lbl, str) else lbl for lbl in label_list] + # Remove duplicates, keep order + seen = set() + unique = [] + for t in tuples: + if t[0] not in seen: + unique.append(t) + seen.add(t[0]) + # Pad or truncate to topk + while len(unique) < topk: + unique.append(('unknown', 1.0)) + return unique[:topk] + + if found_key is None: + fallback_val = next(iter(object_labels_dict.values())) + object_label = ensure_topk_tuples(fallback_val, topk=5) + print(f"[warn] object label not found for '{rel_path}' (requested '{args.image}'), using fallback label") + else: + val = object_labels_dict[found_key] + object_label = ensure_topk_tuples(val, topk=5) + except Exception as e: + print(f"Error loading object labels: {e}") + return + + # Run detection + start_time = time.time() + with torch.no_grad(): + try: + # Always wrap as batch size 1 + object_label = [object_label] + outputs = model(image_tensor, object_label[0]) + + # Robust output handling: model may return logits, 2-class scores, or tensors of different shapes. + confidence = None + if isinstance(outputs, dict) and 'logits' in outputs: + out = torch.as_tensor(outputs['logits']).detach().cpu() + # If logits is 2-class, use softmax + if out.ndim == 2 and out.shape[1] == 2: + probs = torch.softmax(out, dim=1) + confidence = float(probs[0, 1]) + else: + confidence = float(torch.sigmoid(out.mean()).item()) + elif torch.is_tensor(outputs): + out = outputs.detach().cpu() + if out.ndim == 0: + confidence = float(torch.sigmoid(out).item()) + elif out.ndim == 1: + if out.numel() == 2: + probs = torch.softmax(out, dim=0) + confidence = float(probs[1]) + else: + confidence = float(torch.sigmoid(out.mean()).item()) + elif out.ndim == 2: + if out.shape[0] >= 1 and out.shape[1] == 2: + probs = torch.softmax(out, dim=1) + confidence = float(probs[0, 1]) + else: + confidence = float(torch.sigmoid(out.mean()).item()) + else: + confidence = float(torch.sigmoid(out.mean()).item()) + else: + try: + confidence = float(outputs) + except Exception: + confidence = 0.0 + + if confidence is None: + confidence = 0.0 + + label = 'fake' if confidence > 0.5 else 'real' + + result = format_result(label, confidence, time.time() - start_time) + print(f"Prediction: {result['prediction']}") + print(f"Confidence: {result['confidence']:.4f}") + print(f"Time: {result['elapsed_time']:.3f}s") + + # Save result if output path provided + if args.output: + save_result(result, args.output) + except Exception as e: + print(f"Error during detection: {e}") + return + +if __name__ == '__main__': + main() diff --git a/detectors/P2G/logs/gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre/2025-11-13-11:06:36/info.log b/detectors/P2G/logs/gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre/2025-11-13-11:06:36/info.log new file mode 100644 index 0000000000000000000000000000000000000000..dd8106ea335ec0603df51dac6403aeee5349cdac --- /dev/null +++ b/detectors/P2G/logs/gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre/2025-11-13-11:06:36/info.log @@ -0,0 +1,47 @@ +2025-11-13 11:06:36,644 
[trainer.py] => batch_size: 32 +2025-11-13 11:06:36,645 [trainer.py] => batch_size_eval: 512 +2025-11-13 11:06:36,645 [trainer.py] => epochs: 20 +2025-11-13 11:06:36,645 [trainer.py] => warmup_epoch: 1 +2025-11-13 11:06:36,645 [trainer.py] => init_lr: 0.01 +2025-11-13 11:06:36,645 [trainer.py] => lrate: 0.01 +2025-11-13 11:06:36,645 [trainer.py] => label_smoothing: 0.1 +2025-11-13 11:06:36,645 [trainer.py] => topk_classes: 5 +2025-11-13 11:06:36,645 [trainer.py] => K: 7 +2025-11-13 11:06:36,645 [trainer.py] => enable_prev_prompt: False +2025-11-13 11:06:36,645 [trainer.py] => ensembling: [False, False, True, False] +2025-11-13 11:06:36,645 [trainer.py] => precision: fp16 +2025-11-13 11:06:36,645 [trainer.py] => dataset: TrueFake +2025-11-13 11:06:36,645 [trainer.py] => task_name: ['gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre'] +2025-11-13 11:06:36,645 [trainer.py] => data_path: /media/NAS/DeepShield/ +2025-11-13 11:06:36,645 [trainer.py] => split_file: /home/andrea.montibeller/Image-Deepfake-Detectors-Public-Library/split.json +2025-11-13 11:06:36,645 [trainer.py] => multiclass: [0, 0, 0, 0, 0] +2025-11-13 11:06:36,645 [trainer.py] => class_order: [0, 1] +2025-11-13 11:06:36,645 [trainer.py] => memory_size: 0 +2025-11-13 11:06:36,645 [trainer.py] => memory_per_class: 0 +2025-11-13 11:06:36,645 [trainer.py] => fixed_memory: True +2025-11-13 11:06:36,645 [trainer.py] => shuffle: False +2025-11-13 11:06:36,645 [trainer.py] => init_cls: 2 +2025-11-13 11:06:36,645 [trainer.py] => increment: 2 +2025-11-13 11:06:36,646 [trainer.py] => embd_dim: 768 +2025-11-13 11:06:36,646 [trainer.py] => torch_seed: 2 +2025-11-13 11:06:36,646 [trainer.py] => seed: [1993] +2025-11-13 11:06:36,646 [trainer.py] => EPSILON: 1e-08 +2025-11-13 11:06:36,646 [trainer.py] => init_milestones: [20, 30, 40] +2025-11-13 11:06:36,646 [trainer.py] => init_lr_decay: 0.1 +2025-11-13 11:06:36,646 [trainer.py] => init_weight_decay: 0.0005 +2025-11-13 11:06:36,646 [trainer.py] => milestones: [20, 30] +2025-11-13 11:06:36,646 [trainer.py] => lrate_decay: 0.1 +2025-11-13 11:06:36,646 [trainer.py] => weight_decay: 0.0002 +2025-11-13 11:06:36,646 [trainer.py] => run_name: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre +2025-11-13 11:06:36,646 [trainer.py] => device: cuda:0 +2025-11-13 11:06:36,646 [trainer.py] => task: train +2025-11-13 11:06:36,646 [trainer.py] => num_workers: 12 +2025-11-13 11:06:36,646 [trainer.py] => config: configs/train.json +2025-11-13 11:06:36,646 [trainer.py] => wandb: False +2025-11-13 11:06:54,249 [data_manager.py] => [0, 1] +2025-11-13 11:06:54,255 [slinet.py] => CFG -> INPUTSIZE: (224, 224) +2025-11-13 11:06:54,256 [slinet.py] => CFG -> backbonename: ViT-B/16 +2025-11-13 11:06:57,340 [trainer.py] => All params: 149629697 +2025-11-13 11:06:57,341 [trainer.py] => Trainable params: 149629697 +2025-11-13 11:06:57,341 [prompt2guard.py] => Learning on 0-2 +2025-11-13 11:06:57,421 [prompt2guard.py] => Parameters to be updated: {'prompt_learner.0.img_prompt', 'prompt_learner.0.text_prompt'} diff --git a/detectors/P2G/logs/gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre/2025-11-13-11:24:26/info.log b/detectors/P2G/logs/gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre/2025-11-13-11:24:26/info.log new file mode 100644 index 0000000000000000000000000000000000000000..9b31244250ce41918b8b8b14b373ebbf28bf6595 --- /dev/null +++ b/detectors/P2G/logs/gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre/2025-11-13-11:24:26/info.log @@ -0,0 +1,47 @@ +2025-11-13 11:24:26,329 [trainer.py] => batch_size: 32 +2025-11-13 11:24:26,330 
[trainer.py] => batch_size_eval: 512 +2025-11-13 11:24:26,330 [trainer.py] => epochs: 20 +2025-11-13 11:24:26,330 [trainer.py] => warmup_epoch: 1 +2025-11-13 11:24:26,330 [trainer.py] => init_lr: 0.01 +2025-11-13 11:24:26,330 [trainer.py] => lrate: 0.01 +2025-11-13 11:24:26,330 [trainer.py] => label_smoothing: 0.1 +2025-11-13 11:24:26,330 [trainer.py] => topk_classes: 5 +2025-11-13 11:24:26,330 [trainer.py] => K: 7 +2025-11-13 11:24:26,330 [trainer.py] => enable_prev_prompt: False +2025-11-13 11:24:26,330 [trainer.py] => ensembling: [False, False, True, False] +2025-11-13 11:24:26,330 [trainer.py] => precision: fp16 +2025-11-13 11:24:26,330 [trainer.py] => dataset: TrueFake +2025-11-13 11:24:26,330 [trainer.py] => task_name: ['gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre'] +2025-11-13 11:24:26,330 [trainer.py] => data_path: /media/NAS/DeepShield/ +2025-11-13 11:24:26,330 [trainer.py] => split_file: /home/andrea.montibeller/Image-Deepfake-Detectors-Public-Library/split.json +2025-11-13 11:24:26,330 [trainer.py] => multiclass: [0, 0, 0, 0, 0] +2025-11-13 11:24:26,330 [trainer.py] => class_order: [0, 1] +2025-11-13 11:24:26,330 [trainer.py] => memory_size: 0 +2025-11-13 11:24:26,330 [trainer.py] => memory_per_class: 0 +2025-11-13 11:24:26,330 [trainer.py] => fixed_memory: True +2025-11-13 11:24:26,330 [trainer.py] => shuffle: False +2025-11-13 11:24:26,330 [trainer.py] => init_cls: 2 +2025-11-13 11:24:26,330 [trainer.py] => increment: 2 +2025-11-13 11:24:26,330 [trainer.py] => embd_dim: 768 +2025-11-13 11:24:26,330 [trainer.py] => torch_seed: 2 +2025-11-13 11:24:26,330 [trainer.py] => seed: [1993] +2025-11-13 11:24:26,330 [trainer.py] => EPSILON: 1e-08 +2025-11-13 11:24:26,330 [trainer.py] => init_milestones: [20, 30, 40] +2025-11-13 11:24:26,330 [trainer.py] => init_lr_decay: 0.1 +2025-11-13 11:24:26,330 [trainer.py] => init_weight_decay: 0.0005 +2025-11-13 11:24:26,330 [trainer.py] => milestones: [20, 30] +2025-11-13 11:24:26,331 [trainer.py] => lrate_decay: 0.1 +2025-11-13 11:24:26,331 [trainer.py] => weight_decay: 0.0002 +2025-11-13 11:24:26,331 [trainer.py] => run_name: gan2:pre&sdXL:pre&realFFHQ:pre&realFORLAB:pre +2025-11-13 11:24:26,331 [trainer.py] => device: cuda:0 +2025-11-13 11:24:26,331 [trainer.py] => task: train +2025-11-13 11:24:26,331 [trainer.py] => num_workers: 12 +2025-11-13 11:24:26,331 [trainer.py] => config: configs/train.json +2025-11-13 11:24:26,331 [trainer.py] => wandb: False +2025-11-13 11:24:41,502 [data_manager.py] => [0, 1] +2025-11-13 11:24:41,509 [slinet.py] => CFG -> INPUTSIZE: (224, 224) +2025-11-13 11:24:41,509 [slinet.py] => CFG -> backbonename: ViT-B/16 +2025-11-13 11:24:44,571 [trainer.py] => All params: 149629697 +2025-11-13 11:24:44,572 [trainer.py] => Trainable params: 149629697 +2025-11-13 11:24:44,572 [prompt2guard.py] => Learning on 0-2 +2025-11-13 11:24:44,649 [prompt2guard.py] => Parameters to be updated: {'prompt_learner.0.text_prompt', 'prompt_learner.0.img_prompt'} diff --git a/detectors/P2G/parser.py b/detectors/P2G/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..1b333c98e986c6597d428c59fb515acc0feb995d --- /dev/null +++ b/detectors/P2G/parser.py @@ -0,0 +1,15 @@ +import argparse + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--name", type=str, default="test", help="run name") + parser.add_argument("--device", type=str, default="cuda:0", help="cuda device to use") + parser.add_argument("--split_file", type=str, help="Path to split json") + parser.add_argument("--data_root", 
type=str, help="Path to dataset") + parser.add_argument("--data_keys", type=str, help="Dataset specifications") + + parser.add_argument("--task", type=str, help="Unused") + parser.add_argument("--num_threads", type=int, help="Unused") + parser.add_argument("--num_epoches", type=int, help="Unused") + + return parser \ No newline at end of file diff --git a/detectors/P2G/src/.DS_Store b/detectors/P2G/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..154b1b3bf578e6784017659462d0be5f9bdb7339 Binary files /dev/null and b/detectors/P2G/src/.DS_Store differ diff --git a/detectors/P2G/src/__pycache__/eval.cpython-310.pyc b/detectors/P2G/src/__pycache__/eval.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c514afd67fd958d3eafa8a06e1ba9b3f22154d85 Binary files /dev/null and b/detectors/P2G/src/__pycache__/eval.cpython-310.pyc differ diff --git a/detectors/P2G/src/__pycache__/trainer.cpython-310.pyc b/detectors/P2G/src/__pycache__/trainer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6c94b69ecc3dd9178fa21599b79f87a6a3d6e7a Binary files /dev/null and b/detectors/P2G/src/__pycache__/trainer.cpython-310.pyc differ diff --git a/detectors/P2G/src/eval.py b/detectors/P2G/src/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..83582d36963428c0ad47522389304496d31d91db --- /dev/null +++ b/detectors/P2G/src/eval.py @@ -0,0 +1,550 @@ +import argparse +import json +import os +from tqdm import tqdm +import io +import pickle +import copy +import torch +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms +from PIL import Image +import numpy as np +from einops import reduce, rearrange +import bisect +from models.slinet import SliNet +import pandas as pd +import time +from sklearn.metrics import roc_auc_score, accuracy_score +def parse_dataset(data_keys): + gen_keys = { + 'gan1':['StyleGAN'], + 'gan2':['StyleGAN2'], + 'gan3':['StyleGAN3'], + 'sd15':['StableDiffusion1.5'], + 'sd2':['StableDiffusion2'], + 'sd3':['StableDiffusion3'], + 'sdXL':['StableDiffusionXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + + gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()] + # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key] + # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key] + gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key] + + mod_keys = { + 'pre': ['PreSocial'], + 'fb': ['Facebook'], + 'tl': ['Telegram'], + 'tw': ['Twitter'], + } + + mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()] + mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']] + + dataset_list = [] + for data in data_keys.split('&'): + gen, mod = data.split(':') + dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]}) + + return dataset_list + +class DummyDataset(Dataset): + def __init__(self, data_path, data_type, data_scenario, data_compression, split_file=None): + self.do_compress = [ + data_compression[0], + data_compression[1], + ] # enable/disable compression from flag - jpeg quality + self.trsf = transforms.Compose( + [ + transforms.Resize( + 256, interpolation=transforms.InterpolationMode.BICUBIC + ), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + ] + ) + + images = [] + labels = [] + + # print(f'--- Data 
compression: {data_compression} ---') + + if data_type == "cddb": + if data_scenario == "cddb_hard": + subsets = [ + "gaugan", + "biggan", + "wild", + "whichfaceisreal", + "san", + ] # <- CDDB Hard + multiclass = [0, 0, 0, 0, 0] + elif data_scenario == "ood": + subsets = ["deepfake", "glow", "stargan_gf"] # <- OOD experiments + multiclass = [0, 1, 1] + else: + raise RuntimeError( + f"Unexpected data_scenario value: {data_scenario}. Expected 'cddb_hard' or 'ood'." + ) + print(f"--- Test on {subsets} with {data_scenario} scenario ---") + for id, name in enumerate(subsets): + root_ = os.path.join(data_path, name, "val") + # sub_classes = [''] + sub_classes = os.listdir(root_) if multiclass[id] else [""] + for cls in sub_classes: + for imgname in os.listdir(os.path.join(root_, cls, "0_real")): + images.append(os.path.join(root_, cls, "0_real", imgname)) + labels.append(0 + 2 * id) + + for imgname in os.listdir(os.path.join(root_, cls, "1_fake")): + images.append(os.path.join(root_, cls, "1_fake", imgname)) + labels.append(1 + 2 * id) + + elif data_type == "TrueFake": + print(f"--- Test on {data_scenario} ---") + + with open(split_file, "r") as f: + splits = json.load(f) + test_split = sorted(splits["test"]) + + + dataset_list = parse_dataset(data_scenario) + + for dict in dataset_list: + generators = dict['gen'] + modifiers = dict['mod'] + + for mod in modifiers: + for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(data_path, mod), topdown=True, followlinks=True): + if len(dataset_dirs): + continue + + (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(data_path, mod) + os.sep, '').split(os.sep)[:3] + + if gen in generators: + for filename in sorted(dataset_files): + if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']: + if self._in_list(test_split, os.path.join(gen, sub, os.path.splitext(filename)[0])): + images.append(os.path.join(dataset_root, filename)) + labels.append(1 if label == 'Fake' else 0) + + else: + pass + + assert len(images) == len(labels), "Data size error!" 
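+        # keep image paths and their labels in parallel lists (lengths verified by the assert above)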
+ self.images = images + self.labels = labels + self.dataset_path = data_path + + with open("./src/utils/classes.pkl", "rb") as f: + self.object_labels = pickle.load(f) + + def __len__(self): + return len(self.images) + + def __getitem__(self, idx): + img_path = os.path.join(self.dataset_path, self.images[idx]) + image = self.trsf( + self.pil_loader(img_path, self.do_compress[0], self.do_compress[1]) + ) + label = self.labels[idx] + #object_label = self.object_labels[img_path.replace(self.dataset_path, "")][0:5] + + # Normalize to a relative path key like those stored in classes.pkl + rel_path = os.path.relpath(img_path, self.dataset_path).replace(os.sep, '/') + + # Try a few variants to match keys stored in classes.pkl + candidates = [rel_path, rel_path.lstrip('/'), '/' + rel_path] + found_key = None + for k in candidates: + if k in self.object_labels: + found_key = k + break + + # If not found, try matching by basename (may be ambiguous but prevents crash) + if found_key is None: + basename = os.path.basename(rel_path) + for k in self.object_labels.keys(): + if k.endswith('/' + basename) or k.endswith(basename): + found_key = k + break + + # If still not found, fall back to the first available label entry to avoid KeyError + if found_key is None: + # pick any available entry as fallback (preserve expected structure) + fallback_val = next(iter(self.object_labels.values())) + object_label = fallback_val[0:5] + print(f"[warn] object label not found for '{rel_path}' (requested '{img_path}'), using fallback label") + else: + object_label = self.object_labels[found_key][0:5] + + return object_label, image, label, img_path + + def pil_loader(self, path, do_compress, quality): + with open(path, "rb") as f: + if do_compress: + f = self.compress_image_to_memory(path, quality=quality) + img = Image.open(f) + return img.convert("RGB") + + def compress_image_to_memory(self, path, quality): + with Image.open(path) as img: + output = io.BytesIO() + img.save(output, "JPEG", quality=quality) + output.seek(0) + return output + + def _in_list(self, split, elem): + i = bisect.bisect_left(split, elem) + return i != len(split) and split[i] == elem + +def setup_parser(): + parser = argparse.ArgumentParser( + description="Reproduce of multiple continual learning algorithms." 
+ ) + # parser.add_argument( + # "--scenario", type=str, default="cddb_hard", help="scenario to test" + # ) + parser.add_argument("--resume", type=str, default="", help="resume model") + parser.add_argument( + "--random_select", action="store_true", help="use random select" + ) + parser.add_argument( + "--upperbound", action="store_true", help="use groundtruth task identification" + ) + parser.add_argument( + "--config", + type=str, + default="configs/cddb_inference.json", + help="Json file of settings.", + ) + parser.add_argument( + "--data_path", + type=str, + default="/home/francesco.laiti/datasets/CDDB/", + help="data path", + ) + parser.add_argument("--datatype", type=str, default="deepfake", help="data type") + parser.add_argument( + "--compression", type=bool, default=False, help="test on compressed data" + ) + parser.add_argument( + "--c_quality", + type=int, + default=100, + help="quality of JPEG compressed (100, 90, 50...)", + ) + return parser + + +def load_json(settings_path): + with open(settings_path) as data_file: + param = json.load(data_file) + return param + + +def load_configuration(): + args = setup_parser().parse_args() + param = load_json(args.config) + if args.resume == "": + args.resume = f'./best.pt' + args_dict = vars(args) + args_dict.update(param) + return args_dict + + +def compute_predictions(outputs): + predictions = {} + + # Top1 + outputs_top1 = rearrange(outputs, "b t p -> b (t p)") + _, predicts_top1 = outputs_top1.max(dim=1) + predictions["top1"] = predicts_top1 % 2 + + # Mean + outputs_mean = reduce(outputs, "b t p -> b p", "mean") + predictions["mean"] = torch.argmax(outputs_mean, dim=-1) + + # Mixture of experts (top & mean) + r_f_tensor = rearrange(outputs, "b t p -> b p t") + r_f_max, _ = torch.max(r_f_tensor, dim=-1) + r_f_mean = reduce(r_f_tensor, "b p t -> b p", "mean") + diff_max = torch.abs(r_f_max[:, 0] - r_f_max[:, 1]) + diff_mean = torch.abs(r_f_mean[:, 0] - r_f_mean[:, 1]) + conditions = diff_mean > diff_max + predicts_based_on_mean = torch.where( + r_f_mean[:, 0] > r_f_mean[:, 1], + torch.zeros_like(conditions), + torch.ones_like(conditions), + ) + predicts_based_on_max = torch.where( + r_f_max[:, 0] > r_f_max[:, 1], + torch.zeros_like(conditions), + torch.ones_like(conditions), + ) + predictions["mix_top_mean"] = torch.where( + conditions, predicts_based_on_mean, predicts_based_on_max + )*1 + + return predictions + + +def accuracy_binary(y_pred, y_true, increment=2): + assert len(y_pred) == len(y_true), "Data length error." 
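
A note on the label convention shared by `compute_predictions` and `accuracy_binary`: each integer label encodes both the task and the class as `label = 2 * task_id + is_fake`, so the task-agnostic accuracy compares only the parities of predictions and targets. A small illustration with made-up values (not model outputs):

```python
import numpy as np

# label = 2 * task_id + is_fake  ->  parity (label % 2) is the real/fake class
y_true = np.array([0, 1, 2, 3])   # task 0 real/fake, task 1 real/fake
y_pred = np.array([0, 1, 4, 5])   # wrong task for the last two samples

# task-agnostic accuracy: the real/fake parity still matches everywhere
print((y_pred % 2 == y_true % 2).mean())  # 1.0
```
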
+ all_acc = {} + all_acc["total"] = float( + "{:.2f}".format((y_pred % 2 == y_true % 2).sum() * 100 / len(y_true)) + ) # * Task-agnostic AA * + + task_acc = [] + for class_id in range(0, np.max(y_true), increment): + idxes = np.where( + np.logical_and(y_true >= class_id, y_true < class_id + increment) + )[0] + label = "{}-{}".format( + str(class_id).rjust(2, "0"), str(class_id + increment - 1).rjust(2, "0") + ) + acc = ((y_pred[idxes] % 2) == (y_true[idxes] % 2)).sum() * 100 / len(idxes) + all_acc[label] = float("{:.2f}".format(acc)) + task_acc.append(acc) + all_acc["task_wise"] = float( + "{:.2f}".format(sum(task_acc) / len(task_acc)) + ) # * Average Accuracy (AA) or Task-wise AA * + return all_acc + + +def prepare_model(args): + checkpoint = torch.load(f'./checkpoint/{args["run_name"]}/weights/best.pt', map_location=args["device"]) + # update config args + args["K"] = checkpoint["K"] + args["topk_classes"] = checkpoint["topk_classes"] + args["ensembling"] = checkpoint["ensembling_flags"] + + # load all prototypes + keys_dict = { + "all_keys": checkpoint["keys"]["all_keys"].unsqueeze(0), # * [Task, N_cluster = 5, 512] + "all_keys_one_cluster": checkpoint["keys"]["all_keys_one_cluster"].unsqueeze(0), # * [Task, 512] + "real_keys_one_cluster": checkpoint["keys"]["real_keys_one_cluster"].unsqueeze(0), # * [Task, 512] + "fake_keys_one_cluster": checkpoint["keys"]["fake_keys_one_cluster"].unsqueeze(0), # * [Task, 512] + } + for key in keys_dict.keys(): + print(f"--- {key}: {keys_dict[key].shape} ---") + + # print(checkpoint["tasks"]) + args["num_tasks"] = checkpoint["tasks"] + 1 + print(f"--- Number of tasks: {args['num_tasks']} ---") + args["task_name"] = range(args["num_tasks"]) + + # build and load model + model = SliNet(args) + model.load_state_dict(checkpoint["model_state_dict"], strict=False) + model.eval() + model = model.to(args["device"]) + + print(f"--- Run: {checkpoint.get('run_name', 'not available')} ---") + + return model, keys_dict + + +def prepare_data_loader(args): + test_dataset = DummyDataset( + args["data_path"], + args["dataset"], + args["scenario"], + [args["compression"], args["c_quality"]], + args["split_file"], + ) + return DataLoader( + test_dataset, + batch_size=args["batch_size_eval"], + shuffle=False, + num_workers=int(os.environ.get("SLURM_CPUS_ON_NODE", 2)), + ) + + +@torch.no_grad +def inference_step(args, model: SliNet, test_loader, keys_dict): + start_time = time.time() + + total_tasks = args["num_tasks"] + + def upperbound_selection(targets): + domain_indices = torch.div(targets, 2, rounding_mode="floor") + domain_prob = torch.zeros( + (len(targets), total_tasks), dtype=torch.float16, device=args["device"] + ) + domain_prob[torch.arange(len(targets)), domain_indices] = 1.0 + return domain_prob + + def process_batch(inputs, targets, object_name): + keys_dict["upperbound"] = upperbound_selection(targets) + if args["upperbound"]: + keys_dict["prototype"] = "upperbound" + outputs = model.interface(inputs, object_name, total_tasks, keys_dict) + + if args["softmax"]: + outputs = torch.nn.functional.softmax(outputs, dim=-1) + return compute_predictions(outputs) + + # File paths + output_dir = f'./results/{args["run_name"]}/data/{args["scenario"]}' + os.makedirs(output_dir, exist_ok=True) + csv_filename = os.path.join(output_dir, 'results.csv') + metrics_filename = os.path.join(output_dir, 'metrics.json') + image_results_filename = os.path.join(output_dir, 'image_results.json') + + # Extract training dataset keys from run_name (format: "training_keys_freeze_down" or 
"training_keys") + training_dataset_keys = [] + run_name = args["run_name"] + if '_freeze_down' in run_name: + training_name = run_name.replace('_freeze_down', '') + else: + training_name = run_name + if '&' in training_name: + training_dataset_keys = training_name.split('&') + else: + training_dataset_keys = [training_name] + + # Collect all results + all_predictions_top1 = [] + all_predictions_mean = [] + all_predictions_mix = [] + all_labels = [] + all_binary_labels = [] + all_paths = [] + image_results = [] + + # Write CSV header + with open(csv_filename, 'w') as f: + f.write(f"{','.join(['name', 'pro_top', 'pro_mean', 'pro_mix', 'flag'])}\n") + + for _, (object_name, inputs, targets, paths) in tqdm(enumerate(test_loader), total=len(test_loader), mininterval=5): + inputs, targets = inputs.to(args["device"]), targets.to(args["device"]) + predictions = process_batch(inputs, targets, object_name) + + # Collect results + for score_top, score_mean, score_mix, label, path in zip(predictions['top1'], predictions['mean'], predictions['mix_top_mean'], targets, paths): + label_val = label.item() + binary_label = label_val % 2 # Convert to binary (task-agnostic) + + all_predictions_top1.append(score_top.item()) + all_predictions_mean.append(score_mean.item()) + all_predictions_mix.append(score_mix.item()) + all_labels.append(label_val) + all_binary_labels.append(binary_label) + all_paths.append(path) + + image_results.append({ + 'path': path, + 'score_top1': score_top.item(), + 'score_mean': score_mean.item(), + 'score_mix': score_mix.item(), + 'label': label_val, + 'binary_label': binary_label + }) + + # Write to CSV (maintain backward compatibility) + with open(csv_filename, 'a') as f: + for score_top, score_mean, score_mix, label, path in zip(predictions['top1'], predictions['mean'], predictions['mix_top_mean'], targets, paths): + f.write(f"{path}, {score_top.item()}, {score_mean.item()}, {score_mix.item()}, {label.item()}\n") + + # Calculate metrics using 'mix_top_mean' as primary prediction method + all_predictions_mix = np.array(all_predictions_mix) + all_binary_labels = np.array(all_binary_labels) + + # Predictions are already binary (0 or 1) + predictions = all_predictions_mix.astype(int) + + # Calculate overall metrics + total_accuracy = accuracy_score(all_binary_labels, predictions) + + # TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1) + fake_mask = all_binary_labels == 1 + if fake_mask.sum() > 0: + tpr = accuracy_score(all_binary_labels[fake_mask], predictions[fake_mask]) + else: + tpr = 0.0 + + + # Calculate TNR on real images (label==0) in the test set + real_mask = all_binary_labels == 0 + if real_mask.sum() > 0: + # Overall TNR calculated on all real images in the test set + tnr = accuracy_score(all_binary_labels[real_mask], predictions[real_mask]) + else: + tnr = 0.0 + + # AUC calculation + # For AUC, we need probabilities. Since predictions are binary (0/1), we'll use the scores + # We need to convert binary predictions to probabilities. 
Since we don't have raw logits, + # we'll use a simple approach: normalize predictions or use a threshold-based probability + if len(np.unique(all_binary_labels)) > 1: # Need both classes for AUC + # Use predictions directly as probabilities (they're already 0/1, but AUC needs continuous) + # For binary predictions, we can create probabilities based on the score distribution + # Since mix_top_mean gives us binary predictions, we'll use a simple approach: + # Create probabilities by normalizing or using the predictions directly + # Actually, for AUC with binary predictions, we can use the predictions as-is + # But ideally we'd have probabilities. For now, we'll calculate AUC using predictions + # Note: This might not be ideal, but works for binary classifier outputs + try: + auc = roc_auc_score(all_binary_labels, predictions.astype(float)) + except: + auc = 0.0 + else: + auc = 0.0 + + execution_time = time.time() - start_time + + # Prepare metrics JSON + metrics = { + 'TPR': float(tpr), + 'TNR': float(tnr), + 'Acc total': float(total_accuracy), + 'AUC': float(auc), + 'execution time': float(execution_time) + } + + # Write metrics JSON + with open(metrics_filename, 'w') as f: + json.dump(metrics, f, indent=2) + + # Write individual image results JSON + with open(image_results_filename, 'w') as f: + json.dump(image_results, f, indent=2) + + print(f'\nMetrics saved to {metrics_filename}') + print(f'Image results saved to {image_results_filename}') + print(f'\nMetrics (using mix_top_mean):') + print(f' TPR: {tpr:.4f}') + print(f' TNR: {tnr:.4f}') + print(f' Accuracy: {total_accuracy:.4f}') + print(f' AUC: {auc:.4f}') + print(f' Execution time: {execution_time:.2f} seconds') + + +def pretty_print(data): + return json.dumps(data, indent=4, sort_keys=True) + + +if __name__ == "__main__": + args = load_configuration() + print(args) + #args["device"] = "cuda" if torch.cuda.is_available() else "cpu" + scenarios = copy.deepcopy(args["scenario"]) + model, keys_dict = prepare_model(args) + keys_dict["prototype"] = args["prototype"] + + for s in scenarios: + args["scenario"] = s + os.makedirs(f'./results/{args["run_name"]}/data/{args["scenario"]}', exist_ok=True) + + test_loader = prepare_data_loader(args) + inference_step(args, model, test_loader, keys_dict) + \ No newline at end of file diff --git a/detectors/P2G/src/methods/__init__.py b/detectors/P2G/src/methods/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectors/P2G/src/methods/__pycache__/__init__.cpython-310.pyc b/detectors/P2G/src/methods/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f1538a8da8fddbc02752492c6b74f8805659838 Binary files /dev/null and b/detectors/P2G/src/methods/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/P2G/src/methods/__pycache__/prompt2guard.cpython-310.pyc b/detectors/P2G/src/methods/__pycache__/prompt2guard.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e6c1e703ed92f0b7aaaac4955d2e187e4000a99 Binary files /dev/null and b/detectors/P2G/src/methods/__pycache__/prompt2guard.cpython-310.pyc differ diff --git a/detectors/P2G/src/methods/prompt2guard.py b/detectors/P2G/src/methods/prompt2guard.py new file mode 100644 index 0000000000000000000000000000000000000000..76afcadfbce158a48bf5d999ad994b7642b5850d --- /dev/null +++ b/detectors/P2G/src/methods/prompt2guard.py @@ -0,0 +1,785 @@ +import torch +from torch import 
optim +from torch.nn import functional as F +from torch.utils.data import DataLoader + +import logging +import numpy as np +from tqdm import tqdm +from sklearn.cluster import KMeans +import os + +from utils.toolkit import tensor2numpy, accuracy_domain +from models.slinet import SliNet +from utils.lr_scheduler import build_lr_scheduler +from utils.data_manager import DataManager +from eval import compute_predictions + +import wandb + + +class Prompt2Guard: + + def __init__(self, args: dict): + # Network and device settings + self.network = SliNet(args) + self.device = args["device"] + self.class_num = self.network.class_num + + # Task and class settings + self.cur_task = -1 + self.n_clusters = 5 + self.n_cluster_one = 1 + self.known_classes = 0 + self.total_classes = 0 + + # Key settings, different clusters tested + self.all_keys = [] # consider n_clusters image prototypes for each domain + self.all_keys_one_vector = [] # consider 1 image prototype for each domain + self.real_keys_one_vector = [] # only real images considered to build the prototype + self.fake_keys_one_vector = [] # only fake images considered to build the prototype + + # Learning parameters + self.EPSILON = args["EPSILON"] + self.init_lr = args["init_lr"] + self.init_lr_decay = args["init_lr_decay"] + self.init_weight_decay = args["init_weight_decay"] + self.epochs = args["epochs"] + self.warmup_epoch = args["warmup_epoch"] + self.lrate = args["lrate"] + self.lrate_decay = args["lrate_decay"] + self.batch_size = args["batch_size"] + self.batch_size_eval = args["batch_size_eval"] + self.weight_decay = args["weight_decay"] + self.label_smoothing = args["label_smoothing"] + self.enable_prev_prompt = args["enable_prev_prompt"] + + # System settings + self.num_workers = int( + os.environ.get("SLURM_CPUS_ON_NODE", args["num_workers"]) + ) + self.filename = args["filename"] + + # Other settings + self.args = args + + # # wandb setup + # slurm_job_name = os.environ.get("SLURM_JOB_NAME", 'prompt2guard') + # if slurm_job_name == "bash": + # slurm_job_name += "/localtest" + + # self.wandb_logger = wandb.init( + # project=slurm_job_name.split("/")[0], + # entity="YOUR_USERNAME", + # name=slurm_job_name.split("/")[1], + # mode="disabled" if not args["wandb"] else "online", + # config=args, + # ) + # if self.wandb_logger is None: + # raise ValueError("Failed to initialize wandb logger") + + # self.wandb_logger.define_metric("epoch") + # self.wandb_logger.define_metric("task") + # self.wandb_logger.define_metric("condition") + # self.wandb_logger.define_metric("task_*", step_metric="epoch") + # self.wandb_logger.define_metric("eval_trainer/*", step_metric="task") + # self.wandb_logger.define_metric("inference_*", step_metric="condition") + + def after_task(self, nb_tasks): + self.known_classes = self.total_classes + if self.enable_prev_prompt and self.network.numtask < nb_tasks: + with torch.no_grad(): + self.network.prompt_learner[self.network.numtask].load_state_dict( + self.network.prompt_learner[self.network.numtask - 1].state_dict() + ) + + def incremental_train(self, data_manager: DataManager): + self.cur_task += 1 + self.total_classes = self.known_classes + data_manager.get_task_size( + self.cur_task + ) + self.network.update_fc() + + logging.info("Learning on {}-{}".format(self.known_classes, self.total_classes)) + + train_dataset = data_manager.get_dataset( + np.arange(self.known_classes, self.total_classes), + source="train", + mode="train", + ) + self.train_loader = DataLoader( + train_dataset, + batch_size=self.batch_size, + 
shuffle=True, + num_workers=self.num_workers, + ) + test_dataset = data_manager.get_dataset( + np.arange(0, self.total_classes), source="test", mode="test" + ) + self.test_loader = DataLoader( + test_dataset, + batch_size=self.batch_size_eval, + shuffle=False, + num_workers=self.num_workers, + ) + + self._train(self.train_loader, self.test_loader) + self.clustering(self.train_loader) + + def _train(self, train_loader, test_loader): + self.network.to(self.device) + for name, param in self.network.named_parameters(): + param.requires_grad_(False) + if "prompt_learner" + "." + str(self.network.numtask - 1) in name: + param.requires_grad_(True) + + # Double check + enabled = set() + for name, param in self.network.named_parameters(): + if param.requires_grad: + enabled.add(name) + logging.info(f"Parameters to be updated: {enabled}") + + if self.cur_task == 0: + optimizer = optim.SGD( + self.network.parameters(), + momentum=0.9, + lr=self.init_lr, + weight_decay=self.init_weight_decay, + ) + scheduler = build_lr_scheduler( + optimizer, + lr_scheduler="cosine", + warmup_epoch=self.warmup_epoch, + warmup_type="constant", + warmup_cons_lr=1e-5, + max_epoch=self.epochs, + ) + self.run_epoch = self.epochs + self.train_function(train_loader, test_loader, optimizer, scheduler) + else: + optimizer = optim.SGD( + self.network.parameters(), + momentum=0.9, + lr=self.lrate, + weight_decay=self.weight_decay, + ) + scheduler = build_lr_scheduler( + optimizer, + lr_scheduler="cosine", + warmup_epoch=self.warmup_epoch, + warmup_type="constant", + warmup_cons_lr=1e-5, + max_epoch=self.epochs, + ) + self.run_epoch = self.epochs + self.train_function(train_loader, test_loader, optimizer, scheduler) + + def train_function(self, train_loader, test_loader, optimizer, scheduler): + prog_bar = tqdm(range(self.run_epoch)) + best_acc = 0.0 # Already present, used for tracking + + # --- Added: Define save path and ensure directory exists --- + # Using the same path as your original save_checkpoint method + save_dir = f'./checkpoint/{self.args["run_name"]}/weights/' + os.makedirs(save_dir, exist_ok=True) + save_path = os.path.join(save_dir, 'best.pt') + # --------------------------------------------------------- + + for _, epoch in enumerate(prog_bar): + losses = 0.0 + correct, total = 0, 0 + + # Set network to train mode + self.network.train() + with tqdm(train_loader, unit='batch', mininterval=10) as tepoch: + tepoch.set_description(f'Epoch {epoch}', refresh=False) + for i, (object_name, inputs, targets) in enumerate(train_loader): + inputs, targets = inputs.to(self.device), targets.to(self.device) + mask = (targets >= self.known_classes).nonzero().view(-1) + inputs = torch.index_select(inputs, 0, mask) + targets = torch.index_select(targets, 0, mask) - self.known_classes + + logits = self.network(inputs, object_name)["logits"] + loss = F.cross_entropy( + logits, targets, label_smoothing=self.label_smoothing + ) + optimizer.zero_grad() + loss.backward() + optimizer.step() + losses += loss.item() + tepoch.set_postfix(loss=loss.item()) + + _, preds = torch.max(logits, dim=1) + correct += preds.eq(targets.expand_as(preds)).cpu().sum() + total += len(targets) + #if i> 10: + # break + + scheduler.step() + train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2) + + # Set network to eval mode for computing test_acc + self.network.eval() + test_acc = self._compute_accuracy_domain(self.network, test_loader, epoch) + + # --- Added: Checkpoint saving logic --- + if test_acc > best_acc: + best_acc = test_acc + 
logging.info(f"New best accuracy: {best_acc}. Saving checkpoint to {save_path}") + + # --- Dynamic values --- + model_state_dict = { + name: param.cpu() # Move to CPU for saving + for name, param in self.network.named_parameters() + if param.requires_grad + } + + # --- Hardcoded values --- + + # WARNING: The 'all_keys' tensor data was incomplete in the prompt (contained '...'). + # It is NOT included here. Please provide the full tensor to save it. + logging.warning("Checkpoint 'all_keys' is not being saved as the provided data was incomplete.") + + # These lists are complete and will be saved. + all_keys_one_cluster_data = [ + 1.9211e-02, -7.6294e-02, 3.2578e-03, 2.5272e-03, -4.6310e-03, + 4.3297e-03, 1.8418e-05, -4.9782e-03, 2.2526e-03, -2.1114e-03, + 1.0109e-02, -2.7512e-02, 9.0561e-03, -1.9394e-02, -1.4694e-02, + 1.3664e-02, 3.0479e-03, -9.8724e-03, -5.1956e-03, -3.5648e-03, + 3.1799e-02, 6.7043e-04, -7.5684e-03, 4.4441e-03, 4.3869e-03, + 5.9395e-03, 1.1765e-02, 5.3444e-03, 3.5152e-03, -4.1580e-03, + 5.9853e-03, -2.6245e-03, -4.7264e-03, -6.5956e-03, -3.2501e-02, + -1.3824e-02, -5.6305e-03, -5.0850e-03, 4.1290e-02, -1.0567e-02, + -2.3212e-03, -1.3599e-03, -9.1782e-03, -1.9608e-03, -5.6496e-03, + -6.1989e-03, -3.9558e-03, 1.1358e-03, 9.8801e-04, 9.9659e-04, + 2.2011e-03, 1.1787e-02, 9.9411e-03, -4.1938e-04, -7.4120e-03, + -3.0609e-02, 1.2871e-02, 2.3331e-02, -3.1972e-04, 1.3802e-02, + 2.4109e-02, -1.4542e-02, -5.2214e-04, 2.3880e-03, -8.7967e-03, + 2.3079e-03, 7.1793e-03, -1.9104e-02, 2.1095e-03, 4.9095e-03, + 1.4954e-02, 3.6407e-02, 3.2745e-02, 7.2365e-03, 1.6739e-02, + 5.6763e-03, 1.4481e-02, 1.3771e-02, 3.7212e-03, -2.2945e-03, + 2.4948e-02, 3.3936e-02, -1.8433e-02, 4.9639e-04, -1.2941e-03, + 8.9417e-03, -1.5440e-03, -9.7427e-03, 7.0152e-03, 4.7827e-04, + 6.1264e-03, -6.9313e-03, 8.3008e-03, 1.0155e-02, -8.6136e-03, + -6.0539e-03, 1.7578e-02, 1.7548e-03, 7.9727e-04, 2.1957e-02, + 5.8098e-03, 1.1665e-02, 1.6342e-02, -5.2185e-02, -8.6746e-03, + -2.7733e-03, -3.7518e-03, -4.6921e-03, -1.7366e-03, -1.6159e-02, + 1.6184e-03, -1.4053e-02, 2.7065e-03, 5.7640e-03, 2.0294e-03, + -1.1093e-02, -2.4395e-03, 7.4310e-03, 6.1760e-03, -9.0103e-03, + -2.3937e-03, -3.1986e-03, 5.9891e-03, 4.6448e-02, 1.3718e-02, + 7.8506e-03, 2.5024e-02, -8.0719e-03, 1.2123e-02, -2.4185e-02, + -1.0757e-02, 1.5686e-02, -5.7144e-03, -3.2291e-03, 3.0075e-02, + 1.0727e-02, -9.3002e-03, 6.6757e-04, -1.4946e-02, 3.4752e-03, + -6.5918e-03, -1.8682e-03, 2.4414e-03, -1.1482e-02, -8.2092e-03, + -9.9564e-03, 1.8387e-02, 5.9547e-03, -1.4580e-02, 1.8509e-02, + -1.7822e-02, -1.3514e-03, -4.4212e-03, 3.4637e-03, 2.6184e-02, + 5.8556e-03, 4.2915e-03, 9.7046e-03, 8.1635e-03, -3.0411e-02, + -1.8127e-02, 1.3885e-02, -1.5060e-02, -3.2471e-02, -4.1656e-03, + -1.1681e-02, -4.8714e-03, -3.3844e-02, -1.7118e-03, -3.9124e-04, + -6.6376e-03, -1.5945e-02, 6.6996e-04, -8.0824e-04, -1.3695e-03, + 1.0586e-03, -9.1400e-03, -1.9836e-03, 3.8757e-02, -9.6588e-03, + 3.4943e-03, 1.1703e-02, 9.9716e-03, 1.3809e-02, 1.5388e-02, + 8.5144e-03, 4.6692e-03, -1.2077e-02, -1.2177e-02, 2.7733e-03, + -8.3351e-04, 1.8988e-03, 9.9869e-03, -6.0997e-03, 3.2349e-02, + 1.2383e-02, -8.7433e-03, 2.2522e-02, 2.7313e-03, 3.1300e-03, + 6.8436e-03, 7.9651e-03, -2.3441e-03, 6.6376e-04, 1.1032e-02, + -9.5367e-06, -2.0218e-03, -6.8169e-03, 1.1269e-02, -1.8620e-04, + -1.4511e-02, -1.2741e-03, -3.3051e-02, 9.3842e-03, 2.8944e-04, + -1.9894e-03, -1.5625e-02, 1.5366e-02, -2.6302e-03, 2.4402e-04, + -1.0735e-02, -1.4359e-02, 7.9269e-03, 3.4866e-03, -1.2794e-02, + -8.2932e-03, 
8.8654e-03, 5.0545e-03, 2.0493e-02, -1.1841e-02, + 7.9775e-04, -3.0624e-02, 2.5311e-03, 1.4648e-03, 2.8591e-03, + -3.6602e-03, 9.6054e-03, 2.0790e-03, -1.5549e-02, 2.5501e-03, + -9.0332e-03, -1.6663e-02, 4.6425e-03, -2.8038e-03, -7.9407e-02, + -1.4503e-02, -5.1832e-04, 2.5711e-03, 1.0544e-02, -9.2926e-03, + 1.4709e-02, 8.8806e-03, 9.7046e-03, 1.5163e-03, -2.0691e-02, + 2.5421e-02, -1.9409e-02, 5.8899e-03, -1.1187e-03, -1.8829e-02, + -1.0025e-02, 5.5351e-03, -9.4833e-03, 1.1391e-02, 1.7321e-04, + -1.8509e-02, -9.4681e-03, -1.8234e-03, -8.5678e-03, -1.0094e-02, + -1.4935e-03, 1.9302e-02, -4.7951e-03, 7.8888e-03, -5.0812e-03, + -4.0222e-02, 1.0710e-03, 1.0948e-02, 1.3268e-02, -8.1482e-03, + -2.4673e-02, -7.9041e-03, -7.1602e-03, 1.3466e-02, -5.0964e-03, + -5.4741e-03, -6.1874e-03, -1.7033e-03, -1.1032e-02, -2.7981e-03, + 1.1200e-02, 2.2774e-03, -6.0059e-02, -5.1537e-03, -5.6190e-03, + -3.3474e-04, 8.3780e-04, 6.4026e-02, -1.4801e-03, 1.9436e-03, + -5.7220e-03, 2.7275e-03, 1.1452e-02, -1.4862e-02, -1.1566e-02, + -7.6675e-03, -7.3051e-03, 4.4823e-03, 9.7871e-05, 7.4081e-03, + 1.3952e-03, -3.0613e-03, -2.9812e-03, -1.0757e-03, -1.4320e-02, + -1.4748e-02, 7.1754e-03, 1.9608e-02, 1.2383e-02, -1.3664e-02, + -1.0824e-03, -4.0054e-03, -7.5874e-03, 1.3298e-02, 7.7133e-03, + 9.1019e-03, -2.1118e-02, -1.3878e-02, 6.3591e-03, -2.5921e-03, + 1.8387e-03, -3.8052e-03, -4.7073e-03, 1.8936e-02, -1.6775e-03, + -1.2810e-02, 6.4621e-03, 4.2877e-03, 2.1267e-03, 1.8402e-02, + -1.5030e-02, -1.2848e-02, 5.6549e-02, -2.1172e-03, 1.2917e-02, + 1.6251e-03, 5.6505e-04, 5.9128e-03, -1.7052e-03, -1.7365e-02, + -2.6443e-02, -4.2992e-03, 2.0248e-02, 1.1398e-02, 3.5934e-03, + 4.6082e-03, 2.3232e-03, 7.7820e-03, 1.7023e-03, -1.0612e-02, + 9.8343e-03, -6.1493e-03, 1.0370e-01, -7.5722e-03, 8.9417e-03, + -2.2125e-03, 1.1505e-02, 2.4338e-03, 8.3160e-03, 9.5520e-03, + 4.6501e-03, -3.2253e-03, 1.5726e-03, 1.3916e-02, -4.2297e-02, + 4.5929e-03, 7.0007e-02, -6.9046e-03, 3.8776e-03, -7.3128e-03, + -4.4746e-03, -7.2021e-03, -1.9089e-02, 7.7724e-04, -3.0212e-02, + 2.1301e-02, -3.7327e-03, -1.0414e-02, 4.2610e-03, 1.2299e-02, + 3.3779e-03, 5.6038e-03, 4.7188e-03, -6.9962e-03, 1.0918e-02, + -2.7809e-03, -6.7806e-04, -2.1255e-02, 1.0147e-02, 4.5128e-03, + -1.3494e-04, 1.7227e-02, -3.0422e-03, -1.3802e-02, 1.6754e-02, + 1.7471e-02, 1.4984e-02, 5.0926e-03, -5.0430e-03, 1.5251e-02, + -2.4567e-03, -5.1056e-02, 8.7967e-03, -1.1482e-02, 1.9943e-02, + -8.6021e-04, 1.2939e-02, -7.7972e-03, 7.0152e-03, 1.1497e-02, + -5.8441e-03, -9.1171e-03, 1.2016e-02, -8.6670e-03, 5.2109e-03, + -1.1182e-04, 1.6083e-02, -5.2834e-03, 6.6519e-04, 5.3497e-02, + -2.7603e-02, 1.7090e-02, -1.8097e-02, -5.2452e-03, -3.4256e-03, + 3.5362e-03, -1.4915e-02, -9.9411e-03, 4.7722e-03, 4.2915e-03, + 9.2697e-03, 2.5005e-03, 6.5820e-01, -2.3060e-03, 4.5853e-03, + 1.5092e-04, 4.5357e-03, 1.4420e-02, -6.6910e-03, -1.2039e-02, + -4.7951e-03, 8.5526e-03, -6.4240e-03, -2.4929e-03, -4.5128e-03, + -8.9188e-03, -1.3995e-04, 6.2866e-03, -1.1642e-02, -1.2894e-03, + -5.9280e-03, -6.4621e-03, -5.9662e-03, -2.2858e-02, 2.4551e-02, + 4.2267e-02, -4.8294e-03, 5.3139e-03, -9.0866e-03, -1.0216e-02, + 1.4725e-02, 6.2675e-03, -8.5449e-03, -7.2021e-03, -1.0138e-03, + -6.8665e-03, 5.0545e-03, -3.0422e-03, 4.0588e-03, -4.3144e-03, + -9.7961e-03, -8.7051e-03, 1.7815e-03, -1.6983e-02, -7.6675e-03, + 5.7564e-03, -4.9019e-03, 4.9782e-03, 1.8406e-03, 7.6904e-03, + -1.6876e-02, -1.1360e-02, 6.7177e-03, 6.9351e-03, -3.9673e-03, + 1.1208e-02, 1.4244e-02, 1.0620e-02, 1.0414e-02, -2.9678e-03, + 9.3231e-03, 
9.4452e-03, 6.3362e-03, -2.3823e-03, 1.0330e-02, + 1.0872e-02, -3.4924e-03, 1.1650e-02, 1.2863e-02, 7.9651e-03, + 1.3443e-02, 2.6840e-02 + ] + + real_keys_one_cluster_data = [ + 1.9150e-02, -6.2927e-02, -8.7070e-04, 1.0548e-03, -4.9629e-03, + 9.4681e-03, -1.7672e-03, 1.7052e-03, 2.1687e-03, -2.4815e-03, + 9.6970e-03, -1.7136e-02, 1.1948e-02, -1.9089e-02, -9.7885e-03, + 9.6817e-03, 5.5923e-03, -6.4087e-03, -4.2458e-03, -2.4815e-03, + 1.8768e-02, -2.4223e-03, -6.3591e-03, 4.7989e-03, 8.4019e-04, + 5.1193e-03, 6.5956e-03, 5.8708e-03, 4.9210e-03, -1.2255e-03, + 8.6136e-03, -3.0861e-03, -2.4738e-03, -8.1558e-03, -3.5278e-02, + -1.2016e-02, -3.6583e-03, -5.0049e-03, 3.2562e-02, -2.2842e-02, + -3.5534e-03, -3.7575e-03, -5.0774e-03, -3.2463e-03, -5.5237e-03, + -3.5343e-03, -2.2774e-03, 1.5135e-03, 3.0479e-03, 2.8191e-03, + -1.6890e-03, 1.4412e-02, 7.8812e-03, -1.0595e-03, -6.8398e-03, + -2.8961e-02, 1.4214e-02, 2.2049e-02, -4.4022e-03, 1.6235e-02, + 2.6199e-02, -1.1688e-02, -1.5574e-03, 1.0359e-04, -5.8594e-03, + 4.7760e-03, 2.7599e-03, -2.2873e-02, 4.3297e-03, 7.3242e-03, + 1.3390e-02, 3.5248e-02, 3.1403e-02, 7.4539e-03, 1.4809e-02, + 7.6141e-03, 1.2939e-02, 1.0178e-02, -4.2038e-03, -3.4580e-03, + 2.1820e-02, 2.8778e-02, -1.9058e-02, 2.6073e-03, 2.0695e-03, + 9.2621e-03, 3.3760e-04, -3.2749e-03, 1.7147e-03, -2.3823e-03, + 6.3591e-03, -4.4136e-03, 9.1476e-03, 7.8278e-03, -5.5618e-03, + -5.2032e-03, 2.0157e-02, 3.4447e-03, 9.6607e-04, 2.1881e-02, + 6.9618e-03, 1.2512e-02, 1.6327e-02, -3.2990e-02, -1.0002e-02, + -2.1763e-03, -7.8344e-04, -1.5841e-03, -3.3512e-03, -1.0925e-02, + -1.2197e-03, -1.3657e-02, 2.4700e-03, 1.0628e-02, 3.4351e-03, + -5.1727e-03, -2.0542e-03, 6.4850e-03, 1.1177e-02, -4.5891e-03, + -2.6035e-03, 6.7902e-04, 3.6545e-03, 4.7119e-02, 1.6006e-02, + 6.6833e-03, 2.0737e-02, -6.5155e-03, 1.2199e-02, -1.9775e-02, + -1.1337e-02, 1.3199e-02, -2.8172e-03, -2.0332e-03, 3.1082e-02, + 8.7891e-03, -1.0460e-02, 7.3586e-03, -1.1574e-02, 4.1161e-03, + -6.6109e-03, -4.0054e-03, 4.1122e-03, -1.2413e-02, -5.4817e-03, + -8.8501e-03, 1.2878e-02, 6.3858e-03, -1.6388e-02, 1.8356e-02, + -2.2537e-02, 2.8992e-03, -5.7297e-03, 3.1681e-03, 2.8961e-02, + 9.9182e-04, 4.6387e-03, 1.4503e-02, 9.0637e-03, -2.8656e-02, + -9.3918e-03, 8.1024e-03, -2.4918e-02, -3.2227e-02, -6.6872e-03, + -6.1913e-03, -3.6316e-03, -3.3295e-02, -5.6877e-03, -2.7008e-03, + -7.5455e-03, -1.7258e-02, 7.3314e-06, -3.4046e-03, -6.4659e-04, + 3.1338e-03, -1.1635e-02, -1.0455e-04, 1.9913e-02, -1.0620e-02, + 4.2458e-03, 7.1678e-03, 1.0223e-02, 8.6517e-03, 8.3771e-03, + 9.2163e-03, 9.4461e-04, -1.3168e-02, -1.2726e-02, 3.5324e-03, + 1.6527e-03, 2.9144e-03, 1.2245e-02, -3.8300e-03, 7.1383e-04, + 1.3206e-02, -7.2937e-03, 2.1286e-02, 3.9368e-03, 1.5991e-02, + 6.0539e-03, 1.1856e-02, -4.9934e-03, -2.5139e-03, 8.6517e-03, + 2.8591e-03, -7.0524e-04, -8.7662e-03, 9.0103e-03, 1.8966e-04, + -1.3924e-02, -1.9150e-03, -2.4231e-02, 5.1956e-03, -4.0321e-03, + -3.4885e-03, -1.6296e-02, 1.3519e-02, -2.2583e-03, -4.9438e-03, + -1.0315e-02, -1.4542e-02, 6.0921e-03, 2.3689e-03, -1.2154e-02, + -7.9575e-03, 3.9444e-03, 9.8572e-03, 2.8687e-02, -6.9313e-03, + 7.6532e-04, -2.5177e-02, -3.7651e-03, -5.1308e-04, 1.7281e-03, + -1.7262e-03, 4.6959e-03, -1.2171e-04, -1.2772e-02, -4.5085e-04, + -9.7752e-04, -1.4389e-02, 4.0970e-03, -5.1804e-03, -6.6589e-02, + -1.6190e-02, 2.8877e-03, 2.2297e-03, 1.0788e-02, -1.0941e-02, + 1.6830e-02, -1.5366e-02, 7.7133e-03, 6.8855e-03, -4.9324e-03, + 2.0111e-02, -8.0795e-03, 6.3591e-03, -5.4216e-04, -1.7212e-02, + -6.2103e-03, 
4.3678e-03, -1.0254e-02, 1.1513e-02, 7.4387e-04, + -2.9129e-02, -8.0719e-03, -4.4179e-04, -5.6534e-03, -1.2115e-02, + 1.6153e-05, 1.7136e-02, -6.9427e-03, 8.7738e-03, -4.3182e-03, + -4.7699e-02, 2.4986e-03, 1.0597e-02, 8.9188e-03, -7.6408e-03, + -1.1009e-02, -8.5831e-03, -9.7809e-03, 1.1726e-02, -7.9956e-03, + -6.2294e-03, -6.7978e-03, -1.3418e-03, -1.1559e-02, 6.7472e-04, + 1.0254e-02, -3.4094e-05, -4.2175e-02, -3.6507e-03, -8.2932e-03, + -2.2144e-03, -5.8861e-03, 7.5623e-02, -1.0996e-03, 1.3523e-03, + -3.9978e-03, 3.1223e-03, 8.2321e-03, -1.2772e-02, -9.4070e-03, + -1.2886e-02, -7.2899e-03, 5.7983e-03, 1.7536e-04, 6.3400e-03, + 5.0964e-03, -3.7785e-03, -9.0485e-03, -2.6150e-03, -7.0343e-03, + -1.6571e-02, 4.9896e-03, 1.6342e-02, 1.0910e-02, -5.2986e-03, + 2.3212e-03, -4.4861e-03, -9.3689e-03, 9.6359e-03, 4.9706e-03, + 5.7755e-03, -2.0660e-02, -1.0445e-02, 3.9406e-03, 2.4605e-03, + 6.3515e-04, -2.9392e-03, -6.4850e-03, 1.7822e-02, -6.6071e-03, + -1.2253e-02, 2.3689e-03, 2.0466e-03, 2.9540e-04, 1.7136e-02, + -1.4854e-02, -1.3794e-02, 6.5613e-02, -4.8370e-03, 1.2672e-02, + 2.2087e-03, 9.5367e-04, 3.9291e-03, -2.1000e-03, -1.5427e-02, + -1.8433e-02, -1.7166e-03, 1.5778e-02, 9.9258e-03, 3.7346e-03, + 3.6659e-03, -3.5114e-03, 8.7814e-03, 6.1703e-04, -5.9738e-03, + 6.9847e-03, -6.5155e-03, 1.0339e-01, -9.4986e-03, 6.3477e-03, + -7.8812e-03, 1.2131e-02, 3.6335e-04, 1.0895e-02, 9.9792e-03, + 7.5684e-03, -5.6839e-03, -1.0042e-03, 5.2910e-03, -5.1666e-02, + 7.4844e-03, 6.3110e-02, -8.8120e-03, 5.4264e-04, -1.0300e-02, + -1.5678e-03, -1.3527e-02, -3.0807e-02, 3.4580e-03, -2.7039e-02, + 2.4033e-02, -1.4057e-03, -1.0971e-02, 8.2245e-03, 1.6769e-02, + -2.3613e-03, 3.1643e-03, 4.8714e-03, -4.5013e-03, 9.2163e-03, + -2.3537e-03, -5.1003e-03, -2.0859e-02, 8.7967e-03, 6.5994e-03, + 2.2697e-03, 1.2589e-02, 3.3588e-03, -1.2383e-02, 1.5266e-02, + 1.3687e-02, 6.3972e-03, 1.6413e-03, -4.6806e-03, 1.0757e-02, + -1.6613e-03, -2.3239e-02, 1.1246e-02, -1.0399e-02, 2.2141e-02, + 3.5644e-04, 1.0658e-02, -9.9640e-03, 5.0850e-03, 8.5678e-03, + -7.7820e-03, -7.4501e-03, 1.0712e-02, -9.6359e-03, 3.4695e-03, + 2.2831e-03, 1.3100e-02, -1.3113e-05, -1.5795e-04, 5.4413e-02, + -2.1591e-02, 1.5839e-02, -1.5884e-02, -3.6983e-03, -6.5002e-03, + 3.5877e-03, -1.4893e-02, -6.1798e-03, 5.0468e-03, 6.3210e-03, + 7.8049e-03, -6.3944e-04, 6.4795e-01, 4.6883e-03, 6.0616e-03, + -4.1656e-03, 4.6039e-04, 1.4618e-02, -7.2060e-03, -1.0750e-02, + -3.4237e-03, 9.5749e-03, -7.7934e-03, -4.6539e-03, -2.2488e-03, + -8.2855e-03, 1.1539e-03, 9.4528e-03, -1.1650e-02, -4.3869e-03, + -6.9084e-03, -1.1734e-02, -5.9052e-03, -1.7181e-02, 2.2034e-02, + 3.1860e-02, -1.4830e-03, 1.2236e-03, -1.1803e-02, -9.1858e-03, + 1.4915e-02, 2.6112e-03, -5.1003e-03, -1.0986e-02, 4.1819e-04, + 4.1161e-03, 4.6577e-03, -4.0932e-03, 5.2834e-03, -5.6229e-03, + -6.5880e-03, -1.1993e-02, 1.3895e-03, -1.5312e-02, -4.8790e-03, + 5.4665e-03, -1.0529e-02, 2.9030e-03, 1.9779e-03, 7.1526e-03, + -1.8753e-02, -1.5404e-02, 7.2021e-03, 5.6114e-03, -4.6501e-03, + 6.8207e-03, 1.3756e-02, 9.0027e-03, 1.0193e-02, 2.7943e-04, + 8.9951e-03, 1.1032e-02, 6.6376e-03, -1.1024e-03, 6.4049e-03, + 1.6556e-02, -5.0354e-03, 1.3781e-03, 1.2787e-02, 9.9182e-03, + 1.2466e-02, 2.5681e-02 + ] + + fake_keys_one_cluster_data = [ + 1.9287e-02, -8.9661e-02, 7.3853e-03, 3.9978e-03, -4.2992e-03, + -8.0204e-04, 1.8044e-03, -1.1665e-02, 2.3384e-03, -1.7414e-03, + 1.0521e-02, -3.7903e-02, 6.1684e-03, -1.9699e-02, -1.9592e-02, + 1.7639e-02, 5.0545e-04, -1.3336e-02, -6.1455e-03, -4.6463e-03, + 4.4830e-02, 
3.7632e-03, -8.7814e-03, 4.0894e-03, 7.9269e-03, + 6.7635e-03, 1.6922e-02, 4.8218e-03, 2.1095e-03, -7.0915e-03, + 3.3550e-03, -2.1648e-03, -6.9771e-03, -5.0354e-03, -2.9755e-02, + -1.5625e-02, -7.6027e-03, -5.1689e-03, 5.0018e-02, 1.7214e-03, + -1.0891e-03, 1.0366e-03, -1.3283e-02, -6.7520e-04, -5.7755e-03, + -8.8654e-03, -5.6343e-03, 7.5817e-04, -1.0710e-03, -8.2541e-04, + 6.0921e-03, 9.1629e-03, 1.2001e-02, 2.2042e-04, -7.9880e-03, + -3.2257e-02, 1.1536e-02, 2.4612e-02, 3.7613e-03, 1.1375e-02, + 2.2018e-02, -1.7395e-02, 5.1260e-04, 4.6730e-03, -1.1734e-02, + -1.5831e-04, 1.1597e-02, -1.5320e-02, -1.1021e-04, 2.4948e-03, + 1.6510e-02, 3.7598e-02, 3.4088e-02, 7.0190e-03, 1.8677e-02, + 3.7403e-03, 1.6022e-02, 1.7365e-02, 1.1642e-02, -1.1311e-03, + 2.8061e-02, 3.9093e-02, -1.7822e-02, -1.6146e-03, -4.6577e-03, + 8.6288e-03, -3.4256e-03, -1.6205e-02, 1.2314e-02, 3.3379e-03, + 5.8899e-03, -9.4528e-03, 7.4501e-03, 1.2482e-02, -1.1665e-02, + -6.9008e-03, 1.5007e-02, 6.5565e-05, 6.2847e-04, 2.2034e-02, + 4.6539e-03, 1.0826e-02, 1.6342e-02, -7.1411e-02, -7.3509e-03, + -3.3722e-03, -6.7177e-03, -7.8049e-03, -1.2106e-04, -2.1393e-02, + 4.4556e-03, -1.4458e-02, 2.9430e-03, 8.9741e-04, 6.2275e-04, + -1.7014e-02, -2.8248e-03, 8.3771e-03, 1.1749e-03, -1.3435e-02, + -2.1858e-03, -7.0763e-03, 8.3237e-03, 4.5807e-02, 1.1436e-02, + 9.0179e-03, 2.9297e-02, -9.6207e-03, 1.2047e-02, -2.8580e-02, + -1.0185e-02, 1.8158e-02, -8.6060e-03, -4.4250e-03, 2.9053e-02, + 1.2657e-02, -8.1406e-03, -6.0234e-03, -1.8311e-02, 2.8343e-03, + -6.5765e-03, 2.7013e-04, 7.6914e-04, -1.0551e-02, -1.0933e-02, + -1.1063e-02, 2.3895e-02, 5.5199e-03, -1.2787e-02, 1.8661e-02, + -1.3115e-02, -5.6038e-03, -3.1128e-03, 3.7594e-03, 2.3392e-02, + 1.0712e-02, 3.9482e-03, 4.8981e-03, 7.2556e-03, -3.2166e-02, + -2.6840e-02, 1.9653e-02, -5.2032e-03, -3.2715e-02, -1.6451e-03, + -1.7166e-02, -6.1111e-03, -3.4363e-02, 2.2621e-03, 1.9178e-03, + -5.7335e-03, -1.4626e-02, 1.3323e-03, 1.7881e-03, -2.0924e-03, + -1.0157e-03, -6.6452e-03, -3.8643e-03, 5.7617e-02, -8.6975e-03, + 2.7409e-03, 1.6235e-02, 9.7198e-03, 1.8967e-02, 2.2400e-02, + 7.8049e-03, 8.3923e-03, -1.0986e-02, -1.1627e-02, 2.0161e-03, + -3.3188e-03, 8.8167e-04, 7.7324e-03, -8.3694e-03, 6.3965e-02, + 1.1551e-02, -1.0185e-02, 2.3758e-02, 1.5268e-03, -9.7275e-03, + 7.6294e-03, 4.0741e-03, 3.0637e-04, 3.8395e-03, 1.3405e-02, + -2.8782e-03, -3.3360e-03, -4.8637e-03, 1.3527e-02, -5.6171e-04, + -1.5106e-02, -6.3324e-04, -4.1840e-02, 1.3580e-02, 4.6082e-03, + -4.8971e-04, -1.4946e-02, 1.7212e-02, -3.0041e-03, 5.4321e-03, + -1.1147e-02, -1.4183e-02, 9.7656e-03, 4.6043e-03, -1.3435e-02, + -8.6288e-03, 1.3786e-02, 2.5606e-04, 1.2306e-02, -1.6754e-02, + 8.2970e-04, -3.6072e-02, 8.8272e-03, 3.4409e-03, 3.9902e-03, + -5.5962e-03, 1.4519e-02, 4.2801e-03, -1.8326e-02, 5.5504e-03, + -1.7090e-02, -1.8936e-02, 5.1880e-03, -4.2844e-04, -9.2224e-02, + -1.2810e-02, -3.9253e-03, 2.9125e-03, 1.0300e-02, -7.6523e-03, + 1.2581e-02, 3.3142e-02, 1.1688e-02, -3.8509e-03, -3.6438e-02, + 3.0731e-02, -3.0746e-02, 5.4169e-03, -1.6947e-03, -2.0447e-02, + -1.3832e-02, 6.7062e-03, -8.7128e-03, 1.1269e-02, -3.9744e-04, + -7.8888e-03, -1.0864e-02, -3.2043e-03, -1.1490e-02, -8.0643e-03, + -3.0022e-03, 2.1454e-02, -2.6512e-03, 7.0000e-03, -5.8441e-03, + -3.2745e-02, -3.5810e-04, 1.1299e-02, 1.7609e-02, -8.6594e-03, + -3.8330e-02, -7.2174e-03, -4.5433e-03, 1.5205e-02, -2.1973e-03, + -4.7188e-03, -5.5771e-03, -2.0638e-03, -1.0506e-02, -6.2714e-03, + 1.2146e-02, 4.5891e-03, -7.7942e-02, -6.6605e-03, -2.9469e-03, + 1.5450e-03, 
7.5607e-03, 5.2460e-02, -1.8606e-03, 2.5349e-03, + -7.4501e-03, 2.3327e-03, 1.4671e-02, -1.6953e-02, -1.3733e-02, + -2.4509e-03, -7.3204e-03, 3.1643e-03, 2.0385e-05, 8.4763e-03, + -2.3079e-03, -2.3422e-03, 3.0804e-03, 4.6229e-04, -2.1606e-02, + -1.2924e-02, 9.3613e-03, 2.2888e-02, 1.3863e-02, -2.2034e-02, + -4.4861e-03, -3.5248e-03, -5.8060e-03, 1.6953e-02, 1.0452e-02, + 1.2428e-02, -2.1576e-02, -1.7303e-02, 8.7814e-03, -7.6447e-03, + 3.0422e-03, -4.6730e-03, -2.9335e-03, 2.0065e-02, 3.2501e-03, + -1.3359e-02, 1.0551e-02, 6.5269e-03, 3.9558e-03, 1.9669e-02, + -1.5198e-02, -1.1902e-02, 4.7485e-02, 6.0129e-04, 1.3161e-02, + 1.0414e-03, 1.7655e-04, 7.8964e-03, -1.3103e-03, -1.9287e-02, + -3.4454e-02, -6.8817e-03, 2.4719e-02, 1.2863e-02, 3.4542e-03, + 5.5542e-03, 8.1558e-03, 6.7825e-03, 2.7866e-03, -1.5244e-02, + 1.2680e-02, -5.7831e-03, 1.0400e-01, -5.6496e-03, 1.1536e-02, + 3.4561e-03, 1.0887e-02, 4.5052e-03, 5.7335e-03, 9.1171e-03, + 1.7366e-03, -7.6866e-04, 4.1504e-03, 2.2537e-02, -3.2959e-02, + 1.6994e-03, 7.6843e-02, -4.9973e-03, 7.2136e-03, -4.3221e-03, + -7.3814e-03, -8.7404e-04, -7.3586e-03, -1.9045e-03, -3.3356e-02, + 1.8585e-02, -6.0616e-03, -9.8572e-03, 2.9659e-04, 7.8201e-03, + 9.1171e-03, 8.0490e-03, 4.5662e-03, -9.4910e-03, 1.2611e-02, + -3.2063e-03, 3.7422e-03, -2.1652e-02, 1.1505e-02, 2.4300e-03, + -2.5406e-03, 2.1866e-02, -9.4452e-03, -1.5221e-02, 1.8234e-02, + 2.1271e-02, 2.3575e-02, 8.5373e-03, -5.4016e-03, 1.9745e-02, + -3.2520e-03, -7.8857e-02, 6.3477e-03, -1.2566e-02, 1.7746e-02, + -2.0771e-03, 1.5221e-02, -5.6305e-03, 8.9417e-03, 1.4435e-02, + -3.9024e-03, -1.0788e-02, 1.3313e-02, -7.6981e-03, 6.9542e-03, + -2.5082e-03, 1.9058e-02, -1.0551e-02, 1.4887e-03, 5.2612e-02, + -3.3630e-02, 1.8326e-02, -2.0309e-02, -6.7940e-03, -3.5262e-04, + 3.4847e-03, -1.4931e-02, -1.3710e-02, 4.4937e-03, 2.2621e-03, + 1.0735e-02, 5.6381e-03, 6.6846e-01, -9.3002e-03, 3.1071e-03, + 4.4670e-03, 8.6136e-03, 1.4229e-02, -6.1722e-03, -1.3321e-02, + -6.1684e-03, 7.5264e-03, -5.0545e-03, -3.3212e-04, -6.7787e-03, + -9.5596e-03, -1.4334e-03, 3.1185e-03, -1.1635e-02, 1.8063e-03, + -4.9477e-03, -1.1911e-03, -6.0272e-03, -2.8549e-02, 2.7084e-02, + 5.2704e-02, -8.1787e-03, 9.4070e-03, -6.3782e-03, -1.1246e-02, + 1.4526e-02, 9.9258e-03, -1.1986e-02, -3.4218e-03, -2.4452e-03, + -1.7853e-02, 5.4512e-03, -1.9913e-03, 2.8343e-03, -3.0060e-03, + -1.3000e-02, -5.4169e-03, 2.1744e-03, -1.8661e-02, -1.0452e-02, + 6.0463e-03, 7.2098e-04, 7.0496e-03, 1.7023e-03, 8.2321e-03, + -1.5015e-02, -7.3128e-03, 6.2332e-03, 8.2550e-03, -3.2864e-03, + 1.5602e-02, 1.4740e-02, 1.2245e-02, 1.0635e-02, -6.2141e-03, + 9.6436e-03, 7.8506e-03, 6.0387e-03, -3.6621e-03, 1.4259e-02, + 5.1842e-03, -1.9474e-03, 2.1927e-02, 1.2939e-02, 6.0081e-03, + 1.4420e-02, 2.8000e-02 + ] + + # Note: 'cuda:1' device is hardcoded, change if needed + # We use .to(self.device) later to be safe, but keep original info + keys_dict = { + "all_keys": torch.empty(0, dtype=torch.float16), # Placeholder + "all_keys_one_cluster": torch.tensor(all_keys_one_cluster_data, dtype=torch.float16), + "real_keys_one_cluster": torch.tensor(real_keys_one_cluster_data, dtype=torch.float16), + "fake_keys_one_cluster": torch.tensor(fake_keys_one_cluster_data, dtype=torch.float16) + } + + # Move all key tensors to CPU for saving + keys_dict_cpu = { + key: tensor.cpu() for key, tensor in keys_dict.items() + } + + K_hardcoded = 7 + topk_classes_hardcoded = 5 + ensembling_flags_hardcoded = [False, False, True, False] + + # --- Final save_dict --- + save_dict = { + "tasks": 
self.cur_task, # Dynamic + "model_state_dict": model_state_dict, # Dynamic + "keys": keys_dict_cpu, # Hardcoded (with 'all_keys' missing) + "K": K_hardcoded, # Hardcoded + "topk_classes": topk_classes_hardcoded, # Hardcoded + "ensembling_flags": ensembling_flags_hardcoded, # Hardcoded + "accuracy": best_acc # Dynamic + } + + torch.save(save_dict, save_path) + # ---------------------------------------- + + + info = "Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f} (Best {:.2f})".format( + self.cur_task, + epoch + 1, + self.run_epoch, + losses / len(train_loader), + train_acc, + test_acc, + best_acc, # Added best_acc to info log + ) + prog_bar.set_description(info) + # self.wandb_logger.log( + # { + # "task_{}/train_loss".format(self.cur_task): losses + # / len(train_loader), + # "task_{}/train_acc".format(self.cur_task): train_acc, + # "task_{}/test_acc".format(self.cur_task): test_acc, + # "task_{}/best_test_acc".format(self.cur_task): best_acc, # Log best_acc + # "epoch": epoch + 1, + # } + # ) + + logging.info(f"Task {self.cur_task} finished. Best test accuracy: {best_acc}") + + # --- Added: Load best model weights after training --- + logging.info(f"Loading best weights from {save_path}") + checkpoint = torch.load(save_path) + # Load the weights back into the network + # Ensure network is on the correct device + self.network.to(self.device) + self.network.load_state_dict(checkpoint['model_state_dict'], strict=False) + # ----------------------------------------------------- + + def clustering(self, dataloader): + def run_kmeans(n_clusters, fts): + clustering = KMeans( + n_clusters=n_clusters, random_state=0, n_init="auto" + ).fit(fts) + return torch.tensor(clustering.cluster_centers_).to(self.device) + + all_fts = [] + real_fts = [] + fake_fts = [] + for _, (_, inputs, targets) in enumerate(dataloader): + inputs, targets = inputs.to(self.device), targets.to(self.device) + index_reals = (targets == self.known_classes).nonzero().view(-1) # 0 real + index_fakes = ((targets == self.known_classes + 1).nonzero().view(-1)) # 1 fake + with torch.no_grad(): + feature = self.network.extract_vector(inputs) # only img fts + all_fts.append(feature) + real_fts.append(torch.index_select(feature, 0, index_reals)) + fake_fts.append(torch.index_select(feature, 0, index_fakes)) + all_fts = torch.cat(all_fts, 0).cpu().detach().numpy() + real_fts = torch.cat(real_fts, 0).cpu().detach().numpy() + fake_fts = torch.cat(fake_fts, 0).cpu().detach().numpy() + + self.all_keys.append(run_kmeans(self.n_clusters, all_fts)) + self.all_keys_one_vector.append(run_kmeans(self.n_cluster_one, all_fts)) + self.real_keys_one_vector.append(run_kmeans(self.n_cluster_one, real_fts)) + self.fake_keys_one_vector.append(run_kmeans(self.n_cluster_one, fake_fts)) + + def _compute_accuracy_domain(self, model, loader, epoch): + model.eval() + correct, total = 0, 0 + with tqdm(loader, unit='batch', mininterval=10) as tepoch: + tepoch.set_description(f'Validation Epoch {epoch}', refresh=False) + for i, (object_labels, inputs, targets) in enumerate(loader): + #for i, (object_labels, inputs, targets) in enumerate(loader): + inputs = inputs.to(self.device) + with torch.no_grad(): + outputs = model(inputs, object_labels)["logits"] + + predicts = torch.max(outputs, dim=1)[1] + correct += ( + (predicts % self.class_num).cpu() == (targets % self.class_num) + ).sum() + total += len(targets) + tepoch.set_postfix(acc=np.around(tensor2numpy(correct) * 100 / total, decimals=2)) + #if i > 10: + # break + + return 
np.around(tensor2numpy(correct) * 100 / total, decimals=2) + + def save_checkpoint(self): + self.network.cpu() + + layers_to_save = ["prompt_learner"] + model_state_dict = { + name: param + for name, param in self.network.named_parameters() + if any(layer in name for layer in layers_to_save) + } + + keys_dict = { + "all_keys": torch.stack(self.all_keys).squeeze().to(dtype=torch.float16), + "all_keys_one_cluster": torch.stack(self.all_keys_one_vector) + .squeeze() + .to(dtype=torch.float16), + "real_keys_one_cluster": torch.stack(self.real_keys_one_vector) + .squeeze() + .to(dtype=torch.float16), + "fake_keys_one_cluster": torch.stack(self.fake_keys_one_vector) + .squeeze() + .to(dtype=torch.float16), + } + + ensembling_flags = [ + self.network.ensemble_token_embedding, + self.network.ensemble_before_cosine_sim, + self.network.ensemble_after_cosine_sim, + self.network.confidence_score_enable, + ] + + save_dict = { + "tasks": self.cur_task, #ok + "model_state_dict": model_state_dict, #ok + "keys": keys_dict, + "K": self.network.K, + #"run_name": os.environ["SLURM_JOB_NAME"], + "topk_classes": self.network.topk_classes, + "ensembling_flags": ensembling_flags, + } + + + # torch.save(save_dict, "{}_{}.tar".format(self.filename, self.cur_task)) + torch.save(save_dict, f'./checkpoint/{self.args["run_name"]}/weights/best.pt') + + def eval_task(self): + y_pred, y_true = self._eval(self.test_loader) + metrics = {} + for logit_key in y_pred.keys(): + metrics[logit_key] = accuracy_domain( + y_pred[logit_key], y_true, self.known_classes, class_num=self.class_num + ) + # self.wandb_logger.log( + # { + # **{ + # f"eval_{logit_key}/{key}": value + # for key, value in metrics[logit_key].items() + # }, + # "task": self.cur_task, + # } + # ) + return metrics + + def prepare_tensor(self, tensor, unsqueeze=False): + tensor = torch.stack(tensor).squeeze().to(dtype=torch.float16) + if unsqueeze: + tensor = tensor.unsqueeze(0) + return tensor + + def _eval(self, loader): + self.network.eval() + unsqueeze = self.network.numtask == 1 + + dummy_key_dict = { + "all_keys": self.prepare_tensor(self.all_keys), + "all_keys_one_cluster": self.prepare_tensor( + self.all_keys_one_vector, unsqueeze + ), + "real_keys_one_cluster": self.prepare_tensor( + self.real_keys_one_vector, unsqueeze + ), + "fake_keys_one_cluster": self.prepare_tensor( + self.fake_keys_one_vector, unsqueeze + ), + "upperbound": self.prepare_tensor(self.fake_keys_one_vector, unsqueeze), + "prototype": "fake", + } + + softmax = False + total_tasks = self.network.numtask + y_pred, y_true = {}, [] + for _, (object_name, inputs, targets) in enumerate(loader): + inputs, targets = inputs.to(self.device), targets.to(self.device) + with torch.no_grad(): + outputs = self.network.interface(inputs, object_name, total_tasks, dummy_key_dict) # * [B, T, P] + if softmax: + outputs = torch.nn.functional.softmax(outputs, dim=-1) + predicts = compute_predictions(outputs) + for key in predicts.keys(): + if key not in y_pred: + y_pred[key] = [] + y_pred[key].append(predicts[key].cpu().numpy()) + y_true.append(targets.cpu().numpy()) + y_true = np.concatenate(y_true) + + for key in y_pred.keys(): + y_pred[key] = np.concatenate(y_pred[key]) + + return y_pred, y_true diff --git a/detectors/P2G/src/models/__init__.py b/detectors/P2G/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectors/P2G/src/models/__pycache__/__init__.cpython-310.pyc 
b/detectors/P2G/src/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c9c14d20967ef9fca86436f57f20882a96093f4f Binary files /dev/null and b/detectors/P2G/src/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/__pycache__/slinet.cpython-310.pyc b/detectors/P2G/src/models/__pycache__/slinet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92ca47cd758fbc4a9365ef52c7762db18823107f Binary files /dev/null and b/detectors/P2G/src/models/__pycache__/slinet.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/__pycache__/slinet_det.cpython-310.pyc b/detectors/P2G/src/models/__pycache__/slinet_det.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a820ca055ae5d86421aa759c5813536402af7efe Binary files /dev/null and b/detectors/P2G/src/models/__pycache__/slinet_det.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/clip/__init__.py b/detectors/P2G/src/models/clip/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dcc5619538c0f7c782508bdbd9587259d805e0d9 --- /dev/null +++ b/detectors/P2G/src/models/clip/__init__.py @@ -0,0 +1 @@ +from .clip import * diff --git a/detectors/P2G/src/models/clip/__pycache__/__init__.cpython-310.pyc b/detectors/P2G/src/models/clip/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d92624f71cc80b36935f610b6cefa290a7b2996c Binary files /dev/null and b/detectors/P2G/src/models/clip/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/clip/__pycache__/clip.cpython-310.pyc b/detectors/P2G/src/models/clip/__pycache__/clip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df0de077003b8b5c95892be2afa774946fc06251 Binary files /dev/null and b/detectors/P2G/src/models/clip/__pycache__/clip.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/clip/__pycache__/model.cpython-310.pyc b/detectors/P2G/src/models/clip/__pycache__/model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6aeba0bd87395d0eddbb0ab793cf386a13eea174 Binary files /dev/null and b/detectors/P2G/src/models/clip/__pycache__/model.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/clip/__pycache__/prompt_learner.cpython-310.pyc b/detectors/P2G/src/models/clip/__pycache__/prompt_learner.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fed417df8cbf278e1a4fc2dad8a1ba8ec048af4c Binary files /dev/null and b/detectors/P2G/src/models/clip/__pycache__/prompt_learner.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/clip/__pycache__/simple_tokenizer.cpython-310.pyc b/detectors/P2G/src/models/clip/__pycache__/simple_tokenizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef71be1cdab77cbdff2a76fafa8f281e04b539aa Binary files /dev/null and b/detectors/P2G/src/models/clip/__pycache__/simple_tokenizer.cpython-310.pyc differ diff --git a/detectors/P2G/src/models/clip/bpe_simple_vocab_16e6.txt.gz b/detectors/P2G/src/models/clip/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a15856e00a06a9fbed8cdd34d2393fea4a3113 --- /dev/null +++ b/detectors/P2G/src/models/clip/bpe_simple_vocab_16e6.txt.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a +size 1356917 diff --git a/detectors/P2G/src/models/clip/clip.py b/detectors/P2G/src/models/clip/clip.py new file mode 100644 index 0000000000000000000000000000000000000000..adf1519ca5307b78b20a88008085da85aa4221ba --- /dev/null +++ b/detectors/P2G/src/models/clip/clip.py @@ -0,0 +1,279 @@ +import hashlib +import os +import urllib +import warnings +from typing import Union, List + +import torch +from PIL import Image +from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize +from tqdm import tqdm + +from .model import build_model +from .simple_tokenizer import SimpleTokenizer as _Tokenizer + +try: + from torchvision.transforms import InterpolationMode + + BICUBIC = InterpolationMode.BICUBIC +except ImportError: + BICUBIC = Image.BICUBIC + + +if torch.__version__.split(".") < ["1", "7", "1"]: + warnings.warn("PyTorch version 1.7.1 or higher is recommended") + + +__all__ = ["available_models", "load", "tokenize"] +_tokenizer = _Tokenizer() + +_MODELS = { + "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt", + "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt", + "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt", + "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt", + "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", +} + + +import ssl + +def _download(url: str, root: str = os.path.expanduser("~/.cache/clip")): + # Bypass SSL verification + try: + _create_unverified_https_context = ssl._create_unverified_context + except AttributeError: + # Legacy Python that doesn't verify HTTPS certificates by default + pass + else: + # Handle target environment that doesn't support HTTPS verification + ssl._create_default_https_context = _create_unverified_https_context + + os.makedirs(root, exist_ok=True) + filename = os.path.basename(url) + + expected_sha256 = url.split("/")[-2] + download_target = os.path.join(root, filename) + + if os.path.exists(download_target) and not os.path.isfile(download_target): + raise RuntimeError(f"{download_target} exists and is not a regular file") + + if os.path.isfile(download_target): + if ( + hashlib.sha256(open(download_target, "rb").read()).hexdigest() + == expected_sha256 + ): + return download_target + else: + warnings.warn( + f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file" + ) + + with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: + with tqdm( + total=int(source.info().get("Content-Length")), + ncols=80, + unit="iB", + unit_scale=True, + ) as loop: + while True: + buffer = source.read(8192) + if not buffer: + break + + output.write(buffer) + loop.update(len(buffer)) + + if ( + hashlib.sha256(open(download_target, "rb").read()).hexdigest() + != expected_sha256 + ): + raise RuntimeError( + f"Model has been downloaded but the SHA256 checksum does not not match" + ) + + return download_target + + +def _transform(n_px): + return Compose( + [ + 
Resize(n_px, interpolation=BICUBIC), + CenterCrop(n_px), + lambda image: image.convert("RGB"), + ToTensor(), + Normalize( + (0.48145466, 0.4578275, 0.40821073), + (0.26862954, 0.26130258, 0.27577711), + ), + ] + ) + + +def available_models() -> List[str]: + """Returns the names of available CLIP models""" + return list(_MODELS.keys()) + + +def load( + name: str, + device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", + jit=False, +): + """Load a CLIP model + + Parameters + ---------- + name : str + A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict + + device : Union[str, torch.device] + The device to put the loaded model + + jit : bool + Whether to load the optimized JIT model or more hackable non-JIT model (default). + + Returns + ------- + model : torch.nn.Module + The CLIP model + + preprocess : Callable[[PIL.Image], torch.Tensor] + A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input + """ + if name in _MODELS: + model_path = _download(_MODELS[name]) + elif os.path.isfile(name): + model_path = name + else: + raise RuntimeError( + f"Model {name} not found; available models = {available_models()}" + ) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location=device if jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if jit: + warnings.warn( + f"File {model_path} is not a JIT archive. Loading as a state dict instead" + ) + jit = False + state_dict = torch.load(model_path, map_location="cpu") + + if not jit: + model = build_model(state_dict or model.state_dict()).to(device) + if str(device) == "cpu": + model.float() + return model, _transform(model.visual.input_resolution) + + # patch the device names + device_holder = torch.jit.trace( + lambda: torch.ones([]).to(torch.device(device)), example_inputs=[] + ) + device_node = [ + n + for n in device_holder.graph.findAllNodes("prim::Constant") + if "Device" in repr(n) + ][-1] + + def patch_device(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(node["value"]).startswith( + "cuda" + ): + node.copyAttributes(device_node) + + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(device) == "cpu": + float_holder = torch.jit.trace( + lambda: torch.ones([]).float(), example_inputs=[] + ) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [ + 1, + 2, + ]: # dtype can be the second or third argument to aten::to() + if inputs[i].node()["value"] == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() + + return model, _transform(model.input_resolution.item()) + + +def tokenize( + texts: Union[str, List[str]], 
context_length: int = 77, truncate: bool = False +) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + truncate: bool + Whether to truncate the text in case its encoding is longer than the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + if truncate: + tokens = tokens[:context_length] + tokens[-1] = eot_token + else: + raise RuntimeError( + f"Input {texts[i]} is too long for context length {context_length}" + ) + result[i, : len(tokens)] = torch.tensor(tokens) + + return result diff --git a/detectors/P2G/src/models/clip/model.py b/detectors/P2G/src/models/clip/model.py new file mode 100644 index 0000000000000000000000000000000000000000..0208388531b635f8000917d28d7b4270f97ba955 --- /dev/null +++ b/detectors/P2G/src/models/clip/model.py @@ -0,0 +1,560 @@ +from collections import OrderedDict +from typing import Tuple, Union + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. 
an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.avgpool = nn.AvgPool2d(stride) if stride > 1 else nn.Identity() + + self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + + self.relu = nn.ReLU(inplace=True) + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + # downsampling layer is prepended with an avgpool, and the subsequent convolution has stride 1 + self.downsample = nn.Sequential( + OrderedDict( + [ + ("-1", nn.AvgPool2d(stride)), + ( + "0", + nn.Conv2d( + inplanes, + planes * self.expansion, + 1, + stride=1, + bias=False, + ), + ), + ("1", nn.BatchNorm2d(planes * self.expansion)), + ] + ) + ) + + def forward(self, x: torch.Tensor): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Module): + def __init__( + self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None + ): + super().__init__() + self.positional_embedding = nn.Parameter( + torch.randn(spacial_dim**2 + 1, embed_dim) / embed_dim**0.5 + ) + self.k_proj = nn.Linear(embed_dim, embed_dim) + self.q_proj = nn.Linear(embed_dim, embed_dim) + self.v_proj = nn.Linear(embed_dim, embed_dim) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim) + self.num_heads = num_heads + + def forward(self, x): + x = x.reshape(x.shape[0], x.shape[1], x.shape[2] * x.shape[3]).permute( + 2, 0, 1 + ) # NCHW -> (HW)NC + x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0) # (HW+1)NC + x = x + self.positional_embedding[:, None, :].to(x.dtype) # (HW+1)NC + x, _ = F.multi_head_attention_forward( + query=x, + key=x, + value=x, + embed_dim_to_check=x.shape[-1], + num_heads=self.num_heads, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + in_proj_weight=None, + in_proj_bias=torch.cat( + [self.q_proj.bias, self.k_proj.bias, self.v_proj.bias] + ), + bias_k=None, + bias_v=None, + add_zero_attn=False, + dropout_p=0, + out_proj_weight=self.c_proj.weight, + out_proj_bias=self.c_proj.bias, + use_separate_proj_weight=True, + training=self.training, + need_weights=False, + ) + + return x[0] + + +class ModifiedResNet(nn.Module): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. 
+ - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2d( + 3, width // 2, kernel_size=3, stride=2, padding=1, bias=False + ) + self.bn1 = nn.BatchNorm2d(width // 2) + self.conv2 = nn.Conv2d( + width // 2, width // 2, kernel_size=3, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(width // 2) + self.conv3 = nn.Conv2d(width // 2, width, kernel_size=3, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(width) + self.avgpool = nn.AvgPool2d(2) + self.relu = nn.ReLU(inplace=True) + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d( + input_resolution // 32, embed_dim, heads, output_dim + ) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + def stem(x): + for conv, bn in [ + (self.conv1, self.bn1), + (self.conv2, self.bn2), + (self.conv3, self.bn3), + ]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class LayerNorm(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +class QuickGELU(nn.Module): + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential( + OrderedDict( + [ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)), + ] + ) + ) + self.ln_2 = LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor, attn_mask: torch.Tensor): + if attn_mask is None: + self.attn_mask = ( + self.attn_mask.to(dtype=x.dtype, device=x.device) + if self.attn_mask is not None + else None + ) + else: + self.attn_mask = attn_mask.to(dtype=x.dtype, device=x.device) + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor, attn_mask: torch.Tensor): + x = x + self.attention(self.ln_1(x), attn_mask) + x = x + self.mlp(self.ln_2(x)) + return x + + +class Transformer(nn.Module): + def __init__( + self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None + ): + super().__init__() + self.width = width + self.layers = layers + self.attn_mask = attn_mask 
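+        # Note: the mask stored here is only a default; this modified Transformer
+        # also accepts a per-call attn_mask in forward(), which the text and
+        # visual encoders use to pass their prompt-aware masks.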
+ self.resblocks = nn.Sequential( + *[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)] + ) + + def forward(self, x: torch.Tensor, attn_mask: torch.Tensor = None): + if attn_mask is None: + attn_mask = self.attn_mask + for block in self.resblocks: + x = block(x, attn_mask) + return x + + +class VisionTransformer(nn.Module): + def __init__( + self, + input_resolution: int, + patch_size: int, + width: int, + layers: int, + heads: int, + output_dim: int, + ): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + self.conv1 = nn.Conv2d( + in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias=False, + ) + + scale = width**-0.5 + self.class_embedding = nn.Parameter(scale * torch.randn(width)) + self.positional_embedding = nn.Parameter( + scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width) + ) + self.ln_pre = LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = LayerNorm(width) + self.proj = nn.Parameter(scale * torch.randn(width, output_dim)) + + def forward(self, x: torch.Tensor, attn_mask: torch.Tensor = None): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2] + x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] + x = torch.cat( + [ + self.class_embedding.to(x.dtype) + + torch.zeros( + x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device + ), + x, + ], + dim=1, + ) # shape = [*, grid ** 2 + 1, width] + x = x + self.positional_embedding.to(x.dtype) + x = self.ln_pre(x) + + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x, attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + + x = self.ln_post(x[:, 0, :]) + + if self.proj is not None: + x = x @ self.proj + + return x + + +class CLIP(nn.Module): + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int, + ): + super().__init__() + + self.context_length = context_length + + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet( + layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width, + ) + else: + vision_heads = vision_width // 64 + self.visual = VisionTransformer( + input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim, + ) + + self.transformer = Transformer( + width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask(), + ) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = nn.Parameter( + torch.empty(self.context_length, transformer_width) + ) + self.ln_final = LayerNorm(transformer_width) + + self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim)) + self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + + self.initialize_parameters() + + def initialize_parameters(self): + nn.init.normal_(self.token_embedding.weight, std=0.02) + nn.init.normal_(self.positional_embedding, std=0.01) + + if isinstance(self.visual, ModifiedResNet): + if 
self.visual.attnpool is not None: + std = self.visual.attnpool.c_proj.in_features**-0.5 + nn.init.normal_(self.visual.attnpool.q_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.k_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.v_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.c_proj.weight, std=std) + + for resnet_block in [ + self.visual.layer1, + self.visual.layer2, + self.visual.layer3, + self.visual.layer4, + ]: + for name, param in resnet_block.named_parameters(): + if name.endswith("bn3.weight"): + nn.init.zeros_(param) + + proj_std = (self.transformer.width**-0.5) * ( + (2 * self.transformer.layers) ** -0.5 + ) + attn_std = self.transformer.width**-0.5 + fc_std = (2 * self.transformer.width) ** -0.5 + for block in self.transformer.resblocks: + nn.init.normal_(block.attn.in_proj_weight, std=attn_std) + nn.init.normal_(block.attn.out_proj.weight, std=proj_std) + nn.init.normal_(block.mlp.c_fc.weight, std=fc_std) + nn.init.normal_(block.mlp.c_proj.weight, std=proj_std) + + if self.text_projection is not None: + nn.init.normal_(self.text_projection, std=self.transformer.width**-0.5) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # pytorch uses additive attention mask; fill with -inf + mask = torch.empty(self.context_length, self.context_length) + mask.fill_(float("-inf")) + mask.triu_(1) # zero out the lower diagonal + return mask + + @property + def dtype(self): + return self.visual.conv1.weight.dtype + + def encode_image(self, image): + return self.visual(image.type(self.dtype)) + + def encode_text(self, text): + x = self.token_embedding(text).type(self.dtype) # [batch_size, n_ctx, d_model] + + x = x + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection + + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = logit_scale * image_features @ text_features.t() + logits_per_text = logit_scale * text_features @ image_features.t() + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text + + +def convert_weights(model: nn.Module): + """Convert applicable model parameters to fp16""" + + def _convert_weights_to_fp16(l): + if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Linear)): + l.weight.data = l.weight.data.half() + if l.bias is not None: + l.bias.data = l.bias.data.half() + + if isinstance(l, nn.MultiheadAttention): + for attr in [ + *[f"{s}_proj_weight" for s in ["in", "q", "k", "v"]], + "in_proj_bias", + "bias_k", + "bias_v", + ]: + tensor = getattr(l, attr) + if tensor is not None: + tensor.data = tensor.data.half() + + for name in ["text_projection", "proj"]: + if hasattr(l, name): + attr = getattr(l, name) + if attr is not None: + attr.data = attr.data.half() + + model.apply(_convert_weights_to_fp16) + + +def build_model(state_dict: dict): + 
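+    # The architecture is recovered purely from the checkpoint's tensor shapes:
+    # a "visual.proj" key marks a ViT image encoder; otherwise the ResNet layer
+    # counts, width and attention-pool grid size are read back from the
+    # state_dict keys below, so no separate config file is needed.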
vit = "visual.proj" in state_dict + + if vit: + vision_width = state_dict["visual.conv1.weight"].shape[0] + vision_layers = len( + [ + k + for k in state_dict.keys() + if k.startswith("visual.") and k.endswith(".attn.in_proj_weight") + ] + ) + vision_patch_size = state_dict["visual.conv1.weight"].shape[-1] + grid_size = round( + (state_dict["visual.positional_embedding"].shape[0] - 1) ** 0.5 + ) + image_resolution = vision_patch_size * grid_size + else: + counts: list = [ + len( + set( + k.split(".")[2] + for k in state_dict + if k.startswith(f"visual.layer{b}") + ) + ) + for b in [1, 2, 3, 4] + ] + vision_layers = tuple(counts) + vision_width = state_dict["visual.layer1.0.conv1.weight"].shape[0] + output_width = round( + (state_dict["visual.attnpool.positional_embedding"].shape[0] - 1) ** 0.5 + ) + vision_patch_size = None + assert ( + output_width**2 + 1 + == state_dict["visual.attnpool.positional_embedding"].shape[0] + ) + image_resolution = output_width * 32 + + embed_dim = state_dict["text_projection"].shape[1] + context_length = state_dict["positional_embedding"].shape[0] + vocab_size = state_dict["token_embedding.weight"].shape[0] + transformer_width = state_dict["ln_final.weight"].shape[0] + transformer_heads = transformer_width // 64 + transformer_layers = len( + set( + k.split(".")[2] + for k in state_dict + if k.startswith(f"transformer.resblocks") + ) + ) + + model = CLIP( + embed_dim, + image_resolution, + vision_layers, + vision_width, + vision_patch_size, + context_length, + vocab_size, + transformer_width, + transformer_heads, + transformer_layers, + ) + + for key in ["input_resolution", "context_length", "vocab_size"]: + if key in state_dict: + del state_dict[key] + + convert_weights(model) + model.load_state_dict(state_dict) + return model.eval() diff --git a/detectors/P2G/src/models/clip/prompt_learner.py b/detectors/P2G/src/models/clip/prompt_learner.py new file mode 100644 index 0000000000000000000000000000000000000000..ede9c797cde7b238f3e3917afb6af5c4884d0298 --- /dev/null +++ b/detectors/P2G/src/models/clip/prompt_learner.py @@ -0,0 +1,73 @@ +import torch +import torch.nn as nn + +from models.clip import clip +from models.clip.simple_tokenizer import SimpleTokenizer as _Tokenizer + +_tokenizer = _Tokenizer() + + +def load_clip_to_cpu(cfg): + backbone_name = cfg.backbonename + url = clip._MODELS[backbone_name] + model_path = clip._download(url) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location="cpu").eval() + state_dict = None + + except RuntimeError: + state_dict = torch.load(model_path, map_location="cpu") + + model = clip.build_model(state_dict or model.state_dict()) + + return model + + +class PromptLearner(nn.Module): + def __init__(self, cfg, clip_model, k): + super().__init__() + positional_embedding = clip_model.positional_embedding + + assert k >= 1, "K should be bigger than 0" + + self.K = k # the number of prompt pair + self.dtype = clip_model.dtype + self.d_t = clip_model.ln_final.weight.shape[0] # 512 + self.d_v = 768 + + clip_imsize = clip_model.visual.input_resolution # 224 + cfg_imsize = cfg.INPUTSIZE[0] # (224, 224)[0] + assert ( + cfg_imsize == clip_imsize + ), f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})" + + self.initialization_token(clip_model) + + def initialization_token(self, clip_model): + # text token initialization + text_token = clip_model.token_embedding(torch.tensor([49407])) + text_token = text_token.repeat(self.K, 1) + text_noise = torch.randn(self.K, self.d_t) + text_noise = 
text_noise / text_noise.norm(dim=-1, keepdim=True) + text_token += 0.1 * text_noise + text_token = text_token.type(self.dtype) + self.text_prompt = nn.Parameter(text_token) + + # visual token initialization + visual_token = clip_model.visual.class_embedding + visual_token = visual_token.repeat(self.K, 1) + visual_noise = torch.randn(self.K, self.d_v) + visual_noise = visual_noise / visual_noise.norm(dim=-1, keepdim=True) + visual_token += 0.1 * visual_noise + visual_token = visual_token.type(self.dtype) + self.img_prompt = nn.Parameter(visual_token) + + def forward(self): + return self.text_prompt, self.img_prompt + + +class cfgc(object): + backbonename = "ViT-B/16" + INPUTSIZE = (224, 224) diff --git a/detectors/P2G/src/models/clip/simple_tokenizer.py b/detectors/P2G/src/models/clip/simple_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..7487823a9f4e8e11899a8c041af7729149de8644 --- /dev/null +++ b/detectors/P2G/src/models/clip/simple_tokenizer.py @@ -0,0 +1,150 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join( + os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz" + ) + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = ( + list(range(ord("!"), ord("~") + 1)) + + list(range(ord("¡"), ord("¬") + 1)) + + list(range(ord("®"), ord("ÿ") + 1)) + ) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). 
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r"\s+", " ", text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split("\n") + merges = merges[1 : 49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + "" for v in vocab] + for merge in merges: + vocab.append("".join(merge)) + vocab.extend(["<|startoftext|>", "<|endoftext|>"]) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = { + "<|startoftext|>": "<|startoftext|>", + "<|endoftext|>": "<|endoftext|>", + } + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE, + ) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + "",) + pairs = get_pairs(word) + + if not pairs: + return token + "" + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = " ".join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = "".join(self.byte_encoder[b] for b in token.encode("utf-8")) + bpe_tokens.extend( + self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" ") + ) + return bpe_tokens + + def decode(self, tokens): + text = "".join([self.decoder[token] for token in tokens]) + text = ( + bytearray([self.byte_decoder[c] for c in text]) + .decode("utf-8", errors="replace") + .replace("", " ") + ) + return text diff --git a/detectors/P2G/src/models/slinet.py b/detectors/P2G/src/models/slinet.py new file mode 100644 index 0000000000000000000000000000000000000000..a33a52185d6fac2409829d8209ccb082713606e9 --- /dev/null +++ b/detectors/P2G/src/models/slinet.py @@ -0,0 +1,538 @@ +import torch +import torch.nn as nn +import copy +from einops import rearrange, reduce + +from models.clip import clip +from models.clip.prompt_learner import cfgc, load_clip_to_cpu, PromptLearner +from utils.class_names import cddb_classnames + +import logging + +import os + +os.environ["CUDA_LAUNCH_BLOCKING"] = "1" + + +class SliNet(nn.Module): + + def __init__(self, args): + super(SliNet, self).__init__() + self.args = args + self.cfg = cfgc() + self.logging_cfg() + + # Load and configure CLIP model + clip_model = load_clip_to_cpu(self.cfg) + if 
args["precision"] == "fp32": + clip_model.float() + self.clip_model = clip_model + + # Set general parameters + self.K = args["K"] + self.device = args["device"] + self.topk_classes = args["topk_classes"] + + # Set ensembling parameters for object classes, not the prediction ensembling (for that see the evaluation part) + if self.topk_classes > 1: + ( + self.ensemble_token_embedding, + self.ensemble_before_cosine_sim, + self.ensemble_after_cosine_sim, + self.confidence_score_enable, + ) = args["ensembling"] + else: + self.ensemble_token_embedding = self.ensemble_before_cosine_sim = self.ensemble_after_cosine_sim = self.confidence_score_enable = False + + # Set text encoder components + self.token_embedding = clip_model.token_embedding + self.text_pos_embedding = clip_model.positional_embedding + self.text_transformers = clip_model.transformer + self.text_ln_final = clip_model.ln_final + self.text_proj = clip_model.text_projection + + # Set vision encoder components + self.img_patch_embedding = clip_model.visual.conv1 + self.img_cls_embedding = clip_model.visual.class_embedding + self.img_pos_embedding = clip_model.visual.positional_embedding + self.img_pre_ln = clip_model.visual.ln_pre + self.img_transformer = clip_model.visual.transformer + self.img_post_ln = clip_model.visual.ln_post + self.img_proj = clip_model.visual.proj + + # Set logit and dtype + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + # Set continual learning parameters + self.class_num = 1 + self.numtask = 0 + + # Set up prompt learner and masks + self.prompt_learner = nn.ModuleList() + if args["dataset"] == "cddb": + for i in range(len(args["task_name"])): + self.prompt_learner.append(PromptLearner(self.cfg, clip_model, self.K)) + self.make_prompts( + [ + "a photo of a _ image.".replace("_", c) + for c in list(cddb_classnames.values()) + ] + ) + self.class_num = 2 + elif args["dataset"] == "TrueFake": + for i in range(len(args["task_name"])): + self.prompt_learner.append(PromptLearner(self.cfg, clip_model, self.K)) + self.make_prompts( + [ + "a photo of a _ image.".replace("_", c) + for c in list(cddb_classnames.values()) + ] + ) + self.class_num = 2 + else: + raise ValueError("Unknown datasets: {}.".format(args["dataset"])) + self.define_mask() + + def make_prompts(self, prompts): + with torch.no_grad(): + tmp = torch.cat([clip.tokenize(p) for p in prompts]).clone() + tmp = tmp.to('cuda:0') + tmp = tmp.to(next(self.clip_model.parameters()).device) # CLIP on CPU at the beginning, after in GPU + self.text_tokenized = tmp + self.text_x = self.token_embedding(self.text_tokenized).type( + self.dtype + ) + self.text_pos_embedding.type(self.dtype) + self.len_prompts = self.text_tokenized.argmax(dim=-1) + 1 + + def define_mask(self): + len_max = 77 + attn_head = 8 + + # text encoder mask + num_masks = len(self.len_prompts) * attn_head + text_mask = torch.full((num_masks, len_max, len_max), float("-inf")) + + for i, idx in enumerate(self.len_prompts): + mask = torch.full((len_max, len_max), float("-inf")) + mask.triu_(1) # zero out the lower diagonal + mask[:, idx:].fill_(float("-inf")) + text_mask[i * attn_head : (i + 1) * attn_head] = mask + + self.text_mask = text_mask + + # image encoder mask + att_size = 1 + 14 * 14 + self.K + visual_mask = torch.zeros((att_size, att_size), dtype=self.dtype, requires_grad=False) + visual_mask[:, -1 * self.K :] = float("-inf") + self.visual_mask = visual_mask + + def get_none_attn_mask(self, att_size: int): # correspond to a None attn_mask + return 
torch.zeros((att_size, att_size), dtype=self.dtype, requires_grad=False) + + @property + def feature_dim(self): + return self.clip_model.visual.output_dim + + def extract_vector(self, image): + # only image without prompts + image_features = self.clip_model.visual( + image.type(self.dtype), self.get_none_attn_mask(att_size=1 + 14 * 14) + ) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + + return image_features + + def generate_prompts_from_input(self, object_labels): + assert self.topk_classes <= 5 # maximum topk values from CLIP Zeroshot, hardcoded value based on our initial settings + labels, scores = zip(*object_labels) + labels_by_position_lists = [ + list(group) for group in zip(*labels[: self.topk_classes]) + ] + + if self.confidence_score_enable: + self.score_weights_labels = ( + (torch.stack(scores[: self.topk_classes]) / 100) + .t() + .unsqueeze(1) + .expand(-1, 2, -1) + .to(self.device) + .half() + ) + self.score_weights_labels = ( + self.score_weights_labels + / self.score_weights_labels.sum(dim=-1, keepdim=True) + ) # normalize + + if self.topk_classes > 0: + # Top1 object label to text + if self.topk_classes == 1: + prompts = [ + f"a {type_image} photo of a {label[0]}." + for label in labels_by_position_lists + for type_image in cddb_classnames.values() + ] # * [N = B*2 = 256] + self.make_prompts(prompts) + # Topk object label to text + else: + prompts = [ + f"a {type_image} photo of a {topk}." + for label in labels_by_position_lists + for type_image in cddb_classnames.values() + for topk in label + ] + if self.ensemble_token_embedding: + assert ( + self.ensemble_before_cosine_sim == False + and self.ensemble_after_cosine_sim == False + ) + with torch.no_grad(): + self.text_tokenized = torch.cat( + [clip.tokenize(p) for p in prompts] + ).to( + next(self.clip_model.parameters()).device + ) # CLIP on CPU at the beginning, after in GPU + self.text_x = self.token_embedding(self.text_tokenized).type( + self.dtype + ) + self.text_pos_embedding.type(self.dtype) + self.len_prompts = torch.cat( + [ + self.text_tokenized[i : i + self.topk_classes] + .argmax(dim=-1) + .max() + .unsqueeze(0) + + 1 + for i in range( + 0, len(self.text_tokenized), self.topk_classes + ) + ] + ) + # * B = batch | L = label (real/fake) | O = object labels (topk) | M = len_max 77 | D = dimension 512 *# + self.text_x = rearrange( + self.text_x, + "(b l o) m d -> b l o m d", + b=len(labels_by_position_lists), + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + self.text_x = reduce(self.text_x, "b l o m d -> b l m d", "mean") + self.text_x = rearrange(self.text_x, "b l m d -> (b l) m d") + else: + self.make_prompts(prompts) + + # Real/fake image prompts without object labels + else: + # emulate top1 prompts generation, generate batch size numbers prompts * 2 (real/fake) + prompts = [ + f"a photo of a {type_image} image." 
+ for i in range(len(object_labels[0][0])) + for type_image in cddb_classnames.values() + ] + self.make_prompts(prompts) + self.define_mask() + + def image_encoder(self, image, image_prompt): + batch_size = image.shape[0] + visual_mask = self.visual_mask + + # training and inference may have different image_prompt shape + if image_prompt.dim() == 2: + image_prompt = image_prompt.repeat(batch_size, 1, 1) + + # forward propagate image features with token concatenation + image_embedding = self.img_patch_embedding( + image.type(self.dtype) + ) # (batch_size, h_dim, 7, 7) + image_embedding = image_embedding.reshape( + batch_size, image_embedding.shape[1], -1 + ) + image_embedding = image_embedding.permute(0, 2, 1) # (batch_size, 49, h_dim) + image_embedding = torch.cat( + [ + self.img_cls_embedding.repeat(batch_size, 1, 1).type(self.dtype), + image_embedding, + ], + dim=1, + ) # 16 (batch_size, 50, h_dim) + img_x = image_embedding + self.img_pos_embedding.type(self.dtype) # (N,L,D) + # concatenation the token on visual encoder + img_x = torch.cat([img_x, image_prompt], dim=1) + # image encoder + img_x = self.img_pre_ln(img_x) + img_x = img_x.permute(1, 0, 2) + img_x = self.img_transformer(img_x, visual_mask) + img_x = img_x.permute(1, 0, 2) + img_f = self.img_post_ln(img_x[:, -1 * self.K :, :]) @ self.img_proj + i_f = self.img_post_ln(img_x[:, 0, :]) @ self.img_proj + + """ + img_f: only K prompts + i_f: img fts without K prompts + """ + return img_f, i_f + + def text_encoder(self, text_prompt): + text_x = self.text_x # * [N, L = 77, D = 512] + text_mask = self.text_mask # * [N * ATTN_HEAD = 8, 77, 77] + text_x = text_x.to(self.device) + + for i in range(self.K): + text_x[torch.arange(text_x.shape[0]), self.len_prompts + i, :] = ( + text_prompt[i, :].repeat(text_x.shape[0], 1) + ) + + text_x = text_x.permute(1, 0, 2) # * NLD -> LND + text_x = self.text_transformers(text_x, text_mask) # * [LND] + text_x = text_x.permute(1, 0, 2) # * [NLD] + text_x = self.text_ln_final(text_x).type(self.dtype) + + text_f = torch.empty( + text_x.shape[0], 0, 512, device=self.device, dtype=self.dtype + ) # * [N0D] + for i in range(self.K): + idx = self.len_prompts + i + x = text_x[torch.arange(text_x.shape[0]), idx] + text_f = torch.cat([text_f, x[:, None, :]], dim=1) + + text_f = text_f @ self.text_proj # * [NKD] + t_f = None + # t_f = text_x[torch.arange(text_x.shape[0]), self.text_tokenized.argmax(dim=-1)] @ self.text_proj # [ND] + + if self.ensemble_before_cosine_sim: + assert ( + self.ensemble_token_embedding == False + and self.ensemble_after_cosine_sim == False + ) + batch_size = self.text_x.shape[0] // ( + len(cddb_classnames.values()) * self.topk_classes + ) + # * B = batch | L = label (real/fake) | O = object labels (topk) | K = k learnable prompts | D = dimension 512 *# + text_f = rearrange( + text_f, + "(b l o) k d -> b l o k d", + b=batch_size, + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + text_f = reduce(text_f, "b l o k d -> b l k d", "mean") + text_f = rearrange(text_f, "b l k d -> (b l) k d") + + """ + text_f: only K prompts + t_f: text fts without K prompts + """ + return text_f, t_f + + def forward(self, image, object_labels): + ## * B = batch | N = B*2 = num prompts | D = text features | F = image features | P = prompt per image + text_prompt, image_prompt = self.prompt_learner[self.numtask - 1]() # * [KD], [KF] + self.generate_prompts_from_input(object_labels) + + text_f, _ = self.text_encoder(text_prompt) # * [NKD] + img_f, _ = self.image_encoder(image, image_prompt) # * 
[BKD] + + text_f = text_f / text_f.norm(dim=-1, keepdim=True) + img_f = img_f / img_f.norm(dim=-1, keepdim=True) + + logits = self.training_cosine_similarity(text_f, img_f) + + return {"logits": logits} + + def training_cosine_similarity(self, text_f, img_f): + if self.ensemble_after_cosine_sim: + assert ( + self.ensemble_before_cosine_sim == False + and self.ensemble_token_embedding == False + ) + # * B = batch | L = label (real/fake) | O = object labels (topk) | K = k learnable prompts | D = dimension 512 *# + text_f = rearrange( + text_f, + "(b l o) k d -> b l o k d", + b=img_f.shape[0], + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + logits = torch.zeros( + img_f.shape[0], text_f.shape[1], device=self.device + ) # * [BP] + + for i in range(self.K): + i_img_f = img_f[:, i, :] # * [BD] + i_text_f = text_f[:, :, :, i, :] # * [BLOD] + logit = torch.einsum("bd,blod->blo", i_img_f, i_text_f) # * [BLO] + if self.confidence_score_enable: + logit = torch.einsum( + "blo,blo->bl", logit, self.score_weights_labels + ) + else: + logit = reduce(logit, "b l o -> b l", "mean") # * [BL] + logit = self.logit_scale.exp() * logit + logits += logit + logits /= self.K + + else: # default case + text_f = rearrange( + text_f, + "(b p) k d -> b p k d", + b=img_f.shape[0], + p=len(cddb_classnames.values()), + ) + logits = torch.zeros( + img_f.shape[0], text_f.shape[1], device=self.device + ) # * [BP] + + for i in range(self.K): + i_img_f = img_f[:, i, :] # * [BD] + i_text_f = text_f[:, :, i, :] # * [BPD] + logit = torch.einsum("bd,bpd->bp", i_img_f, i_text_f) # * [BP] + logit = self.logit_scale.exp() * logit + logits += logit + logits /= self.K + + return logits + + def interface(self, image, object_labels, total_tasks, keys_dict): + ## * B = batch | N = B*2 = num prompts | D = text features | F = image features | P = prompt per image | K = k learnable prompt for each task | T = task + self.total_tasks = total_tasks + img_prompts = torch.cat( + [ + learner.img_prompt + for idx, learner in enumerate(self.prompt_learner) + if idx < self.total_tasks + ] + ) # * [K*T,D] + text_prompts = torch.cat( + [ + learner.text_prompt + for idx, learner in enumerate(self.prompt_learner) + if idx < self.total_tasks + ] + ) # * [K*T,F] + + self.K = self.K * self.total_tasks # make appropriate masks + self.generate_prompts_from_input(object_labels) + + text_f, _ = self.text_encoder(text_prompts) # * [N,K*T,D] + img_f, i_f = self.image_encoder(image, img_prompts) # * [B,K*T,D] , [B,D] + + prob_dist_dict = { + "real_prob_dist": self.convert_to_prob_distribution( + keys_dict["real_keys_one_cluster"], i_f + ), + "fake_prob_dist": self.convert_to_prob_distribution( + keys_dict["fake_keys_one_cluster"], i_f + ), + "keys_prob_dist": self.convert_to_prob_distribution( + keys_dict["all_keys_one_cluster"], i_f + ), + "upperbound_dist": keys_dict["upperbound"], + } + + selection_mapping = { + "fake": "fake_prob_dist", + "real": "real_prob_dist", + "all": "keys_prob_dist", + "upperbound": "upperbound_dist", + } + + self.prototype_selection = selection_mapping.get(keys_dict["prototype"], None) + + text_f = text_f / text_f.norm(dim=-1, keepdim=True) + img_f = img_f / img_f.norm(dim=-1, keepdim=True) + + self.K = ( + self.K // self.total_tasks + ) # restore K to original value for cosine similarity + logits = self.inference_cosine_similarity( + text_f, img_f, prob_dist_dict + ) # * [B,T,P] + logits = logits + return logits + + def convert_to_prob_distribution(self, keys, i_f): + domain_cls = torch.einsum("bd,td->bt", i_f, keys) 
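+        # i_f holds the [B, D] CLS image features (no learned prompts) and `keys`
+        # the [T, D] per-task prototype keys, so the einsum yields a [B, T]
+        # similarity matrix; the softmax below turns it into the per-task
+        # weighting consumed by inference_cosine_similarity().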
+ domain_cls = nn.functional.softmax(domain_cls, dim=1) + return domain_cls + + def inference_cosine_similarity(self, text_f, img_f, prob_dist_dict): + if self.ensemble_after_cosine_sim: + assert ( + self.ensemble_before_cosine_sim == False + and self.ensemble_token_embedding == False + ) + text_f = rearrange( + text_f, + "(b l o) k d -> b l o k d", + b=img_f.shape[0], + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + logits = [] + for t in range(self.total_tasks): + logits_tmp = torch.zeros(img_f.shape[0], text_f.shape[1], device=self.device) # * [B,P] + + t_img_domain_cls = prob_dist_dict[self.prototype_selection][:, t].unsqueeze(-1) # * [B, 1] + t_text_domain_cls = t_img_domain_cls.unsqueeze(-1).unsqueeze(-1) + + for k in range(self.K): + offset = k + t * self.K + i_img_f = img_f[:, offset, :] * t_img_domain_cls # * [B,D] + i_text_f = (text_f[:, :, :, offset, :] * t_text_domain_cls) # * [B,P,D] + logit = torch.einsum("bd,blod->blo", i_img_f, i_text_f) + if self.confidence_score_enable: + logit = torch.einsum("blo,blo->bl", logit, self.score_weights_labels) + else: + logit = reduce(logit, "b l o -> b l", "mean") # * [B,P] + logit = self.logit_scale.exp() * logit + logits_tmp += logit + logits_tmp /= self.K + logits.append(logits_tmp) + + else: + text_f = rearrange( + text_f, + "(b p) k d -> b p k d", + b=img_f.shape[0], + p=len(cddb_classnames.values()), + ) # * [B,P,K*T,D] + logits = [] + for t in range(self.total_tasks): + logits_tmp = torch.zeros(img_f.shape[0], text_f.shape[1], device=self.device) # * [B,P] + + t_img_domain_cls = prob_dist_dict[self.prototype_selection][:, t].unsqueeze(-1) # * [B, 1] + t_text_domain_cls = t_img_domain_cls.unsqueeze(-1) # * [B, P, 1] + # t_text_domain_cls = stack_real_fake_prob[:,:,t].unsqueeze(-1) #* [B, P, 1] + + for k in range(self.K): + offset = k + t * self.K + i_img_f = img_f[:, offset, :] * t_img_domain_cls # * [B,D] + i_text_f = text_f[:, :, offset, :] * t_text_domain_cls # * [B,P,D] + logit = torch.einsum("bd,bpd->bp", i_img_f, i_text_f) # * [B,P] + logit = self.logit_scale.exp() * logit # * t_img_domain_cls + logits_tmp += logit + logits_tmp /= self.K + logits.append(logits_tmp) + logits = torch.stack(logits) # * [T,B,P] + logits = rearrange(logits, "t b p -> b t p") # * [B,T,P] + return logits + + def update_fc(self): + self.numtask += 1 + + def copy(self): + return copy.deepcopy(self) + + def freeze(self): + for param in self.parameters(): + param.requires_grad = False + self.eval() + return self + + def logging_cfg(self): + args = { + attr: getattr(self.cfg, attr) + for attr in dir(self.cfg) + if not attr.startswith("_") + } + for key, value in args.items(): + logging.info("CFG -> {}: {}".format(key, value)) diff --git a/detectors/P2G/src/models/slinet_det.py b/detectors/P2G/src/models/slinet_det.py new file mode 100644 index 0000000000000000000000000000000000000000..2f04bd5ed960ad0c22f5629d7ea131c9e9846ea5 --- /dev/null +++ b/detectors/P2G/src/models/slinet_det.py @@ -0,0 +1,500 @@ +import torch +import torch.nn as nn +import copy +from einops import rearrange, reduce + +from models.clip import clip +from models.clip.prompt_learner import cfgc, load_clip_to_cpu, PromptLearner +from utils.class_names import cddb_classnames + +import logging + +import os + +os.environ["CUDA_LAUNCH_BLOCKING"] = "1" + + +class SliNet(nn.Module): + + def __init__(self, args): + super(SliNet, self).__init__() + self.args = args + self.cfg = cfgc() + self.logging_cfg() + + # Load and configure CLIP model + clip_model = load_clip_to_cpu(self.cfg) + 
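+        # load_clip_to_cpu() returns the CLIP backbone on CPU with fp16 weights
+        # (see convert_weights() in models/clip/model.py); the optional cast
+        # below switches it to fp32, which is typically required for CPU inference.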
if args["precision"] == "fp32": + clip_model.float() + self.clip_model = clip_model + + # Set general parameters + self.K = args["K"] + self.device = args["device"] + self.topk_classes = args["topk_classes"] + + # Set ensembling parameters for object classes, not the prediction ensembling (for that see the evaluation part) + if self.topk_classes > 1: + ( + self.ensemble_token_embedding, + self.ensemble_before_cosine_sim, + self.ensemble_after_cosine_sim, + self.confidence_score_enable, + ) = args["ensembling"] + else: + self.ensemble_token_embedding = self.ensemble_before_cosine_sim = self.ensemble_after_cosine_sim = self.confidence_score_enable = False + + # Set text encoder components + self.token_embedding = clip_model.token_embedding + self.text_pos_embedding = clip_model.positional_embedding + self.text_transformers = clip_model.transformer + self.text_ln_final = clip_model.ln_final + self.text_proj = clip_model.text_projection + + # Set vision encoder components + self.img_patch_embedding = clip_model.visual.conv1 + self.img_cls_embedding = clip_model.visual.class_embedding + self.img_pos_embedding = clip_model.visual.positional_embedding + self.img_pre_ln = clip_model.visual.ln_pre + self.img_transformer = clip_model.visual.transformer + self.img_post_ln = clip_model.visual.ln_post + self.img_proj = clip_model.visual.proj + + # Set logit and dtype + self.logit_scale = clip_model.logit_scale + self.dtype = clip_model.dtype + + # Set continual learning parameters + self.class_num = 1 + self.numtask = 0 + + # Set up prompt learner and masks + self.prompt_learner = nn.ModuleList() + if args["dataset"] == "cddb": + for i in range(len(args["task_name"])): + self.prompt_learner.append(PromptLearner(self.cfg, clip_model, self.K)) + self.make_prompts( + [ + "a photo of a _ image.".replace("_", c) + for c in list(cddb_classnames.values()) + ] + ) + self.class_num = 2 + elif args["dataset"] == "TrueFake": + for i in range(len(args["task_name"])): + self.prompt_learner.append(PromptLearner(self.cfg, clip_model, self.K)) + self.make_prompts( + [ + "a photo of a _ image.".replace("_", c) + for c in list(cddb_classnames.values()) + ] + ) + self.class_num = 2 + else: + raise ValueError("Unknown datasets: {}.".format(args["dataset"])) + self.define_mask() + + def make_prompts(self, prompts): + with torch.no_grad(): + tmp = torch.cat([clip.tokenize(p) for p in prompts]).clone() + # tmp = tmp.to('cuda:0') # Removed hardcoded cuda + tmp = tmp.to(next(self.clip_model.parameters()).device) # CLIP on CPU at the beginning, after in GPU + self.text_tokenized = tmp + self.text_x = self.token_embedding(self.text_tokenized).type( + self.dtype + ) + self.text_pos_embedding.type(self.dtype) + self.len_prompts = self.text_tokenized.argmax(dim=-1) + 1 + + def define_mask(self): + len_max = 77 + attn_head = 8 + + # text encoder mask + num_masks = len(self.len_prompts) * attn_head + text_mask = torch.full((num_masks, len_max, len_max), float("-inf")) + + for i, idx in enumerate(self.len_prompts): + mask = torch.full((len_max, len_max), float("-inf")) + mask.triu_(1) # zero out the lower diagonal + mask[:, idx:].fill_(float("-inf")) + text_mask[i * attn_head : (i + 1) * attn_head] = mask + + self.text_mask = text_mask + + # image encoder mask + att_size = 1 + 14 * 14 + self.K + visual_mask = torch.zeros((att_size, att_size), dtype=self.dtype, requires_grad=False) + visual_mask[:, -1 * self.K :] = float("-inf") + self.visual_mask = visual_mask + + def get_none_attn_mask(self, att_size: int): # correspond to a None 
attn_mask + return torch.zeros((att_size, att_size), dtype=self.dtype, requires_grad=False) + + @property + def feature_dim(self): + return self.clip_model.visual.output_dim + + def extract_vector(self, image): + # only image without prompts + image_features = self.clip_model.visual( + image.type(self.dtype), self.get_none_attn_mask(att_size=1 + 14 * 14) + ) + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + + return image_features + + def generate_prompts_from_input(self, object_labels): + assert self.topk_classes <= 5 # maximum topk values from CLIP Zeroshot, hardcoded value based on our initial settings + labels, scores = zip(*object_labels) + labels_by_position_lists = [ + list(group) for group in zip(*labels[: self.topk_classes]) + ] + + if self.confidence_score_enable: + self.score_weights_labels = ( + (torch.stack(scores[: self.topk_classes]) / 100) + .t() + .unsqueeze(1) + .expand(-1, 2, -1) + .to(self.device) + .half() + ) + self.score_weights_labels = ( + self.score_weights_labels + / self.score_weights_labels.sum(dim=-1, keepdim=True) + ) # normalize + + if self.topk_classes > 0: + # Top1 object label to text + if self.topk_classes == 1: + prompts = [ + f"a {type_image} photo of a {labels[0]}." + for type_image in cddb_classnames.values() + ] + self.make_prompts(prompts) + # Topk object label to text + else: + # labels: tuple of topk labels for the image + prompts = [ + f"a {type_image} photo of a {label}." + for type_image in cddb_classnames.values() + for label in labels[:self.topk_classes] + ] + self.make_prompts(prompts) + + # Real/fake image prompts without object labels + else: + # emulate top1 prompts generation, generate batch size numbers prompts * 2 (real/fake) + prompts = [ + f"a photo of a {type_image} image." 
+ for i in range(len(object_labels[0][0])) + for type_image in cddb_classnames.values() + ] + self.make_prompts(prompts) + self.define_mask() + + def image_encoder(self, image, image_prompt): + batch_size = image.shape[0] + visual_mask = self.visual_mask + + # training and inference may have different image_prompt shape + if image_prompt.dim() == 2: + image_prompt = image_prompt.repeat(batch_size, 1, 1) + + # forward propagate image features with token concatenation + image_embedding = self.img_patch_embedding( + image.type(self.dtype) + ) # (batch_size, h_dim, 7, 7) + image_embedding = image_embedding.reshape( + batch_size, image_embedding.shape[1], -1 + ) + image_embedding = image_embedding.permute(0, 2, 1) # (batch_size, 49, h_dim) + image_embedding = torch.cat( + [ + self.img_cls_embedding.repeat(batch_size, 1, 1).type(self.dtype), + image_embedding, + ], + dim=1, + ) # 16 (batch_size, 50, h_dim) + img_x = image_embedding + self.img_pos_embedding.type(self.dtype) # (N,L,D) + # concatenation the token on visual encoder + img_x = torch.cat([img_x, image_prompt], dim=1) + # image encoder + img_x = self.img_pre_ln(img_x) + img_x = img_x.permute(1, 0, 2) + img_x = self.img_transformer(img_x, visual_mask) + img_x = img_x.permute(1, 0, 2) + img_f = self.img_post_ln(img_x[:, -1 * self.K :, :]) @ self.img_proj + i_f = self.img_post_ln(img_x[:, 0, :]) @ self.img_proj + + """ + img_f: only K prompts + i_f: img fts without K prompts + """ + return img_f, i_f + + def text_encoder(self, text_prompt): + text_x = self.text_x # * [N, L = 77, D = 512] + text_mask = self.text_mask # * [N * ATTN_HEAD = 8, 77, 77] + text_x = text_x.to(self.device) + + for i in range(self.K): + text_x[torch.arange(text_x.shape[0]), self.len_prompts + i, :] = ( + text_prompt[i, :].repeat(text_x.shape[0], 1) + ) + + text_x = text_x.permute(1, 0, 2) # * NLD -> LND + text_x = self.text_transformers(text_x, text_mask) # * [LND] + text_x = text_x.permute(1, 0, 2) # * [NLD] + text_x = self.text_ln_final(text_x).type(self.dtype) + + text_f = torch.empty( + text_x.shape[0], 0, 512, device=self.device, dtype=self.dtype + ) # * [N0D] + for i in range(self.K): + idx = self.len_prompts + i + x = text_x[torch.arange(text_x.shape[0]), idx] + text_f = torch.cat([text_f, x[:, None, :]], dim=1) + + text_f = text_f @ self.text_proj # * [NKD] + t_f = None + # t_f = text_x[torch.arange(text_x.shape[0]), self.text_tokenized.argmax(dim=-1)] @ self.text_proj # [ND] + + if self.ensemble_before_cosine_sim: + assert ( + self.ensemble_token_embedding == False + and self.ensemble_after_cosine_sim == False + ) + batch_size = self.text_x.shape[0] // ( + len(cddb_classnames.values()) * self.topk_classes + ) + # * B = batch | L = label (real/fake) | O = object labels (topk) | K = k learnable prompts | D = dimension 512 *# + text_f = rearrange( + text_f, + "(b l o) k d -> b l o k d", + b=batch_size, + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + text_f = reduce(text_f, "b l o k d -> b l k d", "mean") + text_f = rearrange(text_f, "b l k d -> (b l) k d") + + """ + text_f: only K prompts + t_f: text fts without K prompts + """ + return text_f, t_f + + def forward(self, image, object_labels): + ## * B = batch | N = B*2 = num prompts | D = text features | F = image features | P = prompt per image + text_prompt, image_prompt = self.prompt_learner[self.numtask - 1]() # * [KD], [KF] + self.generate_prompts_from_input(object_labels) + + text_f, _ = self.text_encoder(text_prompt) # * [NKD] + img_f, _ = self.image_encoder(image, image_prompt) # * 
[BKD] + + text_f = text_f / text_f.norm(dim=-1, keepdim=True) + img_f = img_f / img_f.norm(dim=-1, keepdim=True) + + logits = self.training_cosine_similarity(text_f, img_f) + + return {"logits": logits} + + def training_cosine_similarity(self, text_f, img_f): + if self.ensemble_after_cosine_sim: + assert ( + self.ensemble_before_cosine_sim == False + and self.ensemble_token_embedding == False + ) + # * B = batch | L = label (real/fake) | O = object labels (topk) | K = k learnable prompts | D = dimension 512 *# + text_f = rearrange( + text_f, + "(b l o) k d -> b l o k d", + b=img_f.shape[0], + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + logits = torch.zeros( + img_f.shape[0], text_f.shape[1], device=self.device + ) # * [BP] + + for i in range(self.K): + i_img_f = img_f[:, i, :] # * [BD] + i_text_f = text_f[:, :, :, i, :] # * [BLOD] + logit = torch.einsum("bd,blod->blo", i_img_f, i_text_f) # * [BLO] + if self.confidence_score_enable: + logit = torch.einsum( + "blo,blo->bl", logit, self.score_weights_labels + ) + else: + logit = reduce(logit, "b l o -> b l", "mean") # * [BL] + logit = self.logit_scale.exp() * logit + logits += logit + logits /= self.K + + else: # default case + text_f = rearrange( + text_f, + "(b p) k d -> b p k d", + b=img_f.shape[0], + p=len(cddb_classnames.values()), + ) + logits = torch.zeros( + img_f.shape[0], text_f.shape[1], device=self.device + ) # * [BP] + + for i in range(self.K): + i_img_f = img_f[:, i, :] # * [BD] + i_text_f = text_f[:, :, i, :] # * [BPD] + logit = torch.einsum("bd,bpd->bp", i_img_f, i_text_f) # * [BP] + logit = self.logit_scale.exp() * logit + logits += logit + logits /= self.K + + return logits + + def interface(self, image, object_labels, total_tasks, keys_dict): + ## * B = batch | N = B*2 = num prompts | D = text features | F = image features | P = prompt per image | K = k learnable prompt for each task | T = task + self.total_tasks = total_tasks + img_prompts = torch.cat( + [ + learner.img_prompt + for idx, learner in enumerate(self.prompt_learner) + if idx < self.total_tasks + ] + ) # * [K*T,D] + text_prompts = torch.cat( + [ + learner.text_prompt + for idx, learner in enumerate(self.prompt_learner) + if idx < self.total_tasks + ] + ) # * [K*T,F] + + self.K = self.K * self.total_tasks # make appropriate masks + self.generate_prompts_from_input(object_labels) + + text_f, _ = self.text_encoder(text_prompts) # * [N,K*T,D] + img_f, i_f = self.image_encoder(image, img_prompts) # * [B,K*T,D] , [B,D] + + prob_dist_dict = { + "real_prob_dist": self.convert_to_prob_distribution( + keys_dict["real_keys_one_cluster"], i_f + ), + "fake_prob_dist": self.convert_to_prob_distribution( + keys_dict["fake_keys_one_cluster"], i_f + ), + "keys_prob_dist": self.convert_to_prob_distribution( + keys_dict["all_keys_one_cluster"], i_f + ), + "upperbound_dist": keys_dict["upperbound"], + } + + selection_mapping = { + "fake": "fake_prob_dist", + "real": "real_prob_dist", + "all": "keys_prob_dist", + "upperbound": "upperbound_dist", + } + + self.prototype_selection = selection_mapping.get(keys_dict["prototype"], None) + + text_f = text_f / text_f.norm(dim=-1, keepdim=True) + img_f = img_f / img_f.norm(dim=-1, keepdim=True) + + self.K = ( + self.K // self.total_tasks + ) # restore K to original value for cosine similarity + logits = self.inference_cosine_similarity( + text_f, img_f, prob_dist_dict + ) # * [B,T,P] + logits = logits + return logits + + def convert_to_prob_distribution(self, keys, i_f): + domain_cls = torch.einsum("bd,td->bt", i_f, keys) 
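+        # Same prototype-matching step as in slinet.py: image features are
+        # compared against the task keys and normalised over tasks just below.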
+ domain_cls = nn.functional.softmax(domain_cls, dim=1) + return domain_cls + + def inference_cosine_similarity(self, text_f, img_f, prob_dist_dict): + if self.ensemble_after_cosine_sim: + assert ( + self.ensemble_before_cosine_sim == False + and self.ensemble_token_embedding == False + ) + text_f = rearrange( + text_f, + "(b l o) k d -> b l o k d", + b=img_f.shape[0], + l=len(cddb_classnames.values()), + o=self.topk_classes, + ) + logits = [] + for t in range(self.total_tasks): + logits_tmp = torch.zeros(img_f.shape[0], text_f.shape[1], device=self.device) # * [B,P] + + t_img_domain_cls = prob_dist_dict[self.prototype_selection][:, t].unsqueeze(-1) # * [B, 1] + t_text_domain_cls = t_img_domain_cls.unsqueeze(-1).unsqueeze(-1) + + for k in range(self.K): + offset = k + t * self.K + i_img_f = img_f[:, offset, :] * t_img_domain_cls # * [B,D] + i_text_f = (text_f[:, :, :, offset, :] * t_text_domain_cls) # * [B,P,D] + logit = torch.einsum("bd,blod->blo", i_img_f, i_text_f) + if self.confidence_score_enable: + logit = torch.einsum("blo,blo->bl", logit, self.score_weights_labels) + else: + logit = reduce(logit, "b l o -> b l", "mean") # * [B,P] + logit = self.logit_scale.exp() * logit + logits_tmp += logit + logits_tmp /= self.K + logits.append(logits_tmp) + + else: + text_f = rearrange( + text_f, + "(b p) k d -> b p k d", + b=img_f.shape[0], + p=len(cddb_classnames.values()), + ) # * [B,P,K*T,D] + logits = [] + for t in range(self.total_tasks): + logits_tmp = torch.zeros(img_f.shape[0], text_f.shape[1], device=self.device) # * [B,P] + + t_img_domain_cls = prob_dist_dict[self.prototype_selection][:, t].unsqueeze(-1) # * [B, 1] + t_text_domain_cls = t_img_domain_cls.unsqueeze(-1) # * [B, P, 1] + # t_text_domain_cls = stack_real_fake_prob[:,:,t].unsqueeze(-1) #* [B, P, 1] + + for k in range(self.K): + offset = k + t * self.K + i_img_f = img_f[:, offset, :] * t_img_domain_cls # * [B,D] + i_text_f = text_f[:, :, offset, :] * t_text_domain_cls # * [B,P,D] + logit = torch.einsum("bd,bpd->bp", i_img_f, i_text_f) # * [B,P] + logit = self.logit_scale.exp() * logit # * t_img_domain_cls + logits_tmp += logit + logits_tmp /= self.K + logits.append(logits_tmp) + logits = torch.stack(logits) # * [T,B,P] + logits = rearrange(logits, "t b p -> b t p") # * [B,T,P] + return logits + + def update_fc(self): + self.numtask += 1 + + def copy(self): + return copy.deepcopy(self) + + def freeze(self): + for param in self.parameters(): + param.requires_grad = False + self.eval() + return self + + def logging_cfg(self): + args = { + attr: getattr(self.cfg, attr) + for attr in dir(self.cfg) + if not attr.startswith("_") + } + for key, value in args.items(): + logging.info("CFG -> {}: {}".format(key, value)) diff --git a/detectors/P2G/src/train.py b/detectors/P2G/src/train.py new file mode 100644 index 0000000000000000000000000000000000000000..3de9b577d2d72e5a278a536ad6a84f73b364544e --- /dev/null +++ b/detectors/P2G/src/train.py @@ -0,0 +1,89 @@ +import json +import argparse +import ast +from trainer import train + + +def main(): + args = setup_parser().parse_args() + param = load_json(args.config) + args = vars(args) # Converting argparse Namespace to a dict. 
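+    # Most CLI options default to argparse.SUPPRESS, so only flags that were
+    # explicitly passed (plus --config and --wandb) end up in this dict and
+    # override the corresponding entries loaded from the JSON config.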
+ param.update(args) # Add parameters from json + train(param) + + +def load_json(settings_path) -> dict: + with open(settings_path) as data_file: + param = json.load(data_file) + + return param + + +def setup_parser(): + parser = argparse.ArgumentParser(description="Prompt2Guard - training part.") + parser.add_argument( + "--config", + type=str, + default="./configs/cddb_training.json", + help="Json file of settings.", + ) + parser.add_argument( + "--K", type=int, default=argparse.SUPPRESS, help="Number of prompts." + ) + parser.add_argument( + "--batch_size", + type=int, + default=argparse.SUPPRESS, + help="Batch size for training.", + ) + parser.add_argument( + "--batch_size_eval", + type=int, + default=argparse.SUPPRESS, + help="Batch size for evaluation.", + ) + parser.add_argument( + "--torch_seed", + type=int, + default=argparse.SUPPRESS, + help="Seed for PyTorch random number generator.", + ) + parser.add_argument( + "--lrate", type=float, default=argparse.SUPPRESS, help="LR for task > 0." + ) + parser.add_argument( + "--init_lr", + type=float, + default=argparse.SUPPRESS, + help="Initial LR for task 0.", + ) + parser.add_argument( + "--epochs", + type=int, + default=argparse.SUPPRESS, + help="Epochs for the other tasks.", + ) + parser.add_argument( + "--wandb", action="store_true", help="Enable Weights & Biases logging." + ) + + parser.add_argument( + "--warmup_epoch", + type=int, + default=argparse.SUPPRESS, + help="Number of warmup epochs.", + ) + parser.add_argument( + "--topk_classes", type=int, default=argparse.SUPPRESS, help="TopK classes." + ) + parser.add_argument( + "--ensembling", + type=ast.literal_eval, + default=argparse.SUPPRESS, + help="List of boolean values for ensembling.", + ) + return parser + + +if __name__ == "__main__": + main() diff --git a/detectors/P2G/src/trainer.py b/detectors/P2G/src/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..f6947a6b112ae61f8f6cf6b14e31cec85d4bfe3b --- /dev/null +++ b/detectors/P2G/src/trainer.py @@ -0,0 +1,122 @@ +import os +import os.path +import sys +import logging +import time +import torch +from utils.data_manager import DataManager +from utils.toolkit import count_parameters +from methods.prompt2guard import Prompt2Guard +import numpy as np + + +def train(args): + logfilename = "logs/{}/{}".format( + args["run_name"].replace("_", "/"), + time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()), + ) + os.makedirs(logfilename) + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(filename)s] => %(message)s", + handlers=[ + logging.FileHandler(filename=logfilename + "/info.log"), + logging.StreamHandler(sys.stdout), + ], + ) + os.makedirs(f'./checkpoint/{args["run_name"]}/weights', exist_ok=True) + + _set_random(args) + print_args(args) + + data_manager = DataManager( + args["dataset"], + args["shuffle"], + args["seed"], + args["init_cls"], + args["increment"], + args, + ) + args["class_order"] = data_manager._class_order + args["filename"] = os.path.join(logfilename, "task") + model = Prompt2Guard(args) + + acc_matrix = { + "top1": np.zeros((data_manager.nb_tasks, data_manager.nb_tasks)), + "mean": np.zeros((data_manager.nb_tasks, data_manager.nb_tasks)), + "mix_top_mean": np.zeros((data_manager.nb_tasks, data_manager.nb_tasks)), + } + label_history = [] + + for task in range(data_manager.nb_tasks): + logging.info("All params: {}".format(count_parameters(model.network))) + logging.info("Trainable params: {}".format(count_parameters(model.network, True))) + 
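+        # Continual-learning loop: fit the model on the current task, then evaluate
+        # on all tasks seen so far and store the results in the accuracy matrices.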
model.incremental_train(data_manager) + record_task_accuracy(task, model.eval_task(), acc_matrix, label_history) + model.after_task(data_manager.nb_tasks) + model.save_checkpoint() + + + +def _compute_AF(matrix): + total_bwt = 0 + N = matrix.shape[0] + for i in range(N - 1): # Iterate through each task except the last one + bwt_i = 0 + for j in range(i + 1, N): # Iterate from task i+1 to N to calculate BWT_i + bwt_i += matrix[j, i] - matrix[i, i] + bwt_i /= N - i - 1 # Normalize by the number of tasks considered for this BWT_i + total_bwt += bwt_i + af = total_bwt / (N - 1) # Calculate the average of all BWT_i + return af + + +def compute_forgetting(model: Prompt2Guard, acc_matrix): + for k in acc_matrix.keys(): + forgetting = _compute_AF(acc_matrix[k]) + logging.info("Avg Forgetting of {}: {:.4f}".format(k, forgetting)) + + +def record_task_accuracy( + current_task, current_task_acc: dict, matrix_dict: dict, label_history: list +): + label_history.append( + "{}-{}".format( + str(current_task * 2).zfill(2), str(current_task * 2 + 1).zfill(2) + ) + ) + for logit_ops in current_task_acc.keys(): + dict_subset = { + k: current_task_acc[logit_ops][k] + for k in label_history + if k in current_task_acc[logit_ops] + } + for idx_label, label_task in enumerate(dict_subset): + matrix_dict[logit_ops][current_task][idx_label] = current_task_acc[ + logit_ops + ][label_task] + + for key, value in current_task_acc.items(): + logging.info(f"Performance Task {current_task} for {key}: {value}") + + +def _set_device(args): + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + device = torch.device("cpu") + logging.info("Device: " + device.type) + args["device"] = device + + +def _set_random(args): + torch.manual_seed(args["torch_seed"]) + torch.cuda.manual_seed(args["torch_seed"]) + torch.cuda.manual_seed_all(args["torch_seed"]) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def print_args(args): + for key, value in args.items(): + logging.info("{}: {}".format(key, value)) diff --git a/detectors/P2G/src/utils/.DS_Store b/detectors/P2G/src/utils/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..9a9fbb6979d17c51e58eafde2a3dbf1e8c99c5c3 Binary files /dev/null and b/detectors/P2G/src/utils/.DS_Store differ diff --git a/detectors/P2G/src/utils/__init__.py b/detectors/P2G/src/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectors/P2G/src/utils/__pycache__/__init__.cpython-310.pyc b/detectors/P2G/src/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a34a0a4c97f6e3dba31413b8826c7d09529b773a Binary files /dev/null and b/detectors/P2G/src/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/P2G/src/utils/__pycache__/class_names.cpython-310.pyc b/detectors/P2G/src/utils/__pycache__/class_names.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91ed9e1b41bf0b32649f565eb7d056edd75d4d71 Binary files /dev/null and b/detectors/P2G/src/utils/__pycache__/class_names.cpython-310.pyc differ diff --git a/detectors/P2G/src/utils/__pycache__/data.cpython-310.pyc b/detectors/P2G/src/utils/__pycache__/data.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d41a36df1bdde68857d1ff27bea4ba328b1432cb Binary files /dev/null and b/detectors/P2G/src/utils/__pycache__/data.cpython-310.pyc differ diff --git 
a/detectors/P2G/src/utils/__pycache__/data_manager.cpython-310.pyc b/detectors/P2G/src/utils/__pycache__/data_manager.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c9eb18c51b4ce343677beadb33082f93efe92a5 Binary files /dev/null and b/detectors/P2G/src/utils/__pycache__/data_manager.cpython-310.pyc differ diff --git a/detectors/P2G/src/utils/__pycache__/lr_scheduler.cpython-310.pyc b/detectors/P2G/src/utils/__pycache__/lr_scheduler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34eafc38d561eb2e5218fb76e08d005e6b90fea7 Binary files /dev/null and b/detectors/P2G/src/utils/__pycache__/lr_scheduler.cpython-310.pyc differ diff --git a/detectors/P2G/src/utils/__pycache__/toolkit.cpython-310.pyc b/detectors/P2G/src/utils/__pycache__/toolkit.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e339a7b885d0270958fd20985f801086c506f11c Binary files /dev/null and b/detectors/P2G/src/utils/__pycache__/toolkit.cpython-310.pyc differ diff --git a/detectors/P2G/src/utils/class_names.py b/detectors/P2G/src/utils/class_names.py new file mode 100644 index 0000000000000000000000000000000000000000..4eaeb4dfe8945f45089f6dc567e2af249604ab24 --- /dev/null +++ b/detectors/P2G/src/utils/class_names.py @@ -0,0 +1,1016 @@ +cddb_classnames = { + 0: "real", + 1: "fake", +} + +facedataset_classnames = { + 0: "young male face", + 1: "young female face", + 2: "middle aged male face", + 3: "middle aged female face", + 4: "old male face", + 5: "old female face", +} + +imagenet1k_classnames = { + 0: "tench", + 1: "goldfish", + 2: "great white shark", + 3: "tiger shark", + 4: "hammerhead shark", + 5: "electric ray", + 6: "stingray", + 7: "cock", + 8: "hen", + 9: "ostrich", + 10: "brambling", + 11: "goldfinch", + 12: "house finch", + 13: "junco", + 14: "indigo bunting", + 15: "American robin", + 16: "bulbul", + 17: "jay", + 18: "magpie", + 19: "chickadee", + 20: "American dipper", + 21: "kite", + 22: "bald eagle", + 23: "vulture", + 24: "great grey owl", + 25: "fire salamander", + 26: "smooth newt", + 27: "newt", + 28: "spotted salamander", + 29: "axolotl", + 30: "American bullfrog", + 31: "tree frog", + 32: "tailed frog", + 33: "loggerhead sea turtle", + 34: "leatherback sea turtle", + 35: "mud turtle", + 36: "terrapin", + 37: "box turtle", + 38: "banded gecko", + 39: "green iguana", + 40: "Carolina anole", + 41: "desert grassland whiptail lizard", + 42: "agama", + 43: "frilled-necked lizard", + 44: "alligator lizard", + 45: "Gila monster", + 46: "European green lizard", + 47: "chameleon", + 48: "Komodo dragon", + 49: "Nile crocodile", + 50: "American alligator", + 51: "triceratops", + 52: "worm snake", + 53: "ring-necked snake", + 54: "eastern hog-nosed snake", + 55: "smooth green snake", + 56: "kingsnake", + 57: "garter snake", + 58: "water snake", + 59: "vine snake", + 60: "night snake", + 61: "boa constrictor", + 62: "African rock python", + 63: "Indian cobra", + 64: "green mamba", + 65: "sea snake", + 66: "Saharan horned viper", + 67: "eastern diamondback rattlesnake", + 68: "sidewinder", + 69: "trilobite", + 70: "harvestman", + 71: "scorpion", + 72: "yellow garden spider", + 73: "barn spider", + 74: "European garden spider", + 75: "southern black widow", + 76: "tarantula", + 77: "wolf spider", + 78: "tick", + 79: "centipede", + 80: "black grouse", + 81: "ptarmigan", + 82: "ruffed grouse", + 83: "prairie grouse", + 84: "peacock", + 85: "quail", + 86: "partridge", + 87: "grey parrot", + 88: "macaw", + 89: 
"sulphur-crested cockatoo", + 90: "lorikeet", + 91: "coucal", + 92: "bee eater", + 93: "hornbill", + 94: "hummingbird", + 95: "jacamar", + 96: "toucan", + 97: "duck", + 98: "red-breasted merganser", + 99: "goose", + 100: "black swan", + 101: "tusker", + 102: "echidna", + 103: "platypus", + 104: "wallaby", + 105: "koala", + 106: "wombat", + 107: "jellyfish", + 108: "sea anemone", + 109: "brain coral", + 110: "flatworm", + 111: "nematode", + 112: "conch", + 113: "snail", + 114: "slug", + 115: "sea slug", + 116: "chiton", + 117: "chambered nautilus", + 118: "Dungeness crab", + 119: "rock crab", + 120: "fiddler crab", + 121: "red king crab", + 122: "American lobster", + 123: "spiny lobster", + 124: "crayfish", + 125: "hermit crab", + 126: "isopod", + 127: "white stork", + 128: "black stork", + 129: "spoonbill", + 130: "flamingo", + 131: "little blue heron", + 132: "great egret", + 133: "bittern", + 134: "crane (bird)", + 135: "limpkin", + 136: "common gallinule", + 137: "American coot", + 138: "bustard", + 139: "ruddy turnstone", + 140: "dunlin", + 141: "common redshank", + 142: "dowitcher", + 143: "oystercatcher", + 144: "pelican", + 145: "king penguin", + 146: "albatross", + 147: "grey whale", + 148: "killer whale", + 149: "dugong", + 150: "sea lion", + 151: "Chihuahua", + 152: "Japanese Chin", + 153: "Maltese", + 154: "Pekingese", + 155: "Shih Tzu", + 156: "King Charles Spaniel", + 157: "Papillon", + 158: "toy terrier", + 159: "Rhodesian Ridgeback", + 160: "Afghan Hound", + 161: "Basset Hound", + 162: "Beagle", + 163: "Bloodhound", + 164: "Bluetick Coonhound", + 165: "Black and Tan Coonhound", + 166: "Treeing Walker Coonhound", + 167: "English foxhound", + 168: "Redbone Coonhound", + 169: "borzoi", + 170: "Irish Wolfhound", + 171: "Italian Greyhound", + 172: "Whippet", + 173: "Ibizan Hound", + 174: "Norwegian Elkhound", + 175: "Otterhound", + 176: "Saluki", + 177: "Scottish Deerhound", + 178: "Weimaraner", + 179: "Staffordshire Bull Terrier", + 180: "American Staffordshire Terrier", + 181: "Bedlington Terrier", + 182: "Border Terrier", + 183: "Kerry Blue Terrier", + 184: "Irish Terrier", + 185: "Norfolk Terrier", + 186: "Norwich Terrier", + 187: "Yorkshire Terrier", + 188: "Wire Fox Terrier", + 189: "Lakeland Terrier", + 190: "Sealyham Terrier", + 191: "Airedale Terrier", + 192: "Cairn Terrier", + 193: "Australian Terrier", + 194: "Dandie Dinmont Terrier", + 195: "Boston Terrier", + 196: "Miniature Schnauzer", + 197: "Giant Schnauzer", + 198: "Standard Schnauzer", + 199: "Scottish Terrier", + 200: "Tibetan Terrier", + 201: "Australian Silky Terrier", + 202: "Soft-coated Wheaten Terrier", + 203: "West Highland White Terrier", + 204: "Lhasa Apso", + 205: "Flat-Coated Retriever", + 206: "Curly-coated Retriever", + 207: "Golden Retriever", + 208: "Labrador Retriever", + 209: "Chesapeake Bay Retriever", + 210: "German Shorthaired Pointer", + 211: "Vizsla", + 212: "English Setter", + 213: "Irish Setter", + 214: "Gordon Setter", + 215: "Brittany Spaniel", + 216: "Clumber Spaniel", + 217: "English Springer Spaniel", + 218: "Welsh Springer Spaniel", + 219: "Cocker Spaniels", + 220: "Sussex Spaniel", + 221: "Irish Water Spaniel", + 222: "Kuvasz", + 223: "Schipperke", + 224: "Groenendael", + 225: "Malinois", + 226: "Briard", + 227: "Australian Kelpie", + 228: "Komondor", + 229: "Old English Sheepdog", + 230: "Shetland Sheepdog", + 231: "collie", + 232: "Border Collie", + 233: "Bouvier des Flandres", + 234: "Rottweiler", + 235: "German Shepherd Dog", + 236: "Dobermann", + 237: "Miniature Pinscher", + 
238: "Greater Swiss Mountain Dog", + 239: "Bernese Mountain Dog", + 240: "Appenzeller Sennenhund", + 241: "Entlebucher Sennenhund", + 242: "Boxer", + 243: "Bullmastiff", + 244: "Tibetan Mastiff", + 245: "French Bulldog", + 246: "Great Dane", + 247: "St. Bernard", + 248: "husky", + 249: "Alaskan Malamute", + 250: "Siberian Husky", + 251: "Dalmatian", + 252: "Affenpinscher", + 253: "Basenji", + 254: "pug", + 255: "Leonberger", + 256: "Newfoundland", + 257: "Pyrenean Mountain Dog", + 258: "Samoyed", + 259: "Pomeranian", + 260: "Chow Chow", + 261: "Keeshond", + 262: "Griffon Bruxellois", + 263: "Pembroke Welsh Corgi", + 264: "Cardigan Welsh Corgi", + 265: "Toy Poodle", + 266: "Miniature Poodle", + 267: "Standard Poodle", + 268: "Mexican hairless dog", + 269: "grey wolf", + 270: "Alaskan tundra wolf", + 271: "red wolf", + 272: "coyote", + 273: "dingo", + 274: "dhole", + 275: "African wild dog", + 276: "hyena", + 277: "red fox", + 278: "kit fox", + 279: "Arctic fox", + 280: "grey fox", + 281: "tabby cat", + 282: "tiger cat", + 283: "Persian cat", + 284: "Siamese cat", + 285: "Egyptian Mau", + 286: "cougar", + 287: "lynx", + 288: "leopard", + 289: "snow leopard", + 290: "jaguar", + 291: "lion", + 292: "tiger", + 293: "cheetah", + 294: "brown bear", + 295: "American black bear", + 296: "polar bear", + 297: "sloth bear", + 298: "mongoose", + 299: "meerkat", + 300: "tiger beetle", + 301: "ladybug", + 302: "ground beetle", + 303: "longhorn beetle", + 304: "leaf beetle", + 305: "dung beetle", + 306: "rhinoceros beetle", + 307: "weevil", + 308: "fly", + 309: "bee", + 310: "ant", + 311: "grasshopper", + 312: "cricket", + 313: "stick insect", + 314: "cockroach", + 315: "mantis", + 316: "cicada", + 317: "leafhopper", + 318: "lacewing", + 319: "dragonfly", + 320: "damselfly", + 321: "red admiral", + 322: "ringlet", + 323: "monarch butterfly", + 324: "small white", + 325: "sulphur butterfly", + 326: "gossamer-winged butterfly", + 327: "starfish", + 328: "sea urchin", + 329: "sea cucumber", + 330: "cottontail rabbit", + 331: "hare", + 332: "Angora rabbit", + 333: "hamster", + 334: "porcupine", + 335: "fox squirrel", + 336: "marmot", + 337: "beaver", + 338: "guinea pig", + 339: "common sorrel", + 340: "zebra", + 341: "pig", + 342: "wild boar", + 343: "warthog", + 344: "hippopotamus", + 345: "ox", + 346: "water buffalo", + 347: "bison", + 348: "ram", + 349: "bighorn sheep", + 350: "Alpine ibex", + 351: "hartebeest", + 352: "impala", + 353: "gazelle", + 354: "dromedary", + 355: "llama", + 356: "weasel", + 357: "mink", + 358: "European polecat", + 359: "black-footed ferret", + 360: "otter", + 361: "skunk", + 362: "badger", + 363: "armadillo", + 364: "three-toed sloth", + 365: "orangutan", + 366: "gorilla", + 367: "chimpanzee", + 368: "gibbon", + 369: "siamang", + 370: "guenon", + 371: "patas monkey", + 372: "baboon", + 373: "macaque", + 374: "langur", + 375: "black-and-white colobus", + 376: "proboscis monkey", + 377: "marmoset", + 378: "white-headed capuchin", + 379: "howler monkey", + 380: "titi", + 381: "Geoffroy's spider monkey", + 382: "common squirrel monkey", + 383: "ring-tailed lemur", + 384: "indri", + 385: "Asian elephant", + 386: "African bush elephant", + 387: "red panda", + 388: "giant panda", + 389: "snoek", + 390: "eel", + 391: "coho salmon", + 392: "rock beauty", + 393: "clownfish", + 394: "sturgeon", + 395: "garfish", + 396: "lionfish", + 397: "pufferfish", + 398: "abacus", + 399: "abaya", + 400: "academic gown", + 401: "accordion", + 402: "acoustic guitar", + 403: "aircraft carrier", + 404: 
"airliner", + 405: "airship", + 406: "altar", + 407: "ambulance", + 408: "amphibious vehicle", + 409: "analog clock", + 410: "apiary", + 411: "apron", + 412: "waste container", + 413: "assault rifle", + 414: "backpack", + 415: "bakery", + 416: "balance beam", + 417: "balloon", + 418: "ballpoint pen", + 419: "Band-Aid", + 420: "banjo", + 421: "baluster", + 422: "barbell", + 423: "barber chair", + 424: "barbershop", + 425: "barn", + 426: "barometer", + 427: "barrel", + 428: "wheelbarrow", + 429: "baseball", + 430: "basketball", + 431: "bassinet", + 432: "bassoon", + 433: "swimming cap", + 434: "bath towel", + 435: "bathtub", + 436: "station wagon", + 437: "lighthouse", + 438: "beaker", + 439: "military cap", + 440: "beer bottle", + 441: "beer glass", + 442: "bell-cot", + 443: "bib", + 444: "tandem bicycle", + 445: "bikini", + 446: "ring binder", + 447: "binoculars", + 448: "birdhouse", + 449: "boathouse", + 450: "bobsleigh", + 451: "bolo tie", + 452: "poke bonnet", + 453: "bookcase", + 454: "bookstore", + 455: "bottle cap", + 456: "bow", + 457: "bow tie", + 458: "brass", + 459: "bra", + 460: "breakwater", + 461: "breastplate", + 462: "broom", + 463: "bucket", + 464: "buckle", + 465: "bulletproof vest", + 466: "high-speed train", + 467: "butcher shop", + 468: "taxicab", + 469: "cauldron", + 470: "candle", + 471: "cannon", + 472: "canoe", + 473: "can opener", + 474: "cardigan", + 475: "car mirror", + 476: "carousel", + 477: "tool kit", + 478: "carton", + 479: "car wheel", + 480: "automated teller machine", + 481: "cassette", + 482: "cassette player", + 483: "castle", + 484: "catamaran", + 485: "CD player", + 486: "cello", + 487: "mobile phone", + 488: "chain", + 489: "chain-link fence", + 490: "chain mail", + 491: "chainsaw", + 492: "chest", + 493: "chiffonier", + 494: "chime", + 495: "china cabinet", + 496: "Christmas stocking", + 497: "church", + 498: "movie theater", + 499: "cleaver", + 500: "cliff dwelling", + 501: "cloak", + 502: "clogs", + 503: "cocktail shaker", + 504: "coffee mug", + 505: "coffeemaker", + 506: "coil", + 507: "combination lock", + 508: "computer keyboard", + 509: "confectionery store", + 510: "container ship", + 511: "convertible", + 512: "corkscrew", + 513: "cornet", + 514: "cowboy boot", + 515: "cowboy hat", + 516: "cradle", + 517: "crane (machine)", + 518: "crash helmet", + 519: "crate", + 520: "infant bed", + 521: "Crock Pot", + 522: "croquet ball", + 523: "crutch", + 524: "cuirass", + 525: "dam", + 526: "desk", + 527: "desktop computer", + 528: "rotary dial telephone", + 529: "diaper", + 530: "digital clock", + 531: "digital watch", + 532: "dining table", + 533: "dishcloth", + 534: "dishwasher", + 535: "disc brake", + 536: "dock", + 537: "dog sled", + 538: "dome", + 539: "doormat", + 540: "drilling rig", + 541: "drum", + 542: "drumstick", + 543: "dumbbell", + 544: "Dutch oven", + 545: "electric fan", + 546: "electric guitar", + 547: "electric locomotive", + 548: "entertainment center", + 549: "envelope", + 550: "espresso machine", + 551: "face powder", + 552: "feather boa", + 553: "filing cabinet", + 554: "fireboat", + 555: "fire engine", + 556: "fire screen sheet", + 557: "flagpole", + 558: "flute", + 559: "folding chair", + 560: "football helmet", + 561: "forklift", + 562: "fountain", + 563: "fountain pen", + 564: "four-poster bed", + 565: "freight car", + 566: "French horn", + 567: "frying pan", + 568: "fur coat", + 569: "garbage truck", + 570: "gas mask", + 571: "gas pump", + 572: "goblet", + 573: "go-kart", + 574: "golf ball", + 575: "golf cart", + 576: 
"gondola", + 577: "gong", + 578: "gown", + 579: "grand piano", + 580: "greenhouse", + 581: "grille", + 582: "grocery store", + 583: "guillotine", + 584: "barrette", + 585: "hair spray", + 586: "half-track", + 587: "hammer", + 588: "hamper", + 589: "hair dryer", + 590: "hand-held computer", + 591: "handkerchief", + 592: "hard disk drive", + 593: "harmonica", + 594: "harp", + 595: "harvester", + 596: "hatchet", + 597: "holster", + 598: "home theater", + 599: "honeycomb", + 600: "hook", + 601: "hoop skirt", + 602: "horizontal bar", + 603: "horse-drawn vehicle", + 604: "hourglass", + 605: "iPod", + 606: "clothes iron", + 607: "jack-o'-lantern", + 608: "jeans", + 609: "jeep", + 610: "T-shirt", + 611: "jigsaw puzzle", + 612: "pulled rickshaw", + 613: "joystick", + 614: "kimono", + 615: "knee pad", + 616: "knot", + 617: "lab coat", + 618: "ladle", + 619: "lampshade", + 620: "laptop computer", + 621: "lawn mower", + 622: "lens cap", + 623: "paper knife", + 624: "library", + 625: "lifeboat", + 626: "lighter", + 627: "limousine", + 628: "ocean liner", + 629: "lipstick", + 630: "slip-on shoe", + 631: "lotion", + 632: "speaker", + 633: "loupe", + 634: "sawmill", + 635: "magnetic compass", + 636: "mail bag", + 637: "mailbox", + 638: "tights", + 639: "tank suit", + 640: "manhole cover", + 641: "maraca", + 642: "marimba", + 643: "mask", + 644: "match", + 645: "maypole", + 646: "maze", + 647: "measuring cup", + 648: "medicine chest", + 649: "megalith", + 650: "microphone", + 651: "microwave oven", + 652: "military uniform", + 653: "milk can", + 654: "minibus", + 655: "miniskirt", + 656: "minivan", + 657: "missile", + 658: "mitten", + 659: "mixing bowl", + 660: "mobile home", + 661: "Model T", + 662: "modem", + 663: "monastery", + 664: "monitor", + 665: "moped", + 666: "mortar", + 667: "square academic cap", + 668: "mosque", + 669: "mosquito net", + 670: "scooter", + 671: "mountain bike", + 672: "tent", + 673: "computer mouse", + 674: "mousetrap", + 675: "moving van", + 676: "muzzle", + 677: "nail", + 678: "neck brace", + 679: "necklace", + 680: "nipple", + 681: "notebook computer", + 682: "obelisk", + 683: "oboe", + 684: "ocarina", + 685: "odometer", + 686: "oil filter", + 687: "organ", + 688: "oscilloscope", + 689: "overskirt", + 690: "bullock cart", + 691: "oxygen mask", + 692: "packet", + 693: "paddle", + 694: "paddle wheel", + 695: "padlock", + 696: "paintbrush", + 697: "pajamas", + 698: "palace", + 699: "pan flute", + 700: "paper towel", + 701: "parachute", + 702: "parallel bars", + 703: "park bench", + 704: "parking meter", + 705: "passenger car", + 706: "patio", + 707: "payphone", + 708: "pedestal", + 709: "pencil case", + 710: "pencil sharpener", + 711: "perfume", + 712: "Petri dish", + 713: "photocopier", + 714: "plectrum", + 715: "Pickelhaube", + 716: "picket fence", + 717: "pickup truck", + 718: "pier", + 719: "piggy bank", + 720: "pill bottle", + 721: "pillow", + 722: "ping-pong ball", + 723: "pinwheel", + 724: "pirate ship", + 725: "pitcher", + 726: "hand plane", + 727: "planetarium", + 728: "plastic bag", + 729: "plate rack", + 730: "plow", + 731: "plunger", + 732: "Polaroid camera", + 733: "pole", + 734: "police van", + 735: "poncho", + 736: "billiard table", + 737: "soda bottle", + 738: "pot", + 739: "potter's wheel", + 740: "power drill", + 741: "prayer rug", + 742: "printer", + 743: "prison", + 744: "projectile", + 745: "projector", + 746: "hockey puck", + 747: "punching bag", + 748: "purse", + 749: "quill", + 750: "quilt", + 751: "race car", + 752: "racket", + 753: "radiator", + 754: 
"radio", + 755: "radio telescope", + 756: "rain barrel", + 757: "recreational vehicle", + 758: "reel", + 759: "reflex camera", + 760: "refrigerator", + 761: "remote control", + 762: "restaurant", + 763: "revolver", + 764: "rifle", + 765: "rocking chair", + 766: "rotisserie", + 767: "eraser", + 768: "rugby ball", + 769: "ruler", + 770: "running shoe", + 771: "safe", + 772: "safety pin", + 773: "salt shaker", + 774: "sandal", + 775: "sarong", + 776: "saxophone", + 777: "scabbard", + 778: "weighing scale", + 779: "school bus", + 780: "schooner", + 781: "scoreboard", + 782: "CRT screen", + 783: "screw", + 784: "screwdriver", + 785: "seat belt", + 786: "sewing machine", + 787: "shield", + 788: "shoe store", + 789: "shoji", + 790: "shopping basket", + 791: "shopping cart", + 792: "shovel", + 793: "shower cap", + 794: "shower curtain", + 795: "ski", + 796: "ski mask", + 797: "sleeping bag", + 798: "slide rule", + 799: "sliding door", + 800: "slot machine", + 801: "snorkel", + 802: "snowmobile", + 803: "snowplow", + 804: "soap dispenser", + 805: "soccer ball", + 806: "sock", + 807: "solar thermal collector", + 808: "sombrero", + 809: "soup bowl", + 810: "space bar", + 811: "space heater", + 812: "space shuttle", + 813: "spatula", + 814: "motorboat", + 815: "spider web", + 816: "spindle", + 817: "sports car", + 818: "spotlight", + 819: "stage", + 820: "steam locomotive", + 821: "through arch bridge", + 822: "steel drum", + 823: "stethoscope", + 824: "scarf", + 825: "stone wall", + 826: "stopwatch", + 827: "stove", + 828: "strainer", + 829: "tram", + 830: "stretcher", + 831: "couch", + 832: "stupa", + 833: "submarine", + 834: "suit", + 835: "sundial", + 836: "sunglass", + 837: "sunglasses", + 838: "sunscreen", + 839: "suspension bridge", + 840: "mop", + 841: "sweatshirt", + 842: "swimsuit", + 843: "swing", + 844: "switch", + 845: "syringe", + 846: "table lamp", + 847: "tank", + 848: "tape player", + 849: "teapot", + 850: "teddy bear", + 851: "television", + 852: "tennis ball", + 853: "thatched roof", + 854: "front curtain", + 855: "thimble", + 856: "threshing machine", + 857: "throne", + 858: "tile roof", + 859: "toaster", + 860: "tobacco shop", + 861: "toilet seat", + 862: "torch", + 863: "totem pole", + 864: "tow truck", + 865: "toy store", + 866: "tractor", + 867: "semi-trailer truck", + 868: "tray", + 869: "trench coat", + 870: "tricycle", + 871: "trimaran", + 872: "tripod", + 873: "triumphal arch", + 874: "trolleybus", + 875: "trombone", + 876: "tub", + 877: "turnstile", + 878: "typewriter keyboard", + 879: "umbrella", + 880: "unicycle", + 881: "upright piano", + 882: "vacuum cleaner", + 883: "vase", + 884: "vault", + 885: "velvet", + 886: "vending machine", + 887: "vestment", + 888: "viaduct", + 889: "violin", + 890: "volleyball", + 891: "waffle iron", + 892: "wall clock", + 893: "wallet", + 894: "wardrobe", + 895: "military aircraft", + 896: "sink", + 897: "washing machine", + 898: "water bottle", + 899: "water jug", + 900: "water tower", + 901: "whiskey jug", + 902: "whistle", + 903: "wig", + 904: "window screen", + 905: "window shade", + 906: "Windsor tie", + 907: "wine bottle", + 908: "wing", + 909: "wok", + 910: "wooden spoon", + 911: "wool", + 912: "split-rail fence", + 913: "shipwreck", + 914: "yawl", + 915: "yurt", + 916: "website", + 917: "comic book", + 918: "crossword", + 919: "traffic sign", + 920: "traffic light", + 921: "dust jacket", + 922: "menu", + 923: "plate", + 924: "guacamole", + 925: "consomme", + 926: "hot pot", + 927: "trifle", + 928: "ice cream", + 929: "ice pop", + 
930: "baguette", + 931: "bagel", + 932: "pretzel", + 933: "cheeseburger", + 934: "hot dog", + 935: "mashed potato", + 936: "cabbage", + 937: "broccoli", + 938: "cauliflower", + 939: "zucchini", + 940: "spaghetti squash", + 941: "acorn squash", + 942: "butternut squash", + 943: "cucumber", + 944: "artichoke", + 945: "bell pepper", + 946: "cardoon", + 947: "mushroom", + 948: "Granny Smith", + 949: "strawberry", + 950: "orange", + 951: "lemon", + 952: "fig", + 953: "pineapple", + 954: "banana", + 955: "jackfruit", + 956: "custard apple", + 957: "pomegranate", + 958: "hay", + 959: "carbonara", + 960: "chocolate syrup", + 961: "dough", + 962: "meatloaf", + 963: "pizza", + 964: "pot pie", + 965: "burrito", + 966: "red wine", + 967: "espresso", + 968: "cup", + 969: "eggnog", + 970: "alp", + 971: "bubble", + 972: "cliff", + 973: "coral reef", + 974: "geyser", + 975: "lakeshore", + 976: "promontory", + 977: "shoal", + 978: "seashore", + 979: "valley", + 980: "volcano", + 981: "baseball player", + 982: "bridegroom", + 983: "scuba diver", + 984: "rapeseed", + 985: "daisy", + 986: "yellow lady's slipper", + 987: "corn", + 988: "acorn", + 989: "rose hip", + 990: "horse chestnut seed", + 991: "coral fungus", + 992: "agaric", + 993: "gyromitra", + 994: "stinkhorn mushroom", + 995: "earth star", + 996: "hen-of-the-woods", + 997: "bolete", + 998: "ear of corn", + 999: "toilet paper", +} diff --git a/detectors/P2G/src/utils/classes.pkl b/detectors/P2G/src/utils/classes.pkl new file mode 100644 index 0000000000000000000000000000000000000000..72959b7aca39a0ca3a2774a6ac2e04bbc9046fe7 --- /dev/null +++ b/detectors/P2G/src/utils/classes.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a1b02a1e4ebae515ae7e3a8ef85cdda8916e1c65741fd026f401723b17daa8 +size 10909199 diff --git a/detectors/P2G/src/utils/data.py b/detectors/P2G/src/utils/data.py new file mode 100644 index 0000000000000000000000000000000000000000..129732480aecde91cac87fb9049a196a289ab504 --- /dev/null +++ b/detectors/P2G/src/utils/data.py @@ -0,0 +1,166 @@ +import os +import numpy as np +from torchvision import datasets, transforms +from utils.toolkit import split_images_labels +import json +import bisect + +class iData(object): + train_trsf = [] + test_trsf = [] + common_trsf = [] + class_order = None + + +class CDDB_benchmark(object): + use_path = True + train_trsf = [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ColorJitter(brightness=63 / 255), + ] + test_trsf = [ + transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC), + transforms.CenterCrop(224), + ] + common_trsf = [ + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + + def __init__(self, args): + self.args = args + class_order = args["class_order"] + self.class_order = class_order + + def download_data(self): + + train_dataset = [] + test_dataset = [] + for id, name in enumerate(self.args["task_name"]): + root_ = os.path.join(self.args["data_path"], name, "train") + sub_classes = os.listdir(root_) if self.args["multiclass"][id] else [""] + for cls in sub_classes: + for imgname in os.listdir(os.path.join(root_, cls, "0_real")): + train_dataset.append( + (os.path.join(root_, cls, "0_real", imgname), 0 + 2 * id) + ) + for imgname in os.listdir(os.path.join(root_, cls, "1_fake")): + train_dataset.append( + (os.path.join(root_, cls, "1_fake", imgname), 1 + 2 * id) + ) + + for id, name in enumerate(self.args["task_name"]): + root_ = 
os.path.join(self.args["data_path"], name, "val") + sub_classes = os.listdir(root_) if self.args["multiclass"][id] else [""] + for cls in sub_classes: + for imgname in os.listdir(os.path.join(root_, cls, "0_real")): + test_dataset.append( + (os.path.join(root_, cls, "0_real", imgname), 0 + 2 * id) + ) + for imgname in os.listdir(os.path.join(root_, cls, "1_fake")): + test_dataset.append( + (os.path.join(root_, cls, "1_fake", imgname), 1 + 2 * id) + ) + + self.train_data, self.train_targets = split_images_labels(train_dataset) + self.test_data, self.test_targets = split_images_labels(test_dataset) + + +def parse_dataset(data_keys): + gen_keys = { + 'gan1':['StyleGAN'], + 'gan2':['StyleGAN2'], + 'gan3':['StyleGAN3'], + 'sd15':['StableDiffusion1.5'], + 'sd2':['StableDiffusion2'], + 'sd3':['StableDiffusion3'], + 'sdXL':['StableDiffusionXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + + gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()] + # gen_keys['gan'] = [gen_keys[key][0] for key in gen_keys.keys() if 'gan' in key] + # gen_keys['sd'] = [gen_keys[key][0] for key in gen_keys.keys() if 'sd' in key] + gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key] + + mod_keys = { + 'pre': ['PreSocial'], + 'fb': ['Facebook'], + 'tl': ['Telegram'], + 'tw': ['X'], + } + + mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()] + mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']] + + dataset_list = [] + for data in data_keys.split('&'): + gen, mod = data.split(':') + dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]}) + + return dataset_list + +class TrueFake_benchmark(object): + use_path = True + train_trsf = [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ColorJitter(brightness=63 / 255), + ] + test_trsf = [ + transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC), + transforms.CenterCrop(224), + ] + common_trsf = [ + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + + def __init__(self, args): + self.args = args + class_order = args["class_order"] + self.class_order = class_order + + def _in_list(self, split, elem): + i = bisect.bisect_left(split, elem) + return i != len(split) and split[i] == elem + + def download_data(self): + + with open(self.args["split_file"], "r") as f: + splits = json.load(f) + train_split = sorted(splits["train"]) + val_split = sorted(splits["val"]) + + train_dataset = [] + test_dataset = [] + + + for id, name in enumerate(self.args["task_name"]): + dataset_list = parse_dataset(name) + + for dict in dataset_list: + generators = dict['gen'] + modifiers = dict['mod'] + + for mod in modifiers: + for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.args["data_path"], mod), topdown=True, followlinks=True): + if len(dataset_dirs): + continue + + (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.args["data_path"], mod) + os.sep, '').split(os.sep)[:3] + + if gen in generators: + for filename in sorted(dataset_files): + if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']: + if self._in_list(train_split, os.path.join(gen, sub, os.path.splitext(filename)[0])): + train_dataset.append((os.path.join(dataset_root, filename), (1 if label == 'Fake' else 0) + 2 * id)) + + if self._in_list(val_split, os.path.join(gen, sub, os.path.splitext(filename)[0])): + 
test_dataset.append((os.path.join(dataset_root, filename), (1 if label == 'Fake' else 0) + 2 * id)) + + self.train_data, self.train_targets = split_images_labels(train_dataset) + self.test_data, self.test_targets = split_images_labels(test_dataset) diff --git a/detectors/P2G/src/utils/data_manager.py b/detectors/P2G/src/utils/data_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..0914a685a1479393346a50feb8b65cd7c803ce76 --- /dev/null +++ b/detectors/P2G/src/utils/data_manager.py @@ -0,0 +1,252 @@ +import logging +import numpy as np +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True +from torch.utils.data import Dataset +from torchvision import transforms +from utils.data import CDDB_benchmark, TrueFake_benchmark +import pickle +import os + + +class DataManager(object): + def __init__(self, dataset_name, shuffle, seed, init_cls, increment, args): + self.args = args + self.dataset_name = dataset_name + self._setup_data(dataset_name, shuffle, seed) + assert init_cls <= len(self._class_order), "No enough classes." + self._increments = [init_cls] + while sum(self._increments) + increment < len(self._class_order): + self._increments.append(increment) + offset = len(self._class_order) - sum(self._increments) + if offset > 0: + self._increments.append(offset) + + @property + def nb_tasks(self): + return len(self._increments) + + def get_task_size(self, task): + return self._increments[task] + + def get_dataset(self, indices, source, mode, appendent=None, ret_data=False): + if source == "train": + x, y = self._train_data, self._train_targets + elif source == "test": + x, y = self._test_data, self._test_targets + else: + raise ValueError("Unknown data source {}.".format(source)) + + if mode == "train": + trsf = transforms.Compose([*self._train_trsf, *self._common_trsf]) + elif mode == "flip": + trsf = transforms.Compose( + [ + *self._test_trsf, + transforms.RandomHorizontalFlip(p=1.0), + *self._common_trsf, + ] + ) + elif mode == "test": + trsf = transforms.Compose([*self._test_trsf, *self._common_trsf]) + else: + raise ValueError("Unknown mode {}.".format(mode)) + + data, targets = [], [] + for idx in indices: + class_data, class_targets = self._select( + x, y, low_range=idx, high_range=idx + 1 + ) + data.append(class_data) + targets.append(class_targets) + + if appendent is not None and len(appendent) != 0: + appendent_data, appendent_targets = appendent + data.append(appendent_data) + targets.append(appendent_targets) + + data, targets = np.concatenate(data), np.concatenate(targets) + + # if ret_data: + # return data, targets, DummyDataset(data, targets, trsf, self.use_path) + # else: + return DummyDataset( + data, + targets, + trsf, + self._object_classes_data, + self.use_path, + self.args, + ) + + def get_anchor_dataset(self, mode, appendent=None, ret_data=False): + if mode == "train": + trsf = transforms.Compose([*self._train_trsf, *self._common_trsf]) + elif mode == "flip": + trsf = transforms.Compose( + [ + *self._test_trsf, + transforms.RandomHorizontalFlip(p=1.0), + *self._common_trsf, + ] + ) + elif mode == "test": + trsf = transforms.Compose([*self._test_trsf, *self._common_trsf]) + else: + raise ValueError("Unknown mode {}.".format(mode)) + + data, targets = [], [] + if appendent is not None and len(appendent) != 0: + appendent_data, appendent_targets = appendent + data.append(appendent_data) + targets.append(appendent_targets) + + data, targets = np.concatenate(data), np.concatenate(targets) + + if ret_data: + return data, targets, 
DummyDataset(data, targets, trsf, self.use_path) + else: + return DummyDataset(data, targets, trsf, self.use_path) + + def get_dataset_with_split( + self, indices, source, mode, appendent=None, val_samples_per_class=0 + ): + if source == "train": + x, y = self._train_data, self._train_targets + elif source == "test": + x, y = self._test_data, self._test_targets + else: + raise ValueError("Unknown data source {}.".format(source)) + + if mode == "train": + trsf = transforms.Compose([*self._train_trsf, *self._common_trsf]) + elif mode == "test": + trsf = transforms.Compose([*self._test_trsf, *self._common_trsf]) + else: + raise ValueError("Unknown mode {}.".format(mode)) + + train_data, train_targets = [], [] + val_data, val_targets = [], [] + for idx in indices: + class_data, class_targets = self._select( + x, y, low_range=idx, high_range=idx + 1 + ) + val_indx = np.random.choice( + len(class_data), val_samples_per_class, replace=False + ) + train_indx = list(set(np.arange(len(class_data))) - set(val_indx)) + val_data.append(class_data[val_indx]) + val_targets.append(class_targets[val_indx]) + train_data.append(class_data[train_indx]) + train_targets.append(class_targets[train_indx]) + + if appendent is not None: + appendent_data, appendent_targets = appendent + for idx in range(0, int(np.max(appendent_targets)) + 1): + append_data, append_targets = self._select( + appendent_data, appendent_targets, low_range=idx, high_range=idx + 1 + ) + val_indx = np.random.choice( + len(append_data), val_samples_per_class, replace=False + ) + train_indx = list(set(np.arange(len(append_data))) - set(val_indx)) + val_data.append(append_data[val_indx]) + val_targets.append(append_targets[val_indx]) + train_data.append(append_data[train_indx]) + train_targets.append(append_targets[train_indx]) + + train_data, train_targets = np.concatenate(train_data), np.concatenate( + train_targets + ) + val_data, val_targets = np.concatenate(val_data), np.concatenate(val_targets) + + return DummyDataset( + train_data, train_targets, trsf, self.use_path + ), DummyDataset(val_data, val_targets, trsf, self.use_path) + + def _setup_data(self, dataset_name, shuffle, seed): + idata = _get_idata(dataset_name, self.args) + idata.download_data() + + # Data + self._train_data, self._train_targets = idata.train_data, idata.train_targets + self._test_data, self._test_targets = idata.test_data, idata.test_targets + self.use_path = idata.use_path + with open("./src/utils/classes.pkl", "rb") as f: + self._object_classes_data = pickle.load(f) + + # Transforms + self._train_trsf = idata.train_trsf + self._test_trsf = idata.test_trsf + self._common_trsf = idata.common_trsf + + # Order + order = [i for i in range(len(np.unique(self._train_targets)))] + if shuffle: + np.random.seed(seed) + order = np.random.permutation(len(order)).tolist() + else: + order = idata.class_order + self._class_order = order + logging.info(self._class_order) + + # Map indices + self._train_targets = _map_new_class_index( + self._train_targets, self._class_order + ) + self._test_targets = _map_new_class_index(self._test_targets, self._class_order) + + def _select(self, x, y, low_range, high_range): + idxes = np.where(np.logical_and(y >= low_range, y < high_range))[0] + return x[idxes], y[idxes] + + +class DummyDataset(Dataset): + def __init__(self, images, labels, trsf, classes, use_path=False, args=None): + assert len(images) == len(labels), "Data size error!" 
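+        # Each item yields (top-k object-class predictions, transformed image, label);
+        # the predictions come from the precomputed classes.pkl dictionary, keyed by
+        # the image path relative to the dataset root.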
+ self.images = images + self.labels = labels + self.trsf = trsf + self.use_path = use_path + self.classes = classes + self.dataset_path = args["data_path"] + self.topk_classes = args["topk_classes"] if args["topk_classes"] > 0 else 1 + + def __len__(self): + return len(self.images) + + def __getitem__(self, idx): + img_path = os.path.join(self.dataset_path, self.images[idx]) + if self.use_path: + image = self.trsf(pil_loader(img_path)) + else: + image = self.trsf(Image.fromarray(img_path)) + label = self.labels[idx] + classes = self.classes[img_path.replace(self.dataset_path, "")][: self.topk_classes] + + return classes, image, label + + +def _map_new_class_index(y, order): + return np.array(list(map(lambda x: order.index(x), y))) + + +def _get_idata(dataset_name, args=None): + name = dataset_name.lower() + if name == "cddb": + return CDDB_benchmark(args) + elif name == "truefake": + return TrueFake_benchmark(args) + else: + raise NotImplementedError("Unknown dataset {}.".format(dataset_name)) + + +def pil_loader(path): + """ + Ref: + https://pytorch.org/docs/stable/_modules/torchvision/datasets/folder.html#ImageFolder + """ + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, "rb") as f: + img = Image.open(f) + return img.convert("RGB") diff --git a/detectors/P2G/src/utils/lr_scheduler.py b/detectors/P2G/src/utils/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..5a745e0ac971a9e8533fb9c371feafb99a9770fb --- /dev/null +++ b/detectors/P2G/src/utils/lr_scheduler.py @@ -0,0 +1,145 @@ +### +# Modified by Francesco Laiti - date 23 February 2024 +# Fetched from https://github.com/KaiyangZhou/Dassl.pytorch/blob/master/dassl/optim/lr_scheduler.py +### + +import torch +from torch.optim.lr_scheduler import _LRScheduler + +AVAI_SCHEDS = ["single_step", "multi_step", "cosine"] + + +class _BaseWarmupScheduler(_LRScheduler): + + def __init__( + self, optimizer, successor, warmup_epoch, last_epoch=-1, verbose=False + ): + self.successor = successor + self.warmup_epoch = warmup_epoch + super().__init__(optimizer, last_epoch, verbose) + + def get_lr(self): + raise NotImplementedError + + def step(self, epoch=None): + if self.last_epoch >= self.warmup_epoch: + self.successor.step(epoch) + self._last_lr = self.successor.get_last_lr() + else: + super().step(epoch) + + +class ConstantWarmupScheduler(_BaseWarmupScheduler): + + def __init__( + self, optimizer, successor, warmup_epoch, cons_lr, last_epoch=-1, verbose=False + ): + self.cons_lr = cons_lr + super().__init__(optimizer, successor, warmup_epoch, last_epoch, verbose) + + def get_lr(self): + if self.last_epoch >= self.warmup_epoch: + return self.successor.get_last_lr() + return [self.cons_lr for _ in self.base_lrs] + + +class LinearWarmupScheduler(_BaseWarmupScheduler): + + def __init__( + self, optimizer, successor, warmup_epoch, min_lr, last_epoch=-1, verbose=False + ): + self.min_lr = min_lr + super().__init__(optimizer, successor, warmup_epoch, last_epoch, verbose) + + def get_lr(self): + if self.last_epoch >= self.warmup_epoch: + return self.successor.get_last_lr() + if self.last_epoch == 0: + return [self.min_lr for _ in self.base_lrs] + return [lr * self.last_epoch / self.warmup_epoch for lr in self.base_lrs] + + +def build_lr_scheduler( + optimizer, + lr_scheduler, + max_epoch, + warmup_epoch=0, + warmup_recount=False, + warmup_type=None, + warmup_cons_lr=0.01, + warmup_min_lr=0.001, + stepsize=None, + gamma=None, +): + """ + A function wrapper 
for building a learning rate scheduler. + + Args: + optimizer (Optimizer): an Optimizer. + lr_scheduler (str): Type of learning rate scheduler. + stepsize (int or list/tuple): Step size for learning rate decay. + gamma (float): Multiplicative factor of learning rate decay. + max_epoch (int): Maximum number of epochs. + warmup_epoch (int, optional): Number of warmup epochs. + warmup_recount (bool, optional): Recount option for warmup. + warmup_type (str, optional): Type of warmup ('constant' or 'linear'). + warmup_cons_lr (float, optional): Learning rate for constant warmup. + warmup_min_lr (float, optional): Minimum learning rate for linear warmup. + """ + + if lr_scheduler not in AVAI_SCHEDS: + raise ValueError( + f"scheduler must be one of {AVAI_SCHEDS}, but got {lr_scheduler}" + ) + + if lr_scheduler == "single_step": + if isinstance(stepsize, (list, tuple)): + stepsize = stepsize[-1] + + if not isinstance(stepsize, int): + raise TypeError( + "For single_step lr_scheduler, stepsize must " + f"be an integer, but got {type(stepsize)}" + ) + + if stepsize <= 0: + stepsize = max_epoch + + scheduler = torch.optim.lr_scheduler.StepLR( + optimizer, step_size=stepsize, gamma=gamma + ) + + elif lr_scheduler == "multi_step": + if not isinstance(stepsize, (list, tuple)): + raise TypeError( + "For multi_step lr_scheduler, stepsize must " + f"be a list, but got {type(stepsize)}" + ) + + scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=stepsize, gamma=gamma + ) + + elif lr_scheduler == "cosine": + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer=optimizer, T_max=float(max_epoch) + ) + + if warmup_epoch > 0: + if not warmup_recount: + scheduler.last_epoch = warmup_epoch + + if warmup_type == "constant": + scheduler = ConstantWarmupScheduler( + optimizer, scheduler, warmup_epoch, warmup_cons_lr + ) + + elif warmup_type == "linear": + scheduler = LinearWarmupScheduler( + optimizer, scheduler, warmup_epoch, warmup_min_lr + ) + + else: + raise ValueError + + return scheduler diff --git a/detectors/P2G/src/utils/toolkit.py b/detectors/P2G/src/utils/toolkit.py new file mode 100644 index 0000000000000000000000000000000000000000..71bad90bbf19746b6e5b653b23da76e35be3a3c4 --- /dev/null +++ b/detectors/P2G/src/utils/toolkit.py @@ -0,0 +1,162 @@ +import os +import numpy as np +import torch + + +def count_parameters(model, trainable=False): + if trainable: + return sum(p.numel() for p in model.parameters() if p.requires_grad) + return sum(p.numel() for p in model.parameters()) + + +def tensor2numpy(x): + return x.cpu().data.numpy() if x.is_cuda else x.data.numpy() + + +def target2onehot(targets, n_classes): + onehot = torch.zeros(targets.shape[0], n_classes).to(targets.device) + onehot.scatter_(dim=1, index=targets.long().view(-1, 1), value=1.0) + return onehot + + +def makedirs(path): + if not os.path.exists(path): + os.makedirs(path) + + +def accuracy(y_pred, y_true, nb_old, increment=10): + assert len(y_pred) == len(y_true), "Data length error." 
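+    # Reports overall accuracy plus per-increment groups, "old" (labels < nb_old,
+    # i.e. classes from previous tasks) and "new" (labels >= nb_old), in percent.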
+ all_acc = {} + all_acc["total"] = np.around( + (y_pred == y_true).sum() * 100 / len(y_true), decimals=2 + ) + + # Grouped accuracy + for class_id in range(0, np.max(y_true), increment): + idxes = np.where( + np.logical_and(y_true >= class_id, y_true < class_id + increment) + )[0] + label = "{}-{}".format( + str(class_id).rjust(2, "0"), str(class_id + increment - 1).rjust(2, "0") + ) + all_acc[label] = np.around( + (y_pred[idxes] == y_true[idxes]).sum() * 100 / len(idxes), decimals=2 + ) + + # Old accuracy + idxes = np.where(y_true < nb_old)[0] + all_acc["old"] = ( + 0 + if len(idxes) == 0 + else np.around( + (y_pred[idxes] == y_true[idxes]).sum() * 100 / len(idxes), decimals=2 + ) + ) + + # New accuracy + idxes = np.where(y_true >= nb_old)[0] + all_acc["new"] = np.around( + (y_pred[idxes] == y_true[idxes]).sum() * 100 / len(idxes), decimals=2 + ) + + return all_acc + + +def split_images_labels(imgs): + # split trainset.imgs in ImageFolder + images = [] + labels = [] + for item in imgs: + images.append(item[0]) + labels.append(item[1]) + + return np.array(images), np.array(labels) + + +def accuracy_domain(y_pred, y_true, nb_old, increment=2, class_num=1) -> dict: + assert len(y_pred) == len(y_true), "Data length error." + all_acc = {} + all_acc["total"] = np.around( + (y_pred % class_num == y_true % class_num).sum() * 100 / len(y_true), decimals=2 + ) + + # Grouped accuracy + for class_id in range(0, np.max(y_true), increment): + idxes = np.where( + np.logical_and(y_true >= class_id, y_true < class_id + increment) + )[0] + label = "{}-{}".format( + str(class_id).rjust(2, "0"), str(class_id + increment - 1).rjust(2, "0") + ) + all_acc[label] = np.around( + ((y_pred[idxes] % class_num) == (y_true[idxes] % class_num)).sum() + * 100 + / len(idxes), + decimals=2, + ) + + # Old accuracy + idxes = np.where(y_true < nb_old)[0] + all_acc["old"] = ( + 0 + if len(idxes) == 0 + else np.around( + ((y_pred[idxes] % class_num) == (y_true[idxes] % class_num)).sum() + * 100 + / len(idxes), + decimals=2, + ) + ) + + # New accuracy + idxes = np.where(y_true >= nb_old)[0] + all_acc["new"] = np.around( + ((y_pred[idxes] % class_num) == (y_true[idxes] % class_num)).sum() + * 100 + / len(idxes), + decimals=2, + ) + + return all_acc + + +def accuracy_binary(y_pred, y_true, nb_old, increment=2): + assert len(y_pred) == len(y_true), "Data length error." 
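+    # Labels are encoded as (0 real / 1 fake) + 2 * task_id, so comparing modulo 2
+    # measures binary real-vs-fake accuracy regardless of the task offset.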
+ all_acc = {} + all_acc["total"] = np.around( + (y_pred % 2 == y_true % 2).sum() * 100 / len(y_true), decimals=2 + ) + + # Grouped accuracy + for class_id in range(0, np.max(y_true), increment): + idxes = np.where( + np.logical_and(y_true >= class_id, y_true < class_id + increment) + )[0] + label = "{}-{}".format( + str(class_id).rjust(2, "0"), str(class_id + increment - 1).rjust(2, "0") + ) + all_acc[label] = np.around( + ((y_pred[idxes] % 2) == (y_true[idxes] % 2)).sum() * 100 / len(idxes), + decimals=2, + ) + + # Old accuracy + idxes = np.where(y_true < nb_old)[0] + # all_acc['old'] = 0 if len(idxes) == 0 else np.around((y_pred[idxes] == y_true[idxes]).sum()*100 / len(idxes),decimals=2) + all_acc["old"] = ( + 0 + if len(idxes) == 0 + else np.around( + ((y_pred[idxes] % 2) == (y_true[idxes] % 2)).sum() * 100 / len(idxes), + decimals=2, + ) + ) + + # New accuracy + idxes = np.where(y_true >= nb_old)[0] + all_acc["new"] = np.around( + ((y_pred[idxes] % 2) == (y_true[idxes] % 2)).sum() * 100 / len(idxes), + decimals=2, + ) + + return all_acc diff --git a/detectors/P2G/src/utils/zeroshot_checker.py b/detectors/P2G/src/utils/zeroshot_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..e487131d48556c23540de4fcdabe38d8db1195e3 --- /dev/null +++ b/detectors/P2G/src/utils/zeroshot_checker.py @@ -0,0 +1,16 @@ +import pickle +import os +import glob + +dataroot = "/media/mmlab/Volume2/TrueFake" + +if __name__ == "__main__": + with open("./classes.pkl", "rb") as f: + results_new = pickle.load(f) + with open("./classes_old.pkl", "rb") as f: + results_old = pickle.load(f) + + for key, value in list(results_new.items())[:10]: + print(key, value) + for key, value in list(results_old.items())[:10]: + print(key, value) diff --git a/detectors/P2G/src/utils/zeroshot_classprediction.py b/detectors/P2G/src/utils/zeroshot_classprediction.py new file mode 100644 index 0000000000000000000000000000000000000000..6ec113f2d0d5bb4bb404ac8cb9258d4df55851b0 --- /dev/null +++ b/detectors/P2G/src/utils/zeroshot_classprediction.py @@ -0,0 +1,279 @@ +import sys +import os +import torch + +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + +from class_names import imagenet1k_classnames, facedataset_classnames +import json +from tqdm import tqdm +import pickle + +sys.path.append("../") +from models.clip import clip + + +def zeroshot_CLIP_batch(model, preprocess, device, text_inputs, class_names, image_paths, topk_indexes=5): + batch = torch.stack([preprocess(Image.open(path)) for path in image_paths]).to(device) + + with torch.no_grad(): + image_features = model.encode_image(batch) + text_features = model.encode_text(text_inputs) + + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1) + + results = [] + for i in range(similarity.size(0)): + values, indices = similarity[i].topk(topk_indexes) + outputs = [ + [class_names[index.item()], round(100 * value.item(), 4)] + for value, index in zip(values, indices) + ] + results.append(outputs) + return results + + +def process_images_in_batches( + model, + preprocess, + device, + text_inputs, + dataset_dir, + class_names, + image_paths, + batch_size, + topk_indexes, + class_label=None, +): + results = {} + for i in tqdm( + range(0, len(image_paths), batch_size), + desc=f"Processing batch of size {batch_size}", + ): + batch_paths = image_paths[i : i + batch_size] + batch_results = 
zeroshot_CLIP_batch( + model, + preprocess, + device, + text_inputs, + class_names, + batch_paths, + topk_indexes, + ) + for path, result in zip(batch_paths, batch_results): + if class_label: + result.append([class_label, -1]) + results[path.replace(dataset_dir, "")] = result + return results + +def prepare_text_inputs(data_type): + if data_type == "CDDB": + dataset_structure = [ + "whichfaceisreal", + "stylegan", + "crn", + "imle", + "cyclegan", + "wild", + "glow", + "deepfake", + "san", + "stargan_gf", + "biggan", + "gaugan", + ] + multiclass = [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0] + humans_inside = [1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0] + subsets = ["train", "val"] + classes = ["0_real", "1_fake"] + + return dataset_structure, multiclass, humans_inside, subsets, classes + + if data_type == "TrueFake": + dataset_structure = [ + 'Fake/FLUX.1/animals', + 'Fake/FLUX.1/faces', + 'Fake/FLUX.1/general', + 'Fake/FLUX.1/landscapes', + 'Fake/StableDiffusion1.5/animals', + 'Fake/StableDiffusion1.5/faces', + 'Fake/StableDiffusion1.5/general', + 'Fake/StableDiffusion1.5/landscapes', + 'Fake/StableDiffusion2/animals', + 'Fake/StableDiffusion2/faces', + 'Fake/StableDiffusion2/general', + 'Fake/StableDiffusion2/landscapes', + 'Fake/StableDiffusion3/animals', + 'Fake/StableDiffusion3/faces', + 'Fake/StableDiffusion3/general', + 'Fake/StableDiffusion3/landscapes', + 'Fake/StableDiffusionXL/animals', + 'Fake/StableDiffusionXL/faces', + 'Fake/StableDiffusionXL/general', + 'Fake/StableDiffusionXL/landscapes', + 'Fake/StyleGAN/images-psi-0.5', + 'Fake/StyleGAN/images-psi-0.7', + 'Fake/StyleGAN2/conf-f-psi-0.5', + 'Fake/StyleGAN2/conf-f-psi-1', + 'Fake/StyleGAN3/conf-t-psi-0.5', + 'Fake/StyleGAN3/conf-t-psi-0.7', + 'Real/FFHQ', + 'Real/FORLAB' + ] + multiclass = None + humans_inside = [0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0] + classes = ["Fake", "Real"] + subsets = None + + return dataset_structure, multiclass, humans_inside, subsets, classes + else: + raise ValueError(f"{data_type} not valid.") + +# def zeroshot_dataset_batch(dataset_dir, data_type, batch_size=32): +# device = "cuda" if torch.cuda.is_available() else "cpu" +# model, preprocess = clip.load("ViT-B/16", device) + +# dataset_structure, multiclass, humans_inside, subsets, classes = ( +# prepare_text_inputs(data_type) +# ) + +# results = {} +# for index, folder in enumerate(tqdm(dataset_structure, desc="Processing datasets")): +# if humans_inside[index] == 0: +# text_inputs = torch.cat( +# [ +# clip.tokenize(f"a photo of a {c}") +# for c in imagenet1k_classnames.values() +# ] +# ).to(device) +# class_names = imagenet1k_classnames +# else: +# text_inputs = torch.cat( +# [ +# clip.tokenize(f"a photo of a {c}") +# for c in facedataset_classnames.values() +# ] +# ).to(device) +# class_names = facedataset_classnames + +# for subset in subsets: +# subset_path = os.path.join(dataset_dir, folder, subset) +# if multiclass[index] == 1: +# class_labels = os.listdir(subset_path) +# for class_label in class_labels: +# class_path = os.path.join(subset_path, class_label) +# for binary_label in classes: +# image_paths = [ +# os.path.join(class_path, binary_label, img) +# for img in os.listdir( +# os.path.join(class_path, binary_label) +# ) +# ] +# batch_results = process_images_in_batches( +# model, +# preprocess, +# device, +# text_inputs, +# dataset_dir, +# class_names, +# image_paths, +# batch_size, +# 5, +# class_label, +# ) +# results.update(batch_results) +# else: +# for binary_label in classes: +# image_paths 
= [ +# os.path.join(subset_path, binary_label, img) +# for img in os.listdir(os.path.join(subset_path, binary_label)) +# ] +# batch_results = process_images_in_batches( +# model, +# preprocess, +# device, +# text_inputs, +# dataset_dir, +# class_names, +# image_paths, +# batch_size, +# 5, +# ) +# results.update(batch_results) + +# with open("./DEBUG_classes.json", "w") as f: # only for fast debug +# json.dump(results, f, indent=4) +# with open("./classes.pkl", "wb") as f: +# pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL) + +def zeroshot_dataset_batch(dataset_dir, data_type, batch_size=32): + device = "cuda" if torch.cuda.is_available() else "cpu" + model, preprocess = clip.load("ViT-B/16", device) + + dataset_structure, multiclass, humans_inside, subsets, classes = ( + prepare_text_inputs(data_type) + ) + + with open("./classes.pkl", "rb") as f: + results = pickle.load(f) + + if len(results) == 0: + results = {} + + print(len(results)) + for index, folder in enumerate(tqdm(dataset_structure, desc="Processing datasets")): + print(f"Processing {folder}") + if humans_inside[index] == 0: + text_inputs = torch.cat( + [ + clip.tokenize(f"a photo of a {c}") + for c in imagenet1k_classnames.values() + ] + ).to(device) + class_names = imagenet1k_classnames + else: + text_inputs = torch.cat( + [ + clip.tokenize(f"a photo of a {c}") + for c in facedataset_classnames.values() + ] + ).to(device) + class_names = facedataset_classnames + + image_paths = [os.path.join(dataset_dir, folder, img) for img in os.listdir(os.path.join(dataset_dir, folder))] + + if image_paths[0].replace(dataset_dir, "") in results.keys(): + continue + + batch_results = process_images_in_batches( + model, + preprocess, + device, + text_inputs, + dataset_dir, + class_names, + image_paths, + batch_size, + 5, + ) + results.update(batch_results) + + # with open("./DEBUG_classes.json", "w") as f: # only for fast debug + # json.dump(results, f, indent=4) + with open("./classes_nosocial.pkl", "wb") as f: + pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL) + +def get_JSON_dataset_batch(): + # dataroot = "/home/francesco.laiti/datasets/CDDB/" + # datatype = "CDDB" + dataroot = "/media/mmlab/Volume2/TrueFake/PreSocial/" + datatype = "TrueFake" + batch_size = 2048 + zeroshot_dataset_batch(dataroot, datatype, batch_size) + + +if __name__ == "__main__": + get_JSON_dataset_batch() diff --git a/detectors/P2G/src/utils/zeroshot_classprediction_shared.py b/detectors/P2G/src/utils/zeroshot_classprediction_shared.py new file mode 100644 index 0000000000000000000000000000000000000000..2590d72f77cb8045bff9d763f16bd03ff3fc6b46 --- /dev/null +++ b/detectors/P2G/src/utils/zeroshot_classprediction_shared.py @@ -0,0 +1,73 @@ +import pickle +import os +import glob + +dataroot = "/media/mmlab/Volume2/TrueFake" + +dataset_structure = [ + 'Fake/FLUX.1/animals', + 'Fake/FLUX.1/faces', + 'Fake/FLUX.1/general', + 'Fake/FLUX.1/landscapes', + 'Fake/StableDiffusion1.5/animals', + 'Fake/StableDiffusion1.5/faces', + 'Fake/StableDiffusion1.5/general', + 'Fake/StableDiffusion1.5/landscapes', + 'Fake/StableDiffusion2/animals', + 'Fake/StableDiffusion2/faces', + 'Fake/StableDiffusion2/general', + 'Fake/StableDiffusion2/landscapes', + 'Fake/StableDiffusion3/animals', + 'Fake/StableDiffusion3/faces', + 'Fake/StableDiffusion3/general', + 'Fake/StableDiffusion3/landscapes', + 'Fake/StableDiffusionXL/animals', + 'Fake/StableDiffusionXL/faces', + 'Fake/StableDiffusionXL/general', + 'Fake/StableDiffusionXL/landscapes', + 'Fake/StyleGAN/images-psi-0.5', + 
'Fake/StyleGAN/images-psi-0.7', + 'Fake/StyleGAN2/conf-f-psi-0.5', + 'Fake/StyleGAN2/conf-f-psi-1', + 'Fake/StyleGAN3/conf-t-psi-0.5', + 'Fake/StyleGAN3/conf-t-psi-0.7', + 'Real/FFHQ', + 'Real/FORLAB' +] + +if __name__ == "__main__": + with open("./classes_nosocial.pkl", "rb") as f: + results = pickle.load(f) + + results_shared = {} + for social in ['Facebook', 'Telegram', 'Twitter']: + social_root = f'{dataroot}/{social}' + + for dataset in dataset_structure: + print(f'Processing {social}/{dataset}') + current_dir = f'{social_root}/{dataset}' + images = os.listdir(current_dir) + + for image in images: + try: + image_key = f'{dataset}/{os.path.splitext(image)[0]}.png' + results_shared[f'{social}/{dataset}/{image}'] = results[image_key] + except KeyError: + image_key = f'{dataset}/{os.path.splitext(image)[0]}.jpg' + results_shared[f'{social}/{dataset}/{image}'] = results[image_key] + + + results_presocial = {} + for key, value in results.items(): + results_presocial[f'PreSocial/{key}'] = value + + print(len(results)) + print(len(results_presocial)) + print(len(results_shared)) + print(len(results_shared)+len(results_presocial)) + + results_all = {**results_presocial, **results_shared} + print(len(results_all)) + + with open("./classes.pkl", "wb") as f: + pickle.dump(results_all, f, protocol=pickle.HIGHEST_PROTOCOL) diff --git a/detectors/P2G/test.py b/detectors/P2G/test.py new file mode 100644 index 0000000000000000000000000000000000000000..484ffab357cc26e56a7de077789003f33cb488c2 --- /dev/null +++ b/detectors/P2G/test.py @@ -0,0 +1,22 @@ +from parser import get_parser +import subprocess + +if __name__ == "__main__": + parser = get_parser() + settings = parser.parse_args() + print(settings) + + with open ("configs/test_template.json", "r") as f: + training_template = f.read() + training_template = training_template.replace("${DATA_KEYS}", settings.data_keys) + training_template = training_template.replace("${DATA_ROOT}", settings.data_root) + training_template = training_template.replace("${SPLIT_FILE}", settings.split_file) + training_template = training_template.replace("${NAME}", settings.name) + training_template = training_template.replace("${DEVICE}", settings.device) + training_template = training_template.replace("${TASK}", settings.task) + + with open("configs/test.json", "w") as f: + f.write(training_template) + print("Test config file created") + + subprocess.run(f'python -u src/eval.py --config configs/test.json', shell=True) \ No newline at end of file diff --git a/detectors/P2G/tmp_detect/detect_config.json b/detectors/P2G/tmp_detect/detect_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b9637c9124e5b6d1c23b633bdd3eebff04dffb54 --- /dev/null +++ b/detectors/P2G/tmp_detect/detect_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c56deff5603d8c56a615e0798db0d18dede20aecb42715a6e1fb6839de15f1 +size 1428 diff --git a/detectors/P2G/tmp_detect/split_detect.json b/detectors/P2G/tmp_detect/split_detect.json new file mode 100644 index 0000000000000000000000000000000000000000..011df47e4ad09ccbee68c4956445445fbf76d3d4 --- /dev/null +++ b/detectors/P2G/tmp_detect/split_detect.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0029babb05deca51291f6f4cebdda11ed12adb29383e0e3f8d457bf6a01db9 +size 34 diff --git a/detectors/P2G/train.py b/detectors/P2G/train.py new file mode 100644 index 0000000000000000000000000000000000000000..612d47262804a860bc1ce642d8611f8d98a1da4b --- /dev/null +++ 
b/detectors/P2G/train.py @@ -0,0 +1,22 @@ +from parser import get_parser +import subprocess + +if __name__ == "__main__": + parser = get_parser() + settings = parser.parse_args() + print(settings) + + with open ("configs/train_template.json", "r") as f: + training_template = f.read() + training_template = training_template.replace("${DATA_KEYS}", settings.data_keys) + training_template = training_template.replace("${DATA_ROOT}", settings.data_root) + training_template = training_template.replace("${SPLIT_FILE}", settings.split_file) + training_template = training_template.replace("${NAME}", settings.name) + training_template = training_template.replace("${DEVICE}", settings.device) + training_template = training_template.replace("${TASK}", settings.task) + + with open("configs/train.json", "w") as f: + f.write(training_template) + print("Train config file created") + + subprocess.run(f'python -u src/train.py --config configs/train.json', shell=True) diff --git a/detectors/R50_TF/.DS_Store b/detectors/R50_TF/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7c9b6fd3d55cdde7ce9a4e39ccc6def0a1337fa0 Binary files /dev/null and b/detectors/R50_TF/.DS_Store differ diff --git a/detectors/R50_TF/LICENSE.md b/detectors/R50_TF/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..102013379aa9a5215236b66d418771a239e57470 --- /dev/null +++ b/detectors/R50_TF/LICENSE.md @@ -0,0 +1,395 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. 
If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. 
Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. 
Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. 
No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. \ No newline at end of file diff --git a/detectors/R50_TF/README.md b/detectors/R50_TF/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e416b2060807ba1cc698ca695f203f9380b50d76 --- /dev/null +++ b/detectors/R50_TF/README.md @@ -0,0 +1,35 @@ +# TrueFake: A Real World Case Dataset of Last Generation Fake Images also Shared on Social Networks + +[![Official Github Repo](https://img.shields.io/badge/Github%20page-222222.svg?style=for-the-badge&logo=github)](https://github.com/MMLab-unitn/TrueFake-IJCNN25) +[![Paper](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/pdf/2504.20658) + +Original Paper: +[TrueFake: A Real World Case Dataset of Last Generation Fake Images also Shared on Social Networks](https://arxiv.org/pdf/2504.20658). + +Authors: Stefano Dell'Anna, Andrea Montibeller, Giulia Boato + +## Abstract + +AI-generated synthetic media are increasingly used in real-world scenarios, often with the purpose of spreading misinformation and propaganda through social media platforms, where compression and other processing can degrade fake detection cues. Currently, many forensic tools fail to account for these in-the-wild challenges. In this work, we introduce TrueFake, a large-scale benchmarking dataset of 600,000 images including top notch generative techniques and sharing via three different social networks. This dataset allows for rigorous evaluation of state-of-the-art fake image detectors under very realistic and challenging conditions. Through extensive experimentation, we analyze how social media sharing impacts detection performance, and identify current most effective detection and training strategies. Our findings highlight the need for evaluating forensic models in conditions that mirror real-world use. + +# R50-TF + +The R50-TF network uses a ResNet50 architecture pretrained on ImageNet, modified to exclude downsampling at the first layer. 
During training, the network's backbone remains frozen, and only the classification head is trained. This classification head implements "learned prototypes" to provide robust real vs. fake image detection and to detect out-of-distribution samples by modeling an isotropic Gaussian class-conditional distribution representative of the input data. + + +# Please Cite + +``` +@misc{dellanna2025truefake, + title={TrueFake: A Real World Case Dataset of + Last Generation Fake Images also Shared on + Social Networks}, + author={Stefano Dell'Anna and Andrea Montibeller + and Giulia Boato}, + year={2025}, + eprint={2504.20658}, + archivePrefix={arXiv}, + primaryClass={cs.MM}, + url={https://arxiv.org/abs/2504.20658}, +} +``` \ No newline at end of file diff --git a/detectors/R50_TF/__pycache__/dataset.cpython-310.pyc b/detectors/R50_TF/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c5c95f9e55daf8380f684667d5e09b1f6d1e84d Binary files /dev/null and b/detectors/R50_TF/__pycache__/dataset.cpython-310.pyc differ diff --git a/detectors/R50_TF/__pycache__/networks.cpython-310.pyc b/detectors/R50_TF/__pycache__/networks.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f489f9eca04368007d0a9ffce269534031ecbd7 Binary files /dev/null and b/detectors/R50_TF/__pycache__/networks.cpython-310.pyc differ diff --git a/detectors/R50_TF/__pycache__/parser.cpython-310.pyc b/detectors/R50_TF/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..191b3de4fdbe5fb1b586c0356a921135589e4fec Binary files /dev/null and b/detectors/R50_TF/__pycache__/parser.cpython-310.pyc differ diff --git a/detectors/R50_TF/checkpoint/pretrained/weights/best.pt b/detectors/R50_TF/checkpoint/pretrained/weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c5062fbc14ba557e13746ee6d52e68aa98c92cf --- /dev/null +++ b/detectors/R50_TF/checkpoint/pretrained/weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f099ef43d2a1b3d35759cc9cc273861c2cb26d8907aeb75f1f3fc2cb3d1be19 +size 95386140 diff --git a/detectors/R50_TF/dataset.py b/detectors/R50_TF/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9e29a024de3aa18f724fa3adae202ba685f7db72 --- /dev/null +++ b/detectors/R50_TF/dataset.py @@ -0,0 +1,181 @@ +import os +import torch +from torchvision import datasets +import torchvision.transforms.v2 as Tv2 + +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + +import json +import bisect + +def parse_dataset(settings): + gen_keys = { + 'gan1':['StyleGAN'], + 'gan2':['StyleGAN2'], + 'gan3':['StyleGAN3'], + 'sd15':['StableDiffusion1.5'], + 'sd2':['StableDiffusion2'], + 'sd3':['StableDiffusion3'], + 'sdXL':['StableDiffusionXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + + gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()] + gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key] + + mod_keys = { + 'pre': ['PreSocial'], + 'fb': ['Facebook'], + 'tl': ['Telegram'], + 'tw': ['X'], + } + + mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()] + mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']] + + need_real = (settings.task == 'train' and not len([data.split(':')[0] for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]])) + + assert not need_real, 'Train task 
without real data, this will not get handeled automatically, terminating' + + dataset_list = [] + for data in settings.data_keys.split('&'): + gen, mod = data.split(':') + dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]}) + + return dataset_list + +class TrueFake_dataset(datasets.DatasetFolder): + def __init__(self, settings): + self.data_root = settings.data_root + self.split = settings.split + + with open(settings.split_file, "r") as f: + split_list = sorted(json.load(f)[self.split]) + + dataset_list = parse_dataset(settings) + + self.samples = [] + self.info = [] + for dict in dataset_list: + generators = dict['gen'] + modifiers = dict['mod'] + + for mod in modifiers: + for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True): + if len(dataset_dirs): + continue + + (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3] + + if gen in generators: + for filename in sorted(dataset_files): + if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']: + if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])): + self.samples.append(os.path.join(dataset_root, filename)) + self.info.append((mod, label, gen, sub)) + + self.transform_start = Tv2.Compose( + [ + Tv2.ToImage() + ] + ) + + self.transform_end = Tv2.Compose( + [ + Tv2.CenterCrop(1024) if self.split == 'test' and 'realFORLAB:pre' in settings.data_keys else Tv2.Identity(), + Tv2.CenterCrop(720) if self.split == 'test' and 'realFORLAB:fb' in settings.data_keys else Tv2.Identity(), + Tv2.CenterCrop(1200) if self.split == 'test' and 'realFORLAB:tw' in settings.data_keys else Tv2.Identity(), + Tv2.CenterCrop(800) if self.split == 'test' and 'realFORLAB:tl' in settings.data_keys else Tv2.Identity(), + Tv2.ToDtype(torch.float32, scale=True), + Tv2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + + if self.split in ['train', 'val']: + self.transform_aug = { + 'light': Tv2.Compose( + [ + Tv2.RandomChoice([Tv2.RandomResizedCrop([300], (0.5, 1.5), (0.5, 2)), Tv2.RandomCrop([300])], p=[0.3, 0.7]), + Tv2.Compose([Tv2.RandomHorizontalFlip(p=0.5), Tv2.RandomVerticalFlip(p=0.5)]), + Tv2.RandomCrop(96, pad_if_needed=True) if self.split == 'train' else Tv2.Identity(), + ] + ), + 'heavy': Tv2.Compose( + [ + Tv2.RandomChoice([Tv2.RandomResizedCrop([300], (0.5, 1.5), (0.5, 2)), Tv2.RandomCrop([300])], p=[0.3, 0.7]), + + Tv2.RandomApply([Tv2.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)], p=0.3), + Tv2.RandomApply([Tv2.GaussianBlur(kernel_size=11, sigma=(0.1,3))], p=0.3), + Tv2.RandomApply([Tv2.JPEG((65, 95))], p=0.3), + + Tv2.Compose([Tv2.RandomHorizontalFlip(p=0.5), Tv2.RandomVerticalFlip(p=0.5)]), + + Tv2.RandomCrop(96, pad_if_needed=True) if self.split == 'train' else Tv2.Identity(), + ] + ) + } + + else: + self.transform_aug = None + + print() + print(f'Transforms for {self.split}:') + print(self.transform_start) + if self.transform_aug: + print(self.transform_aug['light']) + print(self.transform_aug['heavy']) + print(self.transform_end) + + print(f'Loaded {len(self.samples)} samples for {self.split}') + + def _in_list(self, split, elem): + i = bisect.bisect_left(split, elem) + return i != len(split) and split[i] == elem + + def __len__(self): + return len(self.samples) + + def __getitem__(self, index): + path = self.samples[index] + mod, label, gen, sub = self.info[index] + + image = Image.open(path).convert('RGB') + sample = 
self.transform_start(image) + if self.transform_aug: + sample = self.transform_aug['heavy' if mod == 'PreSocial' else 'light'](sample) + sample = self.transform_end(sample) + + target = 1.0 if label == 'Fake' else 0.0 + + return sample, target, path + +def create_dataloader(settings, split=None): + if split == "train": + settings.split = 'train' + is_train=True + + elif split == "val": + settings.split = 'val' + is_train=False + + elif split == "test": + settings.split = 'test' + settings.batch_size = settings.batch_size//8 + is_train=False + + else: + raise ValueError(f"Unknown split {split}") + + dataset = TrueFake_dataset(settings) + + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=settings.batch_size, + num_workers=int(settings.num_threads), + shuffle = is_train, + collate_fn=None, + ) + return data_loader diff --git a/detectors/R50_TF/detect.py b/detectors/R50_TF/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..85867951162d346276abffd2fd457cafb6460c3f --- /dev/null +++ b/detectors/R50_TF/detect.py @@ -0,0 +1,253 @@ +# ---------------------------------------------------------------------------- +# IMPORTS +# ---------------------------------------------------------------------------- +import os +import argparse +import json +import time +import yaml +import torch +from PIL import Image +import torchvision.transforms.v2 as Tv2 + +from networks import ImageClassifier +import sys +project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(project_root) +from support.detect_utils import format_result, save_result, get_device + + +# ---------------------------------------------------------------------------- +# IMAGE PREPROCESSING +# ---------------------------------------------------------------------------- +def preprocess_image(image_path): + """ + Load and preprocess a single image for model input. + Uses the same normalization as test.py (ImageNet stats). + """ + # Load image + image = Image.open(image_path).convert('RGB') + + # Apply transforms (same as test split without augmentation) + transform = Tv2.Compose([ + Tv2.ToImage(), + Tv2.ToDtype(torch.float32, scale=True), + Tv2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + # Apply transforms and add batch dimension + tensor = transform(image) + tensor = tensor.unsqueeze(0) # Add batch dimension + + return tensor + + +# ---------------------------------------------------------------------------- +# CONFIG LOADING AND PARSING +# ---------------------------------------------------------------------------- +def load_config(config_path): + """Load configuration from YAML file.""" + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + return config + + +def parse_detector_args(detector_args, default_num_centers=1): + """ + Parse detector_args list (e.g., ["--arch", "nodown", "--prototype", "--freeze"]) + into a settings object. 
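+    Unknown flags are skipped; flags that take a value read the next token as that value.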
+ """ + class Settings: + def __init__(self): + self.arch = "nodown" + self.freeze = False + self.prototype = False + self.num_centers = default_num_centers + + settings = Settings() + + i = 0 + while i < len(detector_args): + arg = detector_args[i] + + if arg == "--arch": + if i + 1 < len(detector_args): + settings.arch = detector_args[i + 1] + i += 2 + else: + i += 1 + elif arg == "--freeze": + settings.freeze = True + i += 1 + elif arg == "--prototype": + settings.prototype = True + i += 1 + elif arg == "--num_centers": + if i + 1 < len(detector_args): + settings.num_centers = int(detector_args[i + 1]) + i += 2 + else: + i += 1 + else: + i += 1 + + return settings + + +def resolve_config_path(config_path): + """ + Resolve config path. If relative, resolve relative to project root + (two levels up from detect.py location). + """ + if os.path.isabs(config_path): + return config_path + + # Get directory of detect.py (detectors/R50_TF/) + detect_dir = os.path.dirname(os.path.abspath(__file__)) + # Go two levels up to project root + project_root = os.path.dirname(os.path.dirname(detect_dir)) + # Join with config path + return os.path.join(project_root, config_path) + + +# ---------------------------------------------------------------------------- +# INFERENCE +# ---------------------------------------------------------------------------- +def run_inference(model, image_path, device): + """ + Run inference on a single image. + Returns: (probability, label, runtime_ms) + """ + start_time = time.time() + + # Preprocess image + image_tensor = preprocess_image(image_path) + image_tensor = image_tensor.to(device) + + # Run inference + model.eval() + with torch.no_grad(): + raw_score_tensor = model(image_tensor).squeeze(1) # shape [1] + + # Convert to probability using sigmoid + probability = torch.sigmoid(raw_score_tensor).item() + + # Determine label (fake if probability > 0.5, else real) + label = "fake" if probability > 0.5 else "real" + + # Calculate runtime in milliseconds + runtime_ms = int((time.time() - start_time) * 1000) + + return probability, label, runtime_ms + + +# ---------------------------------------------------------------------------- +# MAIN +# ---------------------------------------------------------------------------- +def main(): + parser = argparse.ArgumentParser(description='Single image inference for R50_TF detector') + parser.add_argument('--input', type=str, required=False, help='Path to input image (alias: --image)') + parser.add_argument('--image', type=str, required=False, help='Path to input image (alias for --input)') + parser.add_argument('--output', type=str, default='/tmp/result.json', help='Path to output JSON file') + parser.add_argument('--checkpoint', type=str, required=False, help='Path to model checkpoint file') + parser.add_argument('--model', type=str, required=False, help='Model name or checkpoint directory (alias for --checkpoint)') + parser.add_argument('--config', type=str, default='configs/R50_TF.yaml', help='Path to YAML config file') + parser.add_argument('--device', type=str, default=None, help='Device to use (cuda:0, cpu, etc.)') + + args = parser.parse_args() + + # Normalize image argument: prefer --image over --input if provided + if args.image: + args.input = args.image + + checkpoint_path = None + if args.checkpoint: + checkpoint_path = args.checkpoint + elif getattr(args, 'model', None): + detect_dir = os.path.dirname(os.path.abspath(__file__)) + candidate = os.path.join(detect_dir, 'checkpoint', args.model, 'weights', 'best.pt') + if 
os.path.exists(candidate): + checkpoint_path = candidate + else: + # If model refers directly to a file path, accept it + if os.path.isabs(args.model) and os.path.exists(args.model): + checkpoint_path = args.model + else: + # Try resolving relative to project root + project_root = os.path.dirname(os.path.dirname(detect_dir)) + candidate2 = os.path.join(project_root, args.model) + if os.path.exists(candidate2): + checkpoint_path = candidate2 + + # If still not found, keep existing behavior (will raise later) + if checkpoint_path: + args.checkpoint = checkpoint_path + + # Resolve config path + config_path = resolve_config_path(args.config) + if not os.path.exists(config_path): + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + # Load config + config = load_config(config_path) + + # Parse detector_args from config + detector_args = config.get('detector_args', []) + settings = parse_detector_args(detector_args) + + # Get device from config if available, else use argument + # Prioritize argument if explicitly provided (we assume if it's not default, or if we trust the caller) + # Since we want to support --device cpu override, we should prioritize args.device + device_str = args.device + + # Only check config if args.device wasn't explicitly passed (but here it has a default) + # Let's assume if the user passed --device, they want that. + # But args.device has a default 'cuda:0'. + # We should change the default to None to distinguish. + + if args.device is None: + if config.get('global', {}).get('device_override'): + device_override = config['global']['device_override'] + if device_override and device_override != "null" and device_override != "": + device_str = device_override + else: + device_str = 'cuda:0' + else: + device_str = args.device + + # Determine device + if device_str.startswith('cuda') and not torch.cuda.is_available(): + print(f"Warning: CUDA requested but not available. 
Using CPU.") + device = torch.device('cpu') + else: + device = torch.device(device_str if torch.cuda.is_available() else 'cpu') + + # Load model + print(f"Loading model from {args.checkpoint}") + model = ImageClassifier(settings) + model.load_state_dict(torch.load(args.checkpoint, map_location=device)) + model.to(device) + model.eval() + + # Run inference + print(f"Running inference on {args.input}") + probability, label, runtime_ms = run_inference(model, args.input, device) + + # Format result to match other detectors (prediction/confidence/elapsed_time) + elapsed_time = runtime_ms / 1000.0 + formatted = format_result(label, float(round(probability, 4)), elapsed_time) + + # Save using shared utility (if output path is provided) + if args.output: + save_result(formatted, args.output) + print(f"Results saved to {args.output}") + + # Print concise output for user + print(f"Prediction: {formatted['prediction']}") + print(f"Confidence: {formatted['confidence']:.4f}") + print(f"Time: {formatted['elapsed_time']:.3f}s") + + +if __name__ == '__main__': + main() + diff --git a/detectors/R50_TF/networks.py b/detectors/R50_TF/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..4ec78726ae461aadd53bc612df3fd98f0f7ffed5 --- /dev/null +++ b/detectors/R50_TF/networks.py @@ -0,0 +1,80 @@ +import torch +import torch.nn as nn +from torchvision import models + +class ScoresLayer(nn.Module): + def __init__(self, input_dim, num_centers): + super().__init__() + self.input_dim = input_dim + self.num_centers = num_centers + self.centers = nn.Parameter(torch.zeros(num_centers, input_dim), requires_grad=True) + self.logsigmas = nn.Parameter(torch.zeros(num_centers), requires_grad=True) + + def forward(self, x): + batch_size = x.size(0) + out = x.view(batch_size, self.input_dim, 1, 1) # [batch, C, 1, 1] + + centers = self.centers[None, :, :, None, None] # [1, K, C, 1, 1] + diff = out.unsqueeze(1) - centers # [batch, K, C, 1, 1] + + sum_diff = torch.sum(diff, dim=2) # [batch, K, 1, 1] + sign = torch.sign(sum_diff) + + squared_diff = torch.sum(diff ** 2, dim=2) # [batch, K, 1, 1] + + logsigmas = nn.functional.relu(self.logsigmas) + denominator = 2 * torch.exp(2 * logsigmas) + part1 = (sign * squared_diff) / denominator.view(1, -1, 1, 1) + + part2 = self.input_dim * logsigmas + part2 = part2.view(1, -1, 1, 1) + + scores = part1 + part2 + output = scores.sum(dim=(1, 2, 3)).view(-1, 1) # [batch, 1] + + return output + +class ImageClassifier(nn.Module): + def __init__(self, settings): + super().__init__() + if settings.arch == 'baseline': + self.backbone = models.resnet50(weights=None) + self.backbone.fc = nn.Linear(self.backbone.fc.in_features, 1) + + elif settings.arch == 'nodown': + self.backbone = models.resnet50(weights=None) + + # Replace first conv layer to avoid downsampling + new_conv = nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False) + new_conv.weight = nn.Parameter(self.backbone.conv1.weight) + self.backbone.conv1 = new_conv + self.backbone.fc = nn.Sequential(nn.Linear(self.backbone.fc.in_features, 128), nn.Dropout(0.5)) + + + else: + raise NotImplementedError('Model not recognized') + + if settings.freeze: + for param in self.backbone.parameters(): + param.requires_grad = False + for param in self.backbone.fc.parameters(): + param.requires_grad = True + else: + for param in self.backbone.parameters(): + param.requires_grad = True + + self.prototype = settings.prototype + + if self.prototype: + self.proto = ScoresLayer(input_dim=self.backbone.fc[0].out_features, 
num_centers=settings.num_centers) + for param in self.proto.parameters(): + param.requires_grad = True + + def forward(self, x): + x = self.backbone(x) + + if self.prototype: + x = self.proto(x) + + return x + \ No newline at end of file diff --git a/detectors/R50_TF/parser.py b/detectors/R50_TF/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..a145ca303176db45a2015f0ffb75d0fba700ac37 --- /dev/null +++ b/detectors/R50_TF/parser.py @@ -0,0 +1,26 @@ +import argparse + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--name", type=str, default="test", help="run name") + + parser.add_argument("--task", type=str, help="Task: train/test") + parser.add_argument("--device", type=str, default="cuda:0", help="cuda device to use") + parser.add_argument("--arch", type=str, default="nodown", help="architecture name") + parser.add_argument("--freeze", action='store_true', help="Freeze all layers except the last one") + parser.add_argument("--prototype", action='store_true', help="Use prototypes") + parser.add_argument("--num_centers",type=int, default=1, help="Number of prototypes centers") + + parser.add_argument("--num_epoches", type=int, default=100, help="# of epoches at starting learning rate") + parser.add_argument("--lr", type=float, default=0.0001, help="initial learning rate") + parser.add_argument("--lr_decay_epochs",type=int, default=5, help="Number of epochs without loss reduction before lowering the learning rate by 10x") + parser.add_argument("--lr_min",type=float, default=1e-7, help="Number of epochs without loss reduction before lowering the learning rate by 10x") + + parser.add_argument("--split_file", type=str, help="Path to split json") + parser.add_argument("--data_root", type=str, help="Path to dataset") + parser.add_argument("--data_keys", type=str, help="Dataset specifications") + + parser.add_argument("--batch_size", type=int, default=64, help='Dataloader batch size') + parser.add_argument("--num_threads", type=int, default=14, help='# threads for loading data') + + return parser \ No newline at end of file diff --git a/detectors/R50_TF/test.py b/detectors/R50_TF/test.py new file mode 100644 index 0000000000000000000000000000000000000000..63f597ea94ba2cc28c02adcb849f477b99b5a077 --- /dev/null +++ b/detectors/R50_TF/test.py @@ -0,0 +1,162 @@ +# ---------------------------------------------------------------------------- +# IMPORTS +# ---------------------------------------------------------------------------- +import os +import torch +import pandas as pd +from tqdm import tqdm +import json +import time +import numpy as np +from sklearn.metrics import roc_auc_score, accuracy_score + +from networks import ImageClassifier +from parser import get_parser +from dataset import create_dataloader + +def test(loader, model, settings, device): + model.eval() + + start_time = time.time() + + # File paths + output_dir = f'./results/{settings.name}/{settings.data_keys}/data/' + os.makedirs(output_dir, exist_ok=True) + + csv_filename = os.path.join(output_dir, 'results.csv') + metrics_filename = os.path.join(output_dir, 'metrics.json') + image_results_filename = os.path.join(output_dir, 'image_results.json') + + # Collect all results + all_scores = [] + all_labels = [] + all_paths = [] + image_results = [] + + # Parse dataset keys from settings.data_keys (format: "key1&key2&..." 
or single "key") + dataset_keys = settings.data_keys.split('&') if '&' in settings.data_keys else [settings.data_keys] + + # Extract training dataset keys from model name (format: "training_keys_freeze_down" or "training_keys") + # The model name typically contains the training dataset keys used for training + training_dataset_keys = [] + model_name = settings.name + # Remove common suffixes like "_freeze_down" + if '_freeze_down' in model_name: + training_name = model_name.replace('_freeze_down', '') + else: + training_name = model_name + # Split by & to get individual training dataset keys + if '&' in training_name: + training_dataset_keys = training_name.split('&') + else: + training_dataset_keys = [training_name] + + # Write CSV header + with open(csv_filename, 'w') as f: + f.write(f"{','.join(['name', 'pro', 'flag'])}\n") + + with torch.no_grad(): + with tqdm(loader, unit='batch', mininterval=0.5) as tbatch: + tbatch.set_description(f'Validation') + for (data, labels, paths) in tbatch: + data = data.to(device) + labels = labels.to(device) + + scores = model(data).squeeze(1) + + # Collect results + for score, label, path in zip(scores, labels, paths): + score_val = score.item() + label_val = label.item() + + all_scores.append(score_val) + all_labels.append(label_val) + all_paths.append(path) + + image_results.append({ + 'path': path, + 'score': score_val, + 'label': label_val + }) + + # Write to CSV (maintain backward compatibility) + with open(csv_filename, 'a') as f: + for score, label, path in zip(scores, labels, paths): + f.write(f"{path}, {score.item()}, {label.item()}\n") + + # Calculate metrics + all_scores = np.array(all_scores) + all_labels = np.array(all_labels) + + # Convert scores to predictions (threshold at 0, as used in train.py) + predictions = (all_scores > 0).astype(int) + + # Calculate overall metrics + total_accuracy = accuracy_score(all_labels, predictions) + + # TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1) + fake_mask = all_labels == 1 + if fake_mask.sum() > 0: + tpr = accuracy_score(all_labels[fake_mask], predictions[fake_mask]) + else: + tpr = 0.0 + + # Calculate TNR on real images (label==0) in the test set + real_mask = all_labels == 0 + if real_mask.sum() > 0: + # Overall TNR calculated on all real images in the test set + tnr = accuracy_score(all_labels[real_mask], predictions[real_mask]) + else: + tnr = 0.0 + + # AUC calculation (needs probabilities, so we'll use sigmoid on scores) + if len(np.unique(all_labels)) > 1: + # Apply sigmoid to convert scores to probabilities + probabilities = torch.sigmoid(torch.tensor(all_scores)).numpy() + auc = roc_auc_score(all_labels, probabilities) + else: + auc = 0.0 + + execution_time = time.time() - start_time + + # Prepare metrics JSON + metrics = { + 'TPR': float(tpr), + 'TNR': float(tnr), + 'Acc total': float(total_accuracy), + 'AUC': float(auc), + 'execution time': float(execution_time) + } + + # Write metrics JSON + with open(metrics_filename, 'w') as f: + json.dump(metrics, f, indent=2) + + # Write individual image results JSON + with open(image_results_filename, 'w') as f: + json.dump(image_results, f, indent=2) + + print(f'\nMetrics saved to {metrics_filename}') + print(f'Image results saved to {image_results_filename}') + print(f'\nMetrics:') + print(f' TPR: {tpr:.4f}') + print(f' TNR: {tnr:.4f}') + print(f' Accuracy: {total_accuracy:.4f}') + print(f' AUC: {auc:.4f}') + print(f' Execution time: {execution_time:.2f} seconds') + +if __name__ == "__main__": + parser = get_parser() 
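+    # Standalone test entry point: parse settings, build the test dataloader, restore the
+    # best checkpoint for this run name (./checkpoint/{name}/weights/best.pt), and evaluate.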
+ settings = parser.parse_args() + + device = torch.device(settings.device if torch.cuda.is_available() else 'cpu') + + test_dataloader = create_dataloader(settings, split='test') + + model = ImageClassifier(settings) + model.to(device) + path_weight = f'./checkpoint/{settings.name}/weights/best.pt' + state_dict = torch.load(path_weight) + model.load_state_dict(state_dict) + + test(test_dataloader, model, settings, device) \ No newline at end of file diff --git a/detectors/R50_TF/train.py b/detectors/R50_TF/train.py new file mode 100644 index 0000000000000000000000000000000000000000..da404531cfb283b26e35efcc9b78b0d8e6b47754 --- /dev/null +++ b/detectors/R50_TF/train.py @@ -0,0 +1,106 @@ +# ---------------------------------------------------------------------------- +# IMPORTS +# ---------------------------------------------------------------------------- +import os +os.environ['CUDA_LAUNCH_BLOCKING'] = '1' +import glob +import torch +import shutil +from tqdm import tqdm +import torch.nn as nn +import torch.optim as optim + +from networks import ImageClassifier +from parser import get_parser +from dataset import create_dataloader +from sklearn.metrics import balanced_accuracy_score + +def check_accuracy(val_dataloader, model, settings): + model.eval() + + label_array = torch.empty(0, dtype=torch.int64, device=device) + pred_array = torch.empty(0, dtype=torch.int64, device=device) + + with torch.no_grad(): + with tqdm(val_dataloader, unit='batch', mininterval=0.5) as tbatch: + tbatch.set_description(f'Validation') + for (data, label, _) in tbatch: + data = data.to(device) + label = label.to(device) + + pred = model(data).squeeze(1) + + label_array = torch.cat((label_array, label)) + pred_array = torch.cat((pred_array, pred)) + + accuracy = balanced_accuracy_score(label_array.cpu().numpy(), pred_array.cpu().numpy() > 0) + + print(f'Got accuracy {accuracy:.2f} \n') + return accuracy + + +def train(train_dataloader, val_dataloader, model, settings): + best_accuracy = 0 + lr_decay_counter = 0 + for epoch in range(0, settings.num_epoches): + model.train() + with tqdm(train_dataloader, unit='batch', mininterval=0.5) as tepoch: + tepoch.set_description(f'Epoch {epoch}', refresh=False) + if epoch > 0: + for batch_idx, (data, label, _) in enumerate(tepoch): + data = data.to(device) + label = label.to(device).float() + + scores = model(data).squeeze(1) + + loss = criterion(scores, label).mean() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + tepoch.set_postfix(loss=loss.item()) + + accuracy = check_accuracy(val_dataloader, model, settings) + + if accuracy > best_accuracy: + best_accuracy = accuracy + torch.save(model.state_dict(), f'./checkpoint/{settings.name}/weights/best.pt') + + print(f'New best model saved with accuracy {best_accuracy:.4f} \n') + lr_decay_counter = 0 + + elif settings.lr_decay_epochs > 0: + lr_decay_counter += 1 + if lr_decay_counter == settings.lr_decay_epochs: + if optimizer.param_groups[0]['lr'] > settings.lr_min: + for param_group in optimizer.param_groups: + param_group['lr'] *= 0.1 + print('Learning rate decayed \n') + lr_decay_counter = 0 + else: + print('Learning rate already at minimum \n') + break + +if __name__ == "__main__": + parser = get_parser() + settings = parser.parse_args() + print(settings) + + device = torch.device(settings.device if torch.cuda.is_available() else 'cpu') + + model = ImageClassifier(settings) + model.to(device) + os.makedirs(f'./checkpoint/{settings.name}/weights/', exist_ok=True) + + with open(f'./checkpoint/settings.txt', 
'w') as f: + f.write(str(settings)) + + train_dataloader = create_dataloader(settings, split='train') + val_dataloader = create_dataloader(settings, split='val') + + optimizer = optim.Adam((p for p in model.parameters() if p.requires_grad), lr=settings.lr) + + criterion = nn.BCEWithLogitsLoss(reduction='none') + + train(train_dataloader, val_dataloader, model, settings) \ No newline at end of file diff --git a/detectors/R50_nodown/LICENSE.md b/detectors/R50_nodown/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..7a4a3ea2424c09fbe48d455aed1eaa94d9124835 --- /dev/null +++ b/detectors/R50_nodown/LICENSE.md @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/detectors/R50_nodown/README.md b/detectors/R50_nodown/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c05cab514ed105bc1df50226da6d2f73680b50df --- /dev/null +++ b/detectors/R50_nodown/README.md @@ -0,0 +1,32 @@ +# On the detection of synthetic images generated by diffusion models + +[![Github](https://img.shields.io/badge/Github%20webpage-222222.svg?style=for-the-badge&logo=github)](https://grip-unina.github.io/DMimageDetection/) +[![arXiv](https://img.shields.io/badge/-arXiv-B31B1B.svg?style=for-the-badge)](https://arxiv.org/abs/2211.00680) +[![IEEE](https://img.shields.io/badge/-IEEE-6093BF.svg?style=for-the-badge)](https://doi.org/10.1109/ICASSP49357.2023.10095167) +[![GRIP](https://img.shields.io/badge/-GRIP-0888ef.svg?style=for-the-badge)](https://www.grip.unina.it) + +Original Paper: [On the detection of synthetic images generated by diffusion models](https://arxiv.org/abs/2211.00680). + +Authors: Riccardo Corvi, Davide Cozzolino, Giada Zingarini, Giovanni Poggi, Koki Nagano, Luisa Verdoliva + +## Abstract + +Over the past decade, there has been tremendous progress in creating synthetic media, mainly thanks to the development of powerful methods based on generative adversarial networks (GAN). Very recently, methods based on diffusion models (DM) have been gaining the spotlight. In addition to providing an impressive level of photorealism, they enable the creation of text-based visual content, opening up new and exciting opportunities in many different application fields, from arts to video games. On the other hand, this property is an additional asset in the hands of malicious users, who can generate and distribute fake media perfectly adapted to their attacks, posing new challenges to the media forensic community. With this work, we seek to understand how difficult it is to distinguish synthetic images generated by diffusion models from pristine ones and whether current state-of-the-art detectors are suitable for the task. To this end, first we expose the forensics traces left by diffusion models, then study how current detectors, developed for GAN-generated images, perform on these new synthetic images, especially in challenging social-network scenarios involving image compression and resizing. 
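+
+## Single-Image Detection
+
+The wrapper `detect.py` in this folder restores the checkpoint stored under `checkpoint/<model>/weights/best.pt` and prints a real/fake prediction together with a sigmoid confidence. A minimal sketch of an invocation from the repository root, assuming the bundled `pretrained` checkpoint (tracked via Git LFS) and a placeholder image path:
+
+```bash
+python detectors/R50_nodown/detect.py \
+    --image path/to/image.jpg \
+    --model pretrained \
+    --device cpu \
+    --output result.json
+```
+
+`--device` falls back to CPU automatically if CUDA is unavailable; `--output` is optional and, when given, also saves the result as JSON.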
+ + +## Please Cite + +``` +@InProceedings{Corvi_2023_ICASSP, + author={Corvi, Riccardo and Cozzolino, Davide and + Zingarini, Giada and Poggi, Giovanni and Nagano, + Koki and Verdoliva, Luisa}, + title={On The Detection of Synthetic Images + Generated by Diffusion Models}, + booktitle={IEEE International Conference on + Acoustics, Speech and Signal Processing (ICASSP)}, + year={2023}, + pages={1-5}, + doi={10.1109/ICASSP49357.2023.10095167} +} +``` \ No newline at end of file diff --git a/detectors/R50_nodown/__pycache__/parser.cpython-310.pyc b/detectors/R50_nodown/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..219216103853b96f2df7267915787e313cec91de Binary files /dev/null and b/detectors/R50_nodown/__pycache__/parser.cpython-310.pyc differ diff --git a/detectors/R50_nodown/checkpoint/pretrained/weights/best.pt b/detectors/R50_nodown/checkpoint/pretrained/weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..4945c3f169e1f17374d19752de2e31d505170674 --- /dev/null +++ b/detectors/R50_nodown/checkpoint/pretrained/weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2288deb4d38238cc3fe7082c78c42cf8b7b4346e86e9d94ec45f0a40684807a3 +size 282535031 diff --git a/detectors/R50_nodown/detect.py b/detectors/R50_nodown/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..25cbe168654325eacc9ede891704c524651a9036 --- /dev/null +++ b/detectors/R50_nodown/detect.py @@ -0,0 +1,111 @@ +import os +import sys +import time +import yaml +import torch +from PIL import Image +import torchvision.transforms as transforms +import argparse + +# Add project root to path for imports +project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(project_root) +from support.detect_utils import format_result, save_result, get_device +from networks import create_architecture + +def parse_args(): + parser = argparse.ArgumentParser(description='R50_nodown single image detector') + parser.add_argument('--image', type=str, required=True, help='Path to input image') + parser.add_argument('--model', type=str, default='checkpoint/best.pt', help='Path to model checkpoint') + parser.add_argument('--output', type=str, help='Path to save detection result JSON') + parser.add_argument('--device', type=str, help='Device to run on (e.g., cuda:0, cuda:1, cpu)') + parser.add_argument('--config', type=str, default='configs/r50_nodown.yaml', help='Path to config file') + return parser.parse_args() + +def load_config(config_path): + """Load configuration from YAML file.""" + with open(config_path, 'r') as f: + return yaml.safe_load(f) + +def load_image(image_path, size=224): + """Load and preprocess image.""" + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found: {image_path}") + + image = Image.open(image_path).convert('RGB') + transform = transforms.Compose([ + transforms.Resize(size), + transforms.CenterCrop(size), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + return transform(image).unsqueeze(0) + +def main(): + args = parse_args() + + # Setup device + if args.device: + if args.device.startswith('cuda') and not torch.cuda.is_available(): + print("CUDA is not available, falling back to CPU") + device = torch.device('cpu') + else: + device = torch.device(args.device) + else: + device = get_device() + + # Load model + try: + # Initialize model + model = 
create_architecture("res50nodown", pretrained=True, num_classes=1).to(device) + load_path = f'./detectors/R50_nodown/checkpoint/{args.model}/weights/best.pt' + + if not os.path.exists(load_path): + raise FileNotFoundError(f"Model weights not found at: {load_path}") + checkpoint = torch.load(load_path, map_location=device) + if 'model' in checkpoint: + model.load_state_dict(checkpoint['model']) + else: + model.load_state_dict(checkpoint) + model.eval() + except Exception as e: + print(f"Error loading model: {e}") + return + + # Load and preprocess image + try: + image_tensor = load_image(args.image).to(device) + except Exception as e: + print(f"Error loading image: {e}") + return + + # Run detection + start_time = time.time() + with torch.no_grad(): + try: + score = model(image_tensor) + prediction = torch.sigmoid(score) + + confidence = prediction.item() + + result = format_result( + 'fake' if confidence>0.5 else 'real', + confidence, + time.time() - start_time + ) + + # Print result + print(f"Prediction: {result['prediction']}") + print(f"Confidence: {result['confidence']:.4f}") + print(f"Time: {result['elapsed_time']:.3f}s") + + # Save result if output path provided + if args.output: + save_result(result, args.output) + + except Exception as e: + print(f"Error during detection: {e}") + return + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/detectors/R50_nodown/networks/__init__.py b/detectors/R50_nodown/networks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ca4b03ad9c25b5789ee5b6431bdcef8926c69043 --- /dev/null +++ b/detectors/R50_nodown/networks/__init__.py @@ -0,0 +1,38 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +def create_architecture(name_arch, pretrained=False, num_classes=1): + if name_arch == "res50nodown": + from .resnet_mod import resnet50 + + if pretrained: + model = resnet50(pretrained=False, stride0=1, dropout=0.5).change_output(num_classes) + else: + model = resnet50(num_classes=num_classes, stride0=1, dropout=0.5) + elif name_arch == "res50": + from .resnet_mod import resnet50 + + if pretrained: + model = resnet50(pretrained=False, stride0=2).change_output(num_classes) + else: + model = resnet50(num_classes=num_classes, stride0=2) + else: + assert False + return model + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) diff --git a/detectors/R50_nodown/networks/__pycache__/__init__.cpython-310.pyc b/detectors/R50_nodown/networks/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..353695416df7e3579d8f1f5ece1906aaaa57cd1a Binary files /dev/null and b/detectors/R50_nodown/networks/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/R50_nodown/networks/__pycache__/resnet_mod.cpython-310.pyc b/detectors/R50_nodown/networks/__pycache__/resnet_mod.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8118e766ecc58dab7ddc909dce2aee43547a20a9 Binary files /dev/null and b/detectors/R50_nodown/networks/__pycache__/resnet_mod.cpython-310.pyc differ diff --git a/detectors/R50_nodown/networks/resnet_mod.py b/detectors/R50_nodown/networks/resnet_mod.py new file mode 100644 index 0000000000000000000000000000000000000000..8b71c0108c985f58548f40ef1eb680c99fd1908d --- /dev/null +++ b/detectors/R50_nodown/networks/resnet_mod.py @@ -0,0 +1,335 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +__all__ = ["ResNet", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152"] + + +model_urls = { + "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth", + "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth", + "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth", + "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth", + "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth", +} + +class ChannelLinear(nn.Linear): + def __init__( + self, in_features: int, out_features: int, bias: bool = True, pool=None + ) -> None: + super(ChannelLinear, self).__init__(in_features, out_features, bias) + self.compute_axis = 1 + self.pool = pool + + def forward(self, x): + axis_ref = len(x.shape) - 1 + x = torch.transpose(x, self.compute_axis, axis_ref) + out_shape = list(x.shape) + out_shape[-1] = self.out_features + x = x.reshape(-1, x.shape[-1]) + x = x.matmul(self.weight.t()) + if self.bias is not None: + x = x + self.bias[None, :] + x = torch.transpose(x.view(out_shape), axis_ref, self.compute_axis) + if self.pool is not None: + x = self.pool(x) + return x + + +def conv3x3(in_planes, out_planes, stride=1, padding=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=padding, bias=False + ) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, padding=padding) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, padding=padding) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.padding = padding + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.padding == 0: + identity = identity[..., 1:-1, 1:-1] + if self.downsample is not None: + identity = self.downsample(identity) + if self.padding == 0: + identity = identity[..., 1:-1, 1:-1] + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, padding=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride, padding=padding) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.padding = padding + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.padding == 0: + identity = identity[..., 1:-1, 1:-1] + if self.downsample is not None: + identity = self.downsample(identity) + + out += identity + out = self.relu(out) + + return out + +class ResNet(nn.Module): + def __init__( + self, 
+ block, + layers, + num_classes=1000, + zero_init_residual=False, + stride0=2, + padding=1, + dropout=0.0, + gap_size=None, + ): + super(ResNet, self).__init__() + self.inplanes = 64 + + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=stride0, padding=3 * padding, bias=False + ) + self.bn1 = nn.BatchNorm2d(64) + if dropout > 0: + self.dropout = nn.Dropout(dropout) + else: + self.dropout = None + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=stride0, padding=padding) + self.layer1 = self._make_layer(block, 64, layers[0], padding=padding) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, padding=padding) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, padding=padding) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, padding=padding) + + if gap_size is None: + self.gap_size = None + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + elif gap_size < 0: + with torch.no_grad(): + y = self.forward_features( + torch.zeros((1, 3, -gap_size, -gap_size), dtype=torch.float32) + ).shape + print("gap_size:", -gap_size, ">>", y[-1]) + self.gap_size = y[-1] + self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0) + elif gap_size == 1: + self.gap_size = gap_size + self.avgpool = None + else: + self.gap_size = gap_size + self.avgpool = nn.AvgPool2d(kernel_size=self.gap_size, stride=1, padding=0) + self.num_features = 512 * block.expansion + self.fc = ChannelLinear(self.num_features, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, padding=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride=stride, + downsample=downsample, + padding=padding, + ) + ) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, padding=padding)) + + return nn.Sequential(*layers) + + def change_output(self, num_classes): + self.fc = ChannelLinear(self.num_features, num_classes) + torch.nn.init.normal_(self.fc.weight.data, 0.0, 0.02) + return self + + def change_input(self, num_inputs): + data = self.conv1.weight.data + old_num_inputs = int(data.shape[1]) + if num_inputs > old_num_inputs: + times = num_inputs // old_num_inputs + if (times * old_num_inputs) < num_inputs: + times = times + 1 + data = data.repeat(1, times, 1, 1) / times + elif num_inputs == old_num_inputs: + return self + + data = data[:, :num_inputs, :, :] + print(self.conv1.weight.data.shape, "->", data.shape) + self.conv1.weight.data = data + + return self + + def forward_features(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + return x + + def forward_head(self, x): + if self.avgpool is not None: + x = self.avgpool(x) + if self.dropout is not None: + x = self.dropout(x) + y = 
self.fc(x) + if self.gap_size is None: + y = torch.squeeze(torch.squeeze(y, -1), -1) + return y + + def forward(self, x): + x = self.forward_features(x) + x = self.forward_head(x) + return x + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet18"])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet34"])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet50"])) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet101"])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls["resnet152"])) + return model diff --git a/detectors/R50_nodown/parser.py b/detectors/R50_nodown/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..f0508baa0aa3353e652ae390286da87711271523 --- /dev/null +++ b/detectors/R50_nodown/parser.py @@ -0,0 +1,25 @@ +import argparse + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--name", type=str, default="test", help="run name") + parser.add_argument("--arch", type=str, default="resnet_nodown", help="architecture name") + + parser.add_argument("--task", type=str, help="Task: train/test") + parser.add_argument("--device", type=str, default="cuda:0", help="cuda device to use") + + parser.add_argument("--split_file", type=str, help="Path to split json") + parser.add_argument("--data_root", type=str, help="Path to dataset") + parser.add_argument("--data_keys", type=str, help="Dataset specifications") + + parser.add_argument("--batch_size", type=int, default=64, help='Dataloader batch size') + parser.add_argument("--num_threads", type=int, default=14, help='# threads for loading data') + + parser.add_argument("--lr", type=float, default=0.0001, help="initial learning rate") + parser.add_argument("--weight_decay", type=float, default=0.0, help="weight decay") + parser.add_argument("--beta1", type=float, default=0.9, help="momentum term of adam") + + parser.add_argument("--num_epoches", type=int, default=1000, help="# of epoches at starting learning rate") + parser.add_argument("--earlystop_epoch", type=int, default=5, help="Number of epochs without loss reduction before lowering the learning rate") + + return parser \ No newline at end of file diff --git a/detectors/R50_nodown/test.py b/detectors/R50_nodown/test.py new file mode 100644 index 
0000000000000000000000000000000000000000..332d62ad7e6adb01e874a9c7ec0b4d11d8c0a69e --- /dev/null +++ b/detectors/R50_nodown/test.py @@ -0,0 +1,161 @@ +import os +from tqdm import tqdm +import torch +import pandas as pd +import json +import time +import numpy as np +from sklearn.metrics import roc_auc_score, accuracy_score +from networks import create_architecture, count_parameters +from utils.dataset import create_dataloader +from utils.processing import add_processing_arguments +from parser import get_parser + +def test(loader, model, settings, device): + model.eval() + + start_time = time.time() + + # File paths + output_dir = f'./results/{settings.name}/{settings.data_keys}/data/' + os.makedirs(output_dir, exist_ok=True) + + csv_filename = os.path.join(output_dir, 'results.csv') + metrics_filename = os.path.join(output_dir, 'metrics.json') + image_results_filename = os.path.join(output_dir, 'image_results.json') + + # Collect all results + all_scores = [] + all_labels = [] + all_paths = [] + image_results = [] + + # Extract training dataset keys from model name (format: "training_keys_freeze_down" or "training_keys") + # The model name typically contains the training dataset keys used for training + training_dataset_keys = [] + model_name = settings.name + # Remove common suffixes like "_freeze_down" + if '_freeze_down' in model_name: + training_name = model_name.replace('_freeze_down', '') + else: + training_name = model_name + # Split by & to get individual training dataset keys + if '&' in training_name: + training_dataset_keys = training_name.split('&') + else: + training_dataset_keys = [training_name] + + # Write CSV header + with open(csv_filename, 'w') as f: + f.write(f"{','.join(['name', 'pro', 'flag'])}\n") + + with torch.no_grad(): + with tqdm(loader, unit='batch', mininterval=0.5) as tbatch: + tbatch.set_description(f'Validation') + for data_dict in tbatch: + data = data_dict['img'].to(device) + labels = data_dict['target'].to(device) + paths = data_dict['path'] + + scores = model(data).squeeze(1) + + # Collect results + for score, label, path in zip(scores, labels, paths): + score_val = score.item() + label_val = label.item() + + all_scores.append(score_val) + all_labels.append(label_val) + all_paths.append(path) + + image_results.append({ + 'path': path, + 'score': score_val, + 'label': label_val + }) + + # Write to CSV (maintain backward compatibility) + with open(csv_filename, 'a') as f: + for score, label, path in zip(scores, labels, paths): + f.write(f"{path}, {score.item()}, {label.item()}\n") + + # Calculate metrics + all_scores = np.array(all_scores) + all_labels = np.array(all_labels) + + # Convert scores to predictions (threshold at 0, as used in train.py) + predictions = (all_scores > 0).astype(int) + + # Calculate overall metrics + total_accuracy = accuracy_score(all_labels, predictions) + + # TPR (True Positive Rate) = TP / (TP + FN) = accuracy on fake images (label==1) + fake_mask = all_labels == 1 + if fake_mask.sum() > 0: + tpr = accuracy_score(all_labels[fake_mask], predictions[fake_mask]) + else: + tpr = 0.0 + + # Calculate TNR on real images (label==0) in the test set + real_mask = all_labels == 0 + if real_mask.sum() > 0: + # Overall TNR calculated on all real images in the test set + tnr = accuracy_score(all_labels[real_mask], predictions[real_mask]) + else: + tnr = 0.0 + + # AUC calculation (needs probabilities, so we'll use sigmoid on scores) + if len(np.unique(all_labels)) > 1: + # Apply sigmoid to convert scores to probabilities + probabilities = 
torch.sigmoid(torch.tensor(all_scores)).numpy() + auc = roc_auc_score(all_labels, probabilities) + else: + auc = 0.0 + + execution_time = time.time() - start_time + + # Prepare metrics JSON + metrics = { + 'TPR': float(tpr), + 'TNR': float(tnr), + 'Acc total': float(total_accuracy), + 'AUC': float(auc), + 'execution time': float(execution_time) + } + + # Write metrics JSON + with open(metrics_filename, 'w') as f: + json.dump(metrics, f, indent=2) + + # Write individual image results JSON + with open(image_results_filename, 'w') as f: + json.dump(image_results, f, indent=2) + + print(f'\nMetrics saved to {metrics_filename}') + print(f'Image results saved to {image_results_filename}') + print(f'\nMetrics:') + print(f' TPR: {tpr:.4f}') + print(f' TNR: {tnr:.4f}') + print(f' Accuracy: {total_accuracy:.4f}') + print(f' AUC: {auc:.4f}') + print(f' Execution time: {execution_time:.2f} seconds') + +if __name__ == '__main__': + parser = get_parser() + parser = add_processing_arguments(parser) + settings = parser.parse_args() + + device = torch.device(settings.device if torch.cuda.is_available() else 'cpu') + + test_dataloader = create_dataloader(settings, split='test') + + model = create_architecture(settings.arch, pretrained=True, num_classes=1).to(device) + num_parameters = count_parameters(model) + print(f"Arch: {settings.arch} with #parameters {num_parameters}") + load_path = f'./checkpoint/{settings.name}/weights/best.pt' + + print('loading the model from %s' % load_path) + model.load_state_dict(torch.load(load_path, map_location=device)['model']) + model.to(device) + + test(test_dataloader, model, settings, device) diff --git a/detectors/R50_nodown/train.py b/detectors/R50_nodown/train.py new file mode 100644 index 0000000000000000000000000000000000000000..3cd8ab17073ff604787bbb97768018abde9b4685 --- /dev/null +++ b/detectors/R50_nodown/train.py @@ -0,0 +1,64 @@ +import os +import tqdm +from utils import TrainingModel, create_dataloader, EarlyStopping +from sklearn.metrics import balanced_accuracy_score, roc_auc_score +from utils.processing import add_processing_arguments +from parser import get_parser + +if __name__ == "__main__": + parser = get_parser() + parser = add_processing_arguments(parser) + + opt = parser.parse_args() + os.makedirs(os.path.join('checkpoint', opt.name,'weights'), exist_ok=True) + + valid_data_loader = create_dataloader(opt, split="val") + train_data_loader = create_dataloader(opt, split="train") + print() + print("# validation batches = %d" % len(valid_data_loader)) + print("# training batches = %d" % len(train_data_loader)) + model = TrainingModel(opt) + early_stopping = None + start_epoch = model.total_steps // len(train_data_loader) + print() + + for epoch in range(start_epoch, opt.num_epoches+1): + if epoch > start_epoch: + # Training + pbar = tqdm.tqdm(train_data_loader) + for data in pbar: + loss = model.train_on_batch(data).item() + total_steps = model.total_steps + pbar.set_description(f"Train loss: {loss:.4f}") + + # Save model + model.save_networks(epoch) + + # Validation + print("Validation ...", flush=True) + y_true, y_pred, y_path = model.predict(valid_data_loader) + acc = balanced_accuracy_score(y_true, y_pred > 0.0) + auc = roc_auc_score(y_true, y_pred) + lr = model.get_learning_rate() + print("After {} epoches: val acc = {}; val auc = {}".format(epoch, acc, auc), flush=True) + + # Early Stopping + if early_stopping is None: + early_stopping = EarlyStopping( + init_score=acc, patience=opt.earlystop_epoch, + delta=0.001, verbose=True, + ) + print('Save 
best model', flush=True) + model.save_networks('best') + else: + if early_stopping(acc): + print('Save best model', flush=True) + model.save_networks('best') + if early_stopping.early_stop: + cont_train = model.adjust_learning_rate() + if cont_train: + print("Learning rate dropped by 10, continue training ...", flush=True) + early_stopping.reset_counter() + else: + print("Early stopping.", flush=True) + break diff --git a/detectors/R50_nodown/utils/__init__.py b/detectors/R50_nodown/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3b1bde9fb6193c6cb36e3ebab15c7f81ccf3730d --- /dev/null +++ b/detectors/R50_nodown/utils/__init__.py @@ -0,0 +1,52 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +from .dataset import create_dataloader +from .training import TrainingModel +class EarlyStopping: + def __init__(self, init_score=None, patience=1, verbose=False, delta=0): + self.best_score = init_score + self.patience = patience + self.delta = delta + self.verbose = verbose + self.count_down = self.patience + self.early_stop = False + + def __call__(self, score): + if self.best_score is None: + if self.verbose: + print(f'Score set to {score:.6f}.') + self.best_score = score + self.count_down = self.patience + return True + elif score <= self.best_score + self.delta: + self.count_down -= 1 + if self.verbose: + print(f'EarlyStopping count_down: {self.count_down} on {self.patience}') + if self.count_down <= 0: + self.early_stop = True + return False + else: + if self.verbose: + print(f'Score increased from ({self.best_score:.6f} to {score:.6f}).') + self.best_score = score + self.count_down = self.patience + return True + + def reset_counter(self): + self.count_down = self.patience + self.early_stop = False diff --git a/detectors/R50_nodown/utils/__pycache__/__init__.cpython-310.pyc b/detectors/R50_nodown/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a57bc3013222ed8ce71c4485f4d09e0388c7a2c Binary files /dev/null and b/detectors/R50_nodown/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/detectors/R50_nodown/utils/__pycache__/dataset.cpython-310.pyc b/detectors/R50_nodown/utils/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7903678031e8f0165e793c04be418bd520a6b162 Binary files /dev/null and b/detectors/R50_nodown/utils/__pycache__/dataset.cpython-310.pyc differ diff --git a/detectors/R50_nodown/utils/__pycache__/processing.cpython-310.pyc b/detectors/R50_nodown/utils/__pycache__/processing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f47b8da283d8c7f50a7216955c2b559118aff5d Binary files /dev/null and b/detectors/R50_nodown/utils/__pycache__/processing.cpython-310.pyc differ diff --git a/detectors/R50_nodown/utils/__pycache__/training.cpython-310.pyc 
b/detectors/R50_nodown/utils/__pycache__/training.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9981de2663cdac2868363e5a76915623cf377c84 Binary files /dev/null and b/detectors/R50_nodown/utils/__pycache__/training.cpython-310.pyc differ diff --git a/detectors/R50_nodown/utils/dataset.py b/detectors/R50_nodown/utils/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c8172ba2bc609adabf12a87c0ce158202af33d9c --- /dev/null +++ b/detectors/R50_nodown/utils/dataset.py @@ -0,0 +1,163 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import os +import json +import torch +import bisect +import numpy as np +from torch.utils.data.sampler import WeightedRandomSampler, RandomSampler +from torchvision import datasets +from .processing import make_processing + +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + +def get_bal_sampler(dataset): + targets = [] + for d in dataset.datasets: + targets.extend(d.targets) + + ratio = np.bincount(targets) + w = 1.0 / torch.tensor(ratio, dtype=torch.float) + if torch.all(w==w[0]): + print(f"RandomSampler: # {ratio}") + sampler = RandomSampler(dataset, replacement = False) + else: + w = w / torch.sum(w) + print(f"WeightedRandomSampler: # {ratio}, Weightes {w}") + sample_weights = w[targets] + sampler = WeightedRandomSampler( + weights=sample_weights, num_samples=len(sample_weights) + ) + return sampler + +def create_dataloader(opt, split=None): + if split == "train": + opt.split = 'train' + is_train=True + + elif split == "val": + opt.split = 'val' + is_train=False + + elif split == "test": + opt.split = 'test' + opt.batch_size = 2 + is_train=False + + else: + raise ValueError(f"Unknown split {split}") + + dataset = TrueFake_dataset(opt) + + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=opt.batch_size, + shuffle=is_train, + num_workers=int(opt.num_threads), + ) + return data_loader + +def parse_dataset(settings): + gen_keys = { + 'gan1':['StyleGAN'], + 'gan2':['StyleGAN2'], + 'gan3':['StyleGAN3'], + 'sd15':['StableDiffusion1.5'], + 'sd2':['StableDiffusion2'], + 'sd3':['StableDiffusion3'], + 'sdXL':['StableDiffusionXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + + gen_keys['all'] = [gen_keys[key][0] for key in gen_keys.keys()] + gen_keys['real'] = [gen_keys[key][0] for key in gen_keys.keys() if 'real' in key] + + mod_keys = { + 'pre': ['PreSocial'], + 'fb': ['Facebook'], + 'tl': ['Telegram'], + 'tw': ['X'], + } + + mod_keys['all'] = [mod_keys[key][0] for key in mod_keys.keys()] + mod_keys['shr'] = [mod_keys[key][0] for key in mod_keys.keys() if key in ['fb', 'tl', 'tw']] + + need_real = (settings.split in ['train', 'val'] and not len([data for data in settings.data_keys.split('&') if 'real' in data.split(':')[0]])) + + assert not need_real, 'Train task without real data, this will not get handeled automatically, 
terminating' + + dataset_list = [] + for data in settings.data_keys.split('&'): + gen, mod = data.split(':') + dataset_list.append({'gen':gen_keys[gen], 'mod':mod_keys[mod]}) + + return dataset_list + +class TrueFake_dataset(datasets.DatasetFolder): + def __init__(self, settings): + self.data_root = settings.data_root + self.split = settings.split + + with open(settings.split_file, "r") as f: + split_list = sorted(json.load(f)[self.split]) + + dataset_list = parse_dataset(settings) + + self.samples = [] + self.info = [] + for dict in dataset_list: + generators = dict['gen'] + modifiers = dict['mod'] + + for mod in modifiers: + for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(self.data_root, mod), topdown=True, followlinks=True): + if len(dataset_dirs): + continue + + (label, gen, sub) = f'{dataset_root}/'.replace(os.path.join(self.data_root, mod) + os.sep, '').split(os.sep)[:3] + + if gen in generators: + for filename in sorted(dataset_files): + if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']: + if self._in_list(split_list, os.path.join(gen, sub, os.path.splitext(filename)[0])): + self.samples.append(os.path.join(dataset_root, filename)) + self.info.append((mod, label, gen, sub)) + + self.transform = make_processing(settings) + print(self.transform) + + def _in_list(self, split, elem): + i = bisect.bisect_left(split, elem) + return i != len(split) and split[i] == elem + + def __len__(self): + return len(self.samples) + + def __getitem__(self, index): + path = self.samples[index] + mod, label, gen, sub = self.info[index] + + sample = Image.open(path).convert('RGB') + sample = self.transform(sample) + + target = 1.0 if label == 'Fake' else 0.0 + + return {'img':sample, 'target':target, 'path':path} \ No newline at end of file diff --git a/detectors/R50_nodown/utils/processing.py b/detectors/R50_nodown/utils/processing.py new file mode 100644 index 0000000000000000000000000000000000000000..af479947d5c13dddfa51c334e79efb69ef38d7b4 --- /dev/null +++ b/detectors/R50_nodown/utils/processing.py @@ -0,0 +1,226 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +import random +import numpy as np +import torchvision.transforms as transforms +import torchvision.transforms.v2 as Tv2 +from PIL import Image +import torch + +def make_processing(opt): + opt = parse_arguments(opt) + transforms_list = list() # list of transforms + + if opt.task == 'train': + transforms_aug = make_aug(opt) # make data-augmentation transforms + if transforms_aug is not None: + transforms_list.append(transforms_aug) + + transforms_post = make_post(opt) # make post-data-augmentation transforms + if transforms_post is not None: + transforms_list.append(transforms_post) + + if opt.task == 'test' and 'realFORLAB:pre' in opt.data_keys: + transforms_list.append(Tv2.CenterCrop(1024)) + if opt.task == 'test' and 'realFORLAB:fb' in opt.data_keys: + transforms_list.append(Tv2.CenterCrop(720)) + if opt.task == 'test' and 'realFORLAB:tw' in opt.data_keys: + transforms_list.append(Tv2.CenterCrop(1200)) + if opt.task == 'test' and 'realFORLAB:tl' in opt.data_keys: + transforms_list.append(Tv2.CenterCrop(800)) + transforms_list.append(make_normalize(opt)) # make normalization + + return Tv2.Compose(transforms_list) + + +def add_processing_arguments(parser): + # parser is an argparse.ArgumentParser + # + # ICASSP2023: --cropSize 96 --loadSize -1 --resizeSize -1 --norm_type resnet --resize_prob 0.2 --jitter_prob 0.8 --colordist_prob 0.2 --cutout_prob 0.2 --noise_prob 0.2 --blur_prob 0.5 --cmp_prob 0.5 --rot90_prob 1.0 --hpf_prob 0.0 --blur_sig 0.0,3.0 --cmp_method cv2,pil --cmp_qual 30,100 --resize_size 256 --resize_ratio 0.75 + # + + parser.add_argument("--cropSize",type=int,default=-1,help="crop images to this size post augumentation") + + # data-augmentation probabilities + parser.add_argument("--resize_prob", type=float, default=0.0) + parser.add_argument("--jitter_prob", type=float, default=0.0) + parser.add_argument("--colordist_prob", type=float, default=0.0) + parser.add_argument("--cutout_prob", type=float, default=0.0) + parser.add_argument("--noise_prob", type=float, default=0.0) + parser.add_argument("--blur_prob", type=float, default=0.0) + parser.add_argument("--cmp_prob", type=float, default=0.0) + + # data-augmentation parameters + parser.add_argument("--blur_sig", default="0.5") + parser.add_argument("--cmp_qual", default="75") + parser.add_argument("--resize_size", type=int, default=256) + parser.add_argument("--resize_ratio", type=float, default=1.0) + + # other + parser.add_argument("--norm_type", type=str, default="resnet") # normalization type + + return parser + + +def parse_arguments(opt): + if not isinstance(opt.blur_sig, list): + opt.blur_sig = [float(s) for s in opt.blur_sig.split(",")] + if not isinstance(opt.cmp_qual, list): + opt.cmp_qual = [int(s) for s in opt.cmp_qual.split(",")] + + print(opt.cmp_qual) + return opt + +def make_post(opt): + transforms_list = list() + + if opt.cropSize > 0: + print("\nUsing Post Random Crop\n") + transforms_list.append(Tv2.RandomCrop(opt.cropSize, pad_if_needed=True, padding_mode="symmetric")) + + if len(transforms_list) == 0: + return None + else: + return Tv2.Compose(transforms_list) + + +def make_aug(opt): + # AUG + transforms_list_aug = list() + + if (opt.resize_size > 0) and (opt.resize_prob > 0): # opt.resized_ratio + transforms_list_aug.append( + Tv2.RandomChoice( + [ + Tv2.RandomResizedCrop( + size=opt.resize_size, + scale=(0.08, 1.0), + ratio=(opt.resize_ratio, 1.0 / opt.resize_ratio), + ), + Tv2.RandomCrop([opt.resize_size]) + ], + p=[opt.resize_prob, 1 - opt.resize_prob], + ) + ) + + if opt.jitter_prob > 0: + 
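+        # With probability jitter_prob, randomly jitter brightness, contrast and saturation by up to 0.4 and hue by up to 0.1.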
transforms_list_aug.append( + Tv2.RandomApply( + [ + Tv2.ColorJitter(0.4, 0.4, 0.4, 0.1) + ], + p=opt.jitter_prob + ) + ) + + if opt.colordist_prob > 0: + transforms_list_aug.append(Tv2.RandomGrayscale(p=opt.colordist_prob)) + + if opt.cutout_prob > 0: + transforms_list_aug.append(create_cutout_transforms(opt.cutout_prob)) + + if opt.noise_prob > 0: + transforms_list_aug.append( + Tv2.Compose([ + Tv2.ToImage(), + Tv2.ToDtype(torch.float32, scale=False), + Tv2.RandomApply( + [ + Tv2.GaussianNoise(sigma=0.44) + ], + p=opt.noise_prob + ), + Tv2.ToDtype(torch.uint8, scale=False), + Tv2.ToPILImage(), + ]) + ) + + if opt.blur_prob > 0: + transforms_list_aug.append( + Tv2.RandomApply( + [ + Tv2.GaussianBlur( + kernel_size=15, + sigma=opt.blur_sig, + ) + ], + p=opt.blur_prob + ) + ) + + if opt.cmp_prob > 0: + transforms_list_aug.append( + Tv2.RandomApply( + [ + Tv2.JPEG( + opt.cmp_qual + ) + ], + opt.cmp_prob, + ) + ) + + + transforms_list_aug.append(Tv2.Compose([Tv2.RandomHorizontalFlip(), Tv2.RandomVerticalFlip()])) + + if len(transforms_list_aug) > 0: + return Tv2.Compose(transforms_list_aug) + else: + return None + + +def make_normalize(opt): + transforms_list = list() + if opt.norm_type == "resnet": + print("normalize RESNET") + + transforms_list.append(Tv2.ToTensor()) + transforms_list.append( + Tv2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ) + else: + assert False + + return Tv2.Compose(transforms_list) + +def sample_discrete(s): + if len(s) == 1: + return s[0] + return random.choice(s) + + +def sample_continuous(s): + if len(s) == 1: + return s[0] + if len(s) == 2: + rg = s[1] - s[0] + return random.random() * rg + s[0] + raise ValueError("Length of iterable s should be 1 or 2.") + +def create_cutout_transforms(p): + from albumentations import CoarseDropout + aug = CoarseDropout( + num_holes_range=(1,1), + hole_height_range=(1, 48), + hole_width_range=(1, 48), + fill=128, + p=p, + ) + return transforms.Lambda( + lambda img: Image.fromarray(aug(image=np.array(img))["image"]) + ) \ No newline at end of file diff --git a/detectors/R50_nodown/utils/training.py b/detectors/R50_nodown/utils/training.py new file mode 100644 index 0000000000000000000000000000000000000000..afc2b77ce8779056f52c7b6bd4eba146c1151606 --- /dev/null +++ b/detectors/R50_nodown/utils/training.py @@ -0,0 +1,105 @@ +''' +Copyright 2024 Image Processing Research Group of University Federico +II of Naples ('GRIP-UNINA'). All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +import os +import torch +import numpy as np +import tqdm +from networks import create_architecture, count_parameters + +class TrainingModel(torch.nn.Module): + + def __init__(self, opt): + super(TrainingModel, self).__init__() + + self.opt = opt + self.total_steps = 0 + self.save_dir = (os.path.join('checkpoint', opt.name,'weights')) + self.device = torch.device(opt.device if torch.cuda.is_available() else 'cpu') + + self.model = create_architecture(opt.arch, pretrained=True, num_classes=1) + num_parameters = count_parameters(self.model) + print(f"Arch: {opt.arch} with #trainable {num_parameters}") + + self.loss_fn = torch.nn.BCEWithLogitsLoss().to(self.device) + parameters = filter(lambda p: p.requires_grad, self.model.parameters()) + self.optimizer = torch.optim.Adam(parameters, lr=opt.lr, betas=(opt.beta1, 0.999), weight_decay=opt.weight_decay) + + self.model.to(self.device) + + def adjust_learning_rate(self, min_lr=1e-6): + for param_group in self.optimizer.param_groups: + param_group["lr"] /= 10.0 + if param_group["lr"] < min_lr: + return False + return True + + def get_learning_rate(self): + for param_group in self.optimizer.param_groups: + return param_group["lr"] + + def train_on_batch(self, data): + self.total_steps += 1 + self.model.train() + input = data['img'].to(self.device) + label = data['target'].to(self.device).float() + output = self.model(input) + if len(output.shape) == 4: + ss = output.shape + loss = self.loss_fn( + output, + label[:, None, None, None].repeat( + (1, int(ss[1]), int(ss[2]), int(ss[3])) + ), + ) + else: + loss = self.loss_fn(output.squeeze(1), label) + self.optimizer.zero_grad() + loss.backward() + self.optimizer.step() + return loss.cpu() + + def save_networks(self, epoch): + save_filename = f'{epoch}.pt' + save_path = os.path.join(self.save_dir, save_filename) + + # serialize model and optimizer to dict + state_dict = { + 'model': self.model.state_dict(), + 'optimizer': self.optimizer.state_dict(), + 'total_steps': self.total_steps, + } + + torch.save(state_dict, save_path) + + def predict(self, data_loader): + model = self.model.eval() + with torch.no_grad(): + y_true, y_pred, y_path = [], [], [] + for data in tqdm.tqdm(data_loader): + img = data['img'] + label = data['target'].cpu().numpy() + paths = list(data['path']) + out_tens = model(img.to(self.device)).cpu().numpy()[:, -1] + assert label.shape == out_tens.shape + + y_pred.extend(out_tens.tolist()) + y_true.extend(label.tolist()) + y_path.extend(paths) + + y_true, y_pred = np.array(y_true), np.array(y_pred) + return y_true, y_pred, y_path diff --git a/download_weights.py b/download_weights.py new file mode 100644 index 0000000000000000000000000000000000000000..24cdf869cf0f55fbf3d7fd29a3cce8bee5ee2d0d --- /dev/null +++ b/download_weights.py @@ -0,0 +1,67 @@ +""" +Download model weights for Hugging Face Spaces deployment. +This script downloads model weights on first run if they're not present. 
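+
+Usage (typically from the repository root, since the destination paths below are
+relative to it):
+
+    python download_weights.py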
+""" +import os +import urllib.request +import ssl + +# Bypass SSL verification for downloads +try: + _create_unverified_https_context = ssl._create_unverified_context +except AttributeError: + pass +else: + ssl._create_default_https_context = _create_unverified_https_context + + +def download_file(url, dest_path): + """Download a file from URL to destination path.""" + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + + if os.path.exists(dest_path): + print(f"✓ {dest_path} already exists") + return + + print(f"Downloading {os.path.basename(dest_path)}...") + try: + urllib.request.urlretrieve(url, dest_path) + print(f"✓ Downloaded {dest_path}") + except Exception as e: + print(f"✗ Failed to download {dest_path}: {e}") + + +# Model weights URLs (update these with actual URLs) +WEIGHTS_URLS = { + "R50_TF": "https://drive.google.com/uc?export=download&id=YOUR_GOOGLE_DRIVE_ID", # Replace + "R50_nodown": "https://drive.google.com/uc?export=download&id=YOUR_GOOGLE_DRIVE_ID", # Replace + "CLIP-D": "https://drive.google.com/uc?export=download&id=YOUR_GOOGLE_DRIVE_ID", # Replace + "P2G": "https://drive.google.com/uc?export=download&id=YOUR_GOOGLE_DRIVE_ID", # Replace + "NPR": "https://drive.google.com/uc?export=download&id=YOUR_GOOGLE_DRIVE_ID", # Replace +} + + +def download_all_weights(): + """Download all model weights if not present.""" + print("Checking model weights...") + + for model_name, url in WEIGHTS_URLS.items(): + dest_path = f"detectors/{model_name}/checkpoint/pretrained/weights/best.pt" + + # Skip if URL not configured + if "YOUR_GOOGLE_DRIVE_ID" in url: + print(f"⚠ Skipping {model_name}: URL not configured") + continue + + download_file(url, dest_path) + + # Download P2G classes.pkl + classes_url = "https://github.com/laitifranz/Prompt2Guard/raw/main/src/utils/classes.pkl" + classes_path = "detectors/P2G/src/utils/classes.pkl" + download_file(classes_url, classes_path) + + print("\nWeight check complete!") + + +if __name__ == "__main__": + download_all_weights() diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..7bdc85af5eab07d763ffdd6d80b444d84d552996 --- /dev/null +++ b/environment.yml @@ -0,0 +1,29 @@ +name: IDFD_VENV +channels: + - pytorch + - nvidia + - conda-forge +dependencies: + # Core Python + - python=3.10 + - pip + + # ML / DL Frameworks + - pytorch-cuda=12.1 # remove if using CPU-only + + # pip-only packages + - pip: + - torch==2.4.0 + - torchvision==0.19 + - pandas==2.2.3 + - scikit-image==0.22.0 + - scikit-learn==1.5.2 + - pyyaml==6.0.2 + - tqdm==4.67.1 + - opencv-python==4.10.0.84 + - transformers + - einops==0.8.0 + - albumentations + - ftfy + - open-clip-torch + - wandb==0.19.0 diff --git a/launcher.py b/launcher.py new file mode 100644 index 0000000000000000000000000000000000000000..7069bdfea2d967bc6e0533c3e482355de84d6860 --- /dev/null +++ b/launcher.py @@ -0,0 +1,337 @@ + +import os +import subprocess +import time +import argparse +import yaml +import glob +import shutil + + +def load_config(config_path): + """Load configuration from YAML file.""" + with open(config_path, 'r') as f: + config = yaml.safe_load(f) + return config + +# smi vampire function, busy waiting for a free-enough GPU, use min_vram to set the threshold +def get_gpus(): + from numpy import argwhere, asarray, diff + import re + smi = os.popen('nvidia-smi').readlines() + div = re.compile('[+]-{3,}[+]|[|]={3,}[|]') + dividers = argwhere([div.match(line) != None for line in smi])[-2:, 0] + processes = [line for line in 
smi[dividers[0]+1:dividers[1]] if ' C ' in line] + free = list(set([process.split()[1] for process in processes]) ^ set([str(0), str(1)])) + + udiv = re.compile('[|]={3,}[+]={3,}[+]={3,}[|]') + ldiv = re.compile('[+]-{3,}[+]-{3,}[+]-{3,}[+]') + divider_up = argwhere([udiv.match(line) != None for line in smi])[0,0] + divider_down = argwhere([ldiv.match(line) != None for line in smi])[-1, 0] + + gpus = [line for line in smi[divider_up+1:divider_down] if '%' in line and 'MiB' in line] + gpus = [gpu.split('|')[2].replace(' ', '').replace('MiB', '').split('/') for gpu in gpus] + memory = diff(asarray(gpus).astype(int), axis=1).squeeze() + + return free, memory + +def autotest(train_list, data_list, detector_name, checkpoint_name): + """Generate task list from training and testing configurations.""" + assert type(data_list) == list + task_list = [] + + for train_config in train_list: + + train_dict = {'detector': detector_name, 'model': None, 'data': train_config['data']} + task_list.append({'type':'train', 'details':train_dict}) + + for data in data_list: + + name = checkpoint_name #train_dict['data'] + task_list.append({'type':'test', 'details':{'detector': detector_name, 'model': name, 'data': data}}) + + return task_list + + +def parse_phases(phases_str): + """Parse phases string into list.""" + if phases_str.lower() == 'both': + return ['train', 'test'] + elif phases_str.lower() == 'train': + return ['train'] + elif phases_str.lower() == 'test': + return ['test'] + else: + raise ValueError(f"Invalid phases: {phases_str}. Must be 'train', 'test', or 'both'") + + +def run_demo(args): + import json + import torch + + project_root = os.path.abspath(os.path.dirname(__file__)) + demo_root = os.path.join(project_root, 'demo_images') + assert os.path.isdir(demo_root), f"Demo folder not found: {demo_root}" + + # Build split file from demo_images + def build_demo_split_json(root_path, out_path): + test_entries = [] + for mod in ['PreSocial', 'Facebook', 'Telegram', 'X']: + mod_path = os.path.join(root_path, mod) + if not os.path.isdir(mod_path): + continue + for dirpath, dirnames, filenames in os.walk(mod_path, topdown=True, followlinks=True): + if len(dirnames): + continue + rel_dir = f"{dirpath}/".replace(mod_path + os.sep, '') + parts = rel_dir.split(os.sep)[:3] + if len(parts) < 3: + continue + label, gen, sub = parts + for fname in sorted(filenames): + ext = os.path.splitext(fname)[1].lower() + if ext not in ['.png', '.jpg', '.jpeg']: + continue + stem = os.path.splitext(fname)[0] + test_entries.append(os.path.join(gen, sub, stem)) + + with open(out_path, 'w') as f: + json.dump({'test': sorted(list(set(test_entries)))}, f) + + split_demo_file = os.path.join(project_root, 'split_demo.json') + build_demo_split_json(demo_root, split_demo_file) + + def prepare_best_checkpoint(detector_dir, preferred_path=None): + weights_dir = os.path.join(detector_dir, 'checkpoint', 'pretrained', 'weights') + src_weight = None + + if preferred_path: + src_weight = preferred_path if os.path.isabs(preferred_path) else os.path.normpath(os.path.join(detector_dir, preferred_path)) + if not os.path.isfile(src_weight): + print(f"[demo] Preferred weights not found at {src_weight}, falling back to search") + src_weight = None + + if src_weight is None: + if not os.path.isdir(weights_dir): + return None + candidates = [] + for ext in ('*.pt', '*.pth'): + candidates.extend(glob.glob(os.path.join(weights_dir, ext))) + if not candidates: + return None + src_weight = sorted(candidates)[0] + + run_dir = os.path.join(detector_dir, 
'checkpoint', 'demo', 'weights') + os.makedirs(run_dir, exist_ok=True) + dst_weight = os.path.join(run_dir, 'best.pt') + shutil.copy2(src_weight, dst_weight) + return dst_weight + + + device = f"cuda:0" if torch.cuda.is_available() else "cpu" + name = 'demo' + + detectors_root = os.path.join(project_root, 'detectors') + all_methods = ['R50_nodown', 'CLIP-D', 'R50_TF', 'P2G', 'NPR'] + methods = all_methods if args.demo_detector == 'all' else [args.demo_detector] + + os.makedirs(os.path.join(project_root, 'logs'), exist_ok=True) + + for method in methods: + det_dir = os.path.join(detectors_root, method) + if not os.path.isdir(det_dir): + continue + + preferred_weights = args.weights_name or './checkpoint/pretrained/weights/best.pt' + best_path = prepare_best_checkpoint(det_dir, preferred_weights) + if best_path is None: + print(f"[demo] Skipping {method}: no pretrained weights found under checkpoint/pretrained/weights/") + continue + + config_path = os.path.join(args.config_dir, f'{method}.yaml') + config = load_config(config_path) if os.path.exists(config_path) else {} + detector_args = config.get('detector_args', []) + testing_keys = config.get('testing', []) or ['all:all'] + global_cfg = config.get('global', {}) + num_threads = global_cfg.get('num_threads', 8) + + for data_keys in testing_keys: + args_list = [ + f'--name "{name}"', + f'--task test', + f'--device {device}', + f'--split_file {split_demo_file}', + f'--data_root {demo_root}', + f'--data_keys "{data_keys}"', + f'--num_threads {num_threads}', + ] + detector_args + + cmd_args = ' '.join(args_list) + log_file = os.path.join(project_root, 'logs', f'demo_{method}_{data_keys.replace(":","-")}.log') + with open(log_file, 'w') as f: + cwd = os.getcwd() + os.chdir(det_dir) + try: + print(f"[demo] Running {method} test with args: {cmd_args}") + runner = 'test.py' + subprocess.run(f'python -u {runner} {cmd_args}', shell=True)#, stdout=f, stderr=f) + finally: + os.chdir(cwd) + shutil.rmtree(os.path.join(det_dir, 'checkpoint', 'demo')) + + print('[demo] Completed. 
Results saved under detectors//results/demo//results.csv') + +def main(): + # Parse command-line arguments + parser = argparse.ArgumentParser(description='Launcher for deepfake detector training and testing') + parser.add_argument('--detector', type=str, required=False, + choices=['R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR'], + help='Detector to use') + parser.add_argument('--phases', type=str, default='both', + choices=['train', 'test', 'both'], + help='Phases to run: train, test, or both (default: both)') + parser.add_argument('--config-dir', type=str, default='configs', + help='Path to configs directory (default: configs/)'), + parser.add_argument('--weights_name', type=str, default=None, + help='Name of the weights directory') + parser.add_argument('--demo', action='store_true', help='Run demo on demo_images across detectors') + parser.add_argument('--demo-detector', type=str, default='all', choices=['all', 'R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR'], help='Which detector to demo (default: all)') + + # Add detect mode arguments + detect_group = parser.add_argument_group('detect', 'Single image detection options') + detect_group.add_argument('--detect', action='store_true', help='Run single image detection mode') + detect_group.add_argument('--image', type=str, help='Path to image file for detection') + detect_group.add_argument('--weights', type=str, default='pretrained', help='Path to model weights for detection') + detect_group.add_argument('--output', type=str, help='Path to save detection results') + detect_group.add_argument('--dry-run', action='store_true', help='Print commands without executing') + + args = parser.parse_args() + + if args.demo: + return run_demo(args) + + if args.detect: + if args.detector is None: + parser.error('--detector is required for detect mode') + if args.image is None: + parser.error('--image is required for detect mode') + from support.detect import run_detect + return run_detect(args) + + if args.detector is None: + parser.error('--detector is required unless --demo is specified') + + # Load configuration from YAML + config_path = os.path.join(args.config_dir, f'{args.detector}.yaml') + if not os.path.exists(config_path): + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + config = load_config(config_path) + + # Extract configuration values + global_config = config.get('global', {}) + dataset_path = global_config.get('dataset_path') + device_override = global_config.get('device_override') # Can be None + if args.weights_name is not None: + global_config['name'] = args.weights_name + else: + global_config['name'] = config.get('training', [])[0]['data'] + model_name = global_config.get('name') + # Handle string "null" as None + if device_override == "null" or device_override == "": + device_override = None + min_vram = global_config.get('min_vram', 16000) + split_file = os.path.abspath(global_config.get('split_file', 'split.json')) + num_threads = global_config.get('num_threads', 8) + dry_run = global_config.get('dry_run', False) + only_list = global_config.get('only_list', False) + phases = parse_phases(args.phases) + + detector_args = config.get('detector_args', []) + training_configs = config.get('training', []) + test_list = config.get('testing', []) + + os.makedirs('logs', exist_ok=True) + + # Generate tasks + tasks = [] + if training_configs: + tasks.extend(autotest(training_configs, test_list, args.detector, model_name)) + + print('Number of tasks:', len(tasks)) + for task in tasks: + print(task) + + if 
only_list: + return + + # From here the launcher will create all the arguments to use when calling the train script + for task in tasks: + if task['type'] not in phases: + continue + + cmd_args = [] + + if task['type'] == 'train': + cmd_args.append(f'--name "{model_name}"')#{task["details"]["model"]}"') + else: + cmd_args.append(f'--name "{task["details"]["model"]}"') + + cmd_args.append(f'--split_file {split_file}') + cmd_args.append(f'--task {task["type"]}') + cmd_args.append(f'--num_threads {num_threads}') + cmd_args.append(f'--data_keys "{task["details"]["data"]}"') + cmd_args.append(f'--data_root {dataset_path}') + + device = None + if device_override is not None: + device = device_override + else: + if not dry_run: + print('Waiting for GPU') + while device is None: + free, memory = get_gpus() + if len(free): + device = "cuda:" + free[0] + elif max(memory) > min_vram: + device = "cuda:" + str([i for i, mem in enumerate(memory) if mem == max(memory)][0]) + time.sleep(1) + print('GPU found') + + cmd_args.append(f'--device {device}') + + # Add detector-specific arguments + for arg in detector_args: + cmd_args.append(arg) + + cmd_args_str = ' '.join(cmd_args) + + # Call train.py or test.py + if not dry_run: + #log_file = f'logs/{task["type"]}_{task["details"]["detector"]}_{task["details"]["model"]}_{task["details"]["data"]}.log' + log_file = f'logs/{task["type"]}_{task["details"]["detector"]}_{model_name}_{task["details"]["data"]}.log' + with open(log_file, 'w') as f: + cwd = os.getcwd() + os.chdir(f'./detectors/{task["details"]["detector"]}') + + start_time = time.time() + + runner = f'{task["type"]}.py' + print(f'Call to {runner} with: {cmd_args_str}') + + subprocess.run(f'python -u {runner} {cmd_args_str}', shell=True)#, stdout=f, stderr=f) + + end_time = time.time() + print(f'Execution time: {end_time-start_time:.2f} seconds') + + print('#'*80) + print('#'*80) + + os.chdir(cwd) + + +if __name__ == '__main__': + main() + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..07046e6c48529ff5fa35e0bc6fbb247c1ac0ed26 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,33 @@ +# ML / DL Frameworks +torch==2.4.0 +# torchaudio==2.4.0 +torchvision==0.19 + +# Hugging Face & Timm +# accelerate +# diffusers +# timm +transformers + +# ML/DL Helpers +einops==0.8.0 + +# Data Science +# matplotlib==3.9.3 +opencv-python==4.10.0.84 +pandas==2.2.3 +scikit-image==0.22.0 +scikit-learn==1.5.2 +# sympy==1.13.3 + +# MLOps & Infrastructure +wandb==0.19.0 + +# Utilities +# imageio==2.34.0 +PyYAML==6.0.2 +tqdm==4.67.1 +albumentations +ftfy +open-clip-torch +gradio \ No newline at end of file diff --git a/split.json b/split.json new file mode 100644 index 0000000000000000000000000000000000000000..6d6adf0e846bc4c272970983e6ec1c9e48e46172 --- /dev/null +++ b/split.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b15269fac22a7e6e8ba73860541467d0de2bb2604f50d95c2077ab089d15f1b +size 2942530 diff --git a/support/__pycache__/detect.cpython-310.pyc b/support/__pycache__/detect.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7f4897f237fb22959ad569c5f900d07b9537d8b Binary files /dev/null and b/support/__pycache__/detect.cpython-310.pyc differ diff --git a/support/__pycache__/detect_utils.cpython-310.pyc b/support/__pycache__/detect_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a80cf7318e01408f995058f09d3868decad57c1 Binary files /dev/null and 
b/support/__pycache__/detect_utils.cpython-310.pyc differ diff --git a/support/detect.py b/support/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..4b928ac2268b848cc5186ab947f84580abe23572 --- /dev/null +++ b/support/detect.py @@ -0,0 +1,104 @@ +import os +import json +import sys +import yaml +import subprocess +from datetime import datetime + +def get_gpus(): + from numpy import argwhere, asarray, diff + import re + smi = os.popen('nvidia-smi').readlines() + div = re.compile('[+]-{3,}[+]|[|]={3,}[|]') + dividers = argwhere([div.match(line) != None for line in smi])[-2:, 0] + processes = [line for line in smi[dividers[0]+1:dividers[1]] if ' C ' in line] + free = list(set([process.split()[1] for process in processes]) ^ set([str(0), str(1)])) + + udiv = re.compile('[|]={3,}[+]={3,}[+]={3,}[|]') + ldiv = re.compile('[+]-{3,}[+]-{3,}[+]-{3,}[+]') + divider_up = argwhere([udiv.match(line) != None for line in smi])[0,0] + divider_down = argwhere([ldiv.match(line) != None for line in smi])[-1, 0] + + gpus = [line for line in smi[divider_up+1:divider_down] if '%' in line and 'MiB' in line] + gpus = [gpu.split('|')[2].replace(' ', '').replace('MiB', '').split('/') for gpu in gpus] + memory = diff(asarray(gpus).astype(int), axis=1).squeeze() + + return free, memory + +def load_config(config_path): + """Load configuration from YAML file.""" + with open(config_path, 'r') as f: + return yaml.safe_load(f) + +def run_detect(args): + """Run single image detection.""" + if not args.image: + raise ValueError("--image is required for detect mode") + + if not os.path.exists(args.image): + raise FileNotFoundError(f"Image not found: {args.image}") + + # Load detector config + config_path = os.path.join(args.config_dir, f'{args.detector}.yaml') + if not os.path.exists(config_path): + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + config = load_config(config_path) + global_config = config.get('global', {}) + + # Set device + if hasattr(args, 'device') and args.device: + device = args.device + else: + device = global_config.get('device_override') + + if not device or device == "null": + _, memory = get_gpus() + if len(memory) > 0: + device = f"cuda:{memory.argmax()}" + else: + device = "cpu" + + # Set up output path for results + if not args.output: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + # Extract image filename without extension + image_name = os.path.splitext(os.path.basename(args.image))[0] + output_dir = os.path.join('detection_results', args.detector, 'detect') + os.makedirs(output_dir, exist_ok=True) + args.output = os.path.join(output_dir, f'{timestamp}_{image_name}.json') + + # Call detector's detect.py + detector_path = os.path.join('detectors', args.detector) + detect_script = os.path.join(detector_path, 'detect.py') + + if not os.path.exists(detect_script): + raise FileNotFoundError(f"Detector {args.detector} does not support single image detection") + + cmd_args = [ + sys.executable, + detect_script, + f'--image "{args.image}"', + f'--device {device}', + f'--output "{args.output}"' + ] + + # Add model path if specified + if args.weights: + cmd_args.append(f'--model "{args.weights}"') + + cmd = ' '.join(cmd_args) + print(f"Running detection with {args.detector}...") + + if not args.dry_run: + subprocess.run(cmd, shell=True)#, check=True) + + # Print results if available + if os.path.exists(args.output): + with open(args.output, 'r') as f: + result = json.load(f) + print("\nDetection Results:") + print(f"Prediction: 
{result['prediction']}") + print(f"Confidence: {result['confidence']:.4f}") + print(f"Time: {result['elapsed_time']:.3f}s") + print(f"\nFull results saved to: {args.output}") \ No newline at end of file diff --git a/support/detect_utils.py b/support/detect_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a9a8250a6e05eb53ec1837a43e155c7d1d0e6376 --- /dev/null +++ b/support/detect_utils.py @@ -0,0 +1,86 @@ +import os +import torch +from PIL import Image +import torchvision.transforms as transforms +import json + +def load_image(image_path, size=224): + """Load and preprocess an image for detection. + + Args: + image_path (str): Path to the image file + size (int): Size to resize the image to (default: 224) + + Returns: + torch.Tensor: Preprocessed image tensor + PIL.Image: Original loaded image + """ + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found: {image_path}") + + image = Image.open(image_path).convert('RGB') + + # Standard normalization used by most models + preprocess = transforms.Compose([ + transforms.Resize(size), + transforms.CenterCrop(size), + transforms.ToTensor(), + transforms.Normalize( + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225] + ) + ]) + + return preprocess(image).unsqueeze(0), image + +def format_result(prediction, confidence, elapsed_time): + """Format detection results. + + Args: + prediction (str): 'real' or 'fake' + confidence (float): Confidence score (0-1) + elapsed_time (float): Detection time in seconds + + Returns: + dict: Formatted results + """ + return { + "prediction": prediction, + "confidence": float(confidence), + "elapsed_time": float(elapsed_time) + } + +def save_result(result, output_path): + """Save detection result to JSON file. + + Args: + result (dict): Detection result dictionary + output_path (str): Path to save the JSON file + """ + if os.path.dirname(output_path): + os.makedirs(os.path.dirname(output_path), exist_ok=True) + with open(output_path, 'w') as f: + json.dump(result, f, indent=2) + +def get_device(): + """Get the best available device (CUDA or CPU). + + Returns: + torch.device: Device to use for computation + """ + return torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +def load_model(model_path, device): + """Load a PyTorch model from checkpoint. 
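+
+    Example (illustrative sketch; assumes a checkpoint saved by the training code's
+    save_networks, i.e. a dict holding a 'model' state_dict, and a hypothetical
+    network instance ``net`` with a matching architecture):
+
+        device = get_device()
+        ckpt = load_model('detectors/R50_TF/checkpoint/pretrained/weights/best.pt', device)
+        net.load_state_dict(ckpt['model'])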
+
+    Args:
+        model_path (str): Path to model checkpoint
+        device (torch.device): Device to load the model to
+
+    Returns:
+        dict: Loaded checkpoint
+    """
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model checkpoint not found: {model_path}")
+
+    return torch.load(model_path, map_location=device)
\ No newline at end of file
diff --git a/support/json_compile.py b/support/json_compile.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c9b4c471f6869b647233d935882f43ad3c2d931
--- /dev/null
+++ b/support/json_compile.py
@@ -0,0 +1,74 @@
+import os
+import glob
+import pandas
+import random
+from torch.utils.data import DataLoader, random_split
+import json
+import bisect
+
+dataset_path = os.path.join(os.sep, 'path', 'to', 'dataset')
+
+datasets = []
+for dataset_root, dataset_dirs, dataset_files in os.walk(os.path.join(dataset_path), topdown=True, followlinks=True):
+    if len(dataset_dirs):
+        continue
+
+    # if 'Telegram' not in dataset_root:
+    #     continue
+
+    id = dataset_root.split('Real/')[-1].split('Fake/')[-1]
+    #print(id)
+
+    shr = dataset_root.replace(dataset_path + os.sep, '').split('/')[0]
+    #print(shr)
+    if 'FORLAB' in id or 'FFHQ' in id:
+        files = sorted([file.replace('.jpg', '').replace('.png', '') for file in dataset_files])[:40000]
+    else:
+        files = sorted([file.replace('.jpg', '').replace('.png', '') for file in dataset_files])
+    #print(len(files))
+    datasets.append({'id': id, 'shared': shr, 'root': dataset_root, 'files': files})
+
+
+split = []
+
+train_set = []
+val_set = []
+test_set = []
+for dataset in [dataset for dataset in datasets if (dataset['shared'] == 'Telegram')]:
+    print(dataset['id'])
+    files_pre = [dataset_com for dataset_com in datasets if dataset_com['shared'] == 'PreSocial' and dataset_com['id'] == dataset['id']][0]['files']
+    files_post = dataset['files']
+
+    train_set_post, val_set_post, test_set_post = random_split(files_post, [0.7, 0.15, 0.15])
+
+    residual_pre = [file for file in files_pre if file not in files_post]
+    residual_pre_neg = [file for file in files_pre if file in files_post]
+
+    train_set_pre, val_set_pre, test_set_pre = random_split(residual_pre, [0.7, 0.15, 0.15])
+
+    train_set = train_set + [os.path.join(dataset['id'], file) for file in train_set_post] + [os.path.join(dataset['id'], file) for file in train_set_pre]
+    val_set = val_set + [os.path.join(dataset['id'], file) for file in val_set_post] + [os.path.join(dataset['id'], file) for file in val_set_pre]
+    test_set = test_set + [os.path.join(dataset['id'], file) for file in test_set_post] + [os.path.join(dataset['id'], file) for file in test_set_pre]
+
+    print(len(train_set_post), len(val_set_post), len(test_set_post), ':', len(train_set_post)+len(val_set_post)+len(test_set_post))
+    print(len(train_set_pre), len(val_set_pre), len(test_set_pre), ':', len(train_set_pre)+len(val_set_pre)+len(test_set_pre))
+    print(len(train_set_pre)+len(train_set_post), len(val_set_pre)+len(val_set_post), len(test_set_pre)+len(test_set_post), ':', len(train_set_pre)+len(train_set_post)+len(val_set_pre)+len(val_set_post)+len(test_set_pre)+len(test_set_post))
+    #print(val_set)
+    #print(test_set)
+    #train_set = train_set + [os.path.join(dataset['id'], file) for file in train_set_pre]
+    #val_set = val_set + [os.path.join(dataset['id'], file) for file in val_set_pre]
+    #test_set = test_set + [os.path.join(dataset['id'], file) for file in test_set_pre]
+
+print(len(train_set), len(val_set), len(test_set), ':', len(train_set)+len(val_set)+len(test_set))
+
+#with 
open("train.json", "w") as f: +# json.dump(train_set, f) + +#with open("val.json", "w") as f: +# json.dump(val_set, f) + +#with open("test.json", "w") as f: +# json.dump(test_set, f) + +with open("split.json", "w") as f: + json.dump({'train': sorted(train_set), 'val': sorted(val_set), 'test': sorted(test_set)}, f) \ No newline at end of file diff --git a/support/parser.py b/support/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..9b6cae39ae4c11930ad5c4ae198a3c32c8a1a407 --- /dev/null +++ b/support/parser.py @@ -0,0 +1,373 @@ +import os +import glob +import pandas as pd +import numpy as np + +def name_mapper(name): + gen_keys = { + 'gan1':['SG'], + 'gan2':['SG2'], + 'gan3':['SG3'], + 'sd15':['SD1.5'], + 'sd2':['SD2.1'], + 'sd3':['SD3'], + 'sdXL':['SDXL'], + 'flux':['FLUX.1'], + 'realFFHQ':['FFHQ'], + 'realFORLAB':['FORLAB'] + } + return gen_keys[name][0] + +def to_latex_table(df, caption, train_key): + first_line = df.columns.to_list() + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table*}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{{"l"*len(first_line)}}}\n' + f.write(header) + + header = f'{first_line[0]} & {" & ".join([name_mapper(first_line[i].split(":")[0]) for i in range(1, len(first_line))])} \\\ \hline \hline \n' + f.write(header) + + for i, row in df.iterrows(): + # row = ' & '.join([row[0]] + [f'{row[i]:.2f}' for i in range(1, len(row))]) + row = f'\SetCell[r=2]{{l}} {row[0]} & '.replace('R50_TF', 'Ours').replace('_', '\_').replace('nodown', 'ND') + " & ".join([f'\SetCell[r=2]{{c}} {row[i]:.2f}' for i in range(1, len(row))]) + ' \\\ \\\ \hline \n' + f.write(row) + + footer = f'{"&"*(len(first_line)-1)}\n\\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table*}}\n\n' + f.write(footer) + +def to_latex_table_trans(df, caption, train_key): + df = df.set_index('Detector') + df = df.T.reset_index(names=['Dataset']) + first_line = df.columns.to_list() + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{{"X[1,l,m]"+ "X[1,c,m]"*(len(first_line)-1)}}}\n' + f.write(header) + + header = f'{" & ".join([first_line[i] for i in range(0, len(first_line))])} \\\ \hline \hline \n'.replace("R50_TF", "Ours").replace("_", "\_").replace("nodown", "ND") + f.write(header) + + for i, row in df.iterrows(): + # row = ' & '.join([row[0]] + [f'{row[i]:.2f}' for i in range(1, len(row))]) + row = f'{name_mapper(row[0].split(":")[0])} & ' + " & ".join([f'{row[i]:.2f}' for i in range(1, len(row))]) + ' \\\ \hline \n' + f.write(row) + + footer = f'\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table}}\n\n' + f.write(footer) + +def to_latex_table_trans_diff(df, df2, caption, train_key): + df = df.set_index('Detector') + df2 = df2.set_index('Detector') + df = df.T.reset_index(names=['Data']) + df2 = df2.T.reset_index(names=['Data']) + first_line = df.columns.to_list() + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{{"l"*len(first_line)}}}\n' + f.write(header) + + header = f'{" & ".join([first_line[i] for i in range(0, len(first_line))])} \\\ \hline \hline \n'.replace("R50_TF", "Ours").replace("_", "\_").replace("nodown", "ND") + f.write(header) + + for (i, row), (i2, row2) in zip(df.iterrows(), df2.iterrows()): + # diffs = [f'{row2[i] - row[i]:.2f}' for i in range(1, len(row))] + diffs = [row2[i] - row[i] for i in range(1, len(row))] + diffs_str = [f'{diff:.2f}' for diff in diffs] + print([f'{diff:.2f}' for diff in 
diffs]) + # row = ' & '.join([row[0]] + [f'{row[i]:.2f}' for i in range(1, len(row))]) + # row_head = f'\SetCell[r=2]{{l}} {name_mapper(row[0].split(":")[0])} & ' + row_head = f'\SetCell[r=3]{{l}} {name_mapper(row[0].split(":")[0])} & ' \ + + f'& Facebook {"&"*(len(first_line)-2)} \\\ \n' \ + + f'& Telegram {"&"*(len(first_line)-2)} \\\ \n' \ + + f'& Twitter {"&"*(len(first_line)-2)} \\\ \n' + row_center = " & ".join([f'\SetCell[r=2]{{c}} \\textcolor{{{"red" if diff < 0 else "blue"}}}{{{diff_str}}}' for diff, diff_str in zip(diffs, diffs_str)]) + row_tail = ' \\\ \\\ \hline \n' + row = row_head + row_center + row_tail + f.write(row) + + footer = f'{"&"*(len(first_line)-1)}\n\\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table}}\n\n' + f.write(footer) + +def to_latex_table_diffs(df, df_fb, df_tl, df_tw, caption, train_key): + df = df.set_index('Detector') + df = df.T.reset_index(names=['Data']) + + df_fb = df_fb.set_index('Detector') + df_fb = df_fb.T.reset_index(names=['Data']) + + df_tl = df_tl.set_index('Detector') + df_tl = df_tl.T.reset_index(names=['Data']) + + df_tw = df_tw.set_index('Detector') + df_tw = df_tw.T.reset_index(names=['Data']) + + first_line = df.columns.to_list() + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{rows = {{abovesep=1pt,belowsep=1pt}}, colsep = 2pt, colspec = {{{"X[1,l,m]X[-1,l,m]"+"X[1,c,m]"*(len(first_line)-1)}}}}}\n' + + + f.write(header) + + header = f'\SetCell[c=2]{{c}} {first_line[0]}' + ' & & '+ f'{" & ".join([first_line[i] for i in range(1, len(first_line))])} \\\ \hline \hline \n'.replace("R50_TF", "Ours").replace("R50_nodown", "R50-ND") + f.write(header) + + for (i, row), (_, row_fb), (_, row_tl), (_, row_tw)in zip(df.iterrows(), df_fb.iterrows(), df_tl.iterrows(), df_tw.iterrows()): + row_head = f'\SetCell[r=3]{{r}} \\footnotesize {name_mapper(row[0].split(":")[0])} & ' + row_pad = f' & ' + + row = [row[0]] + [float(f'{row[i]:.2f}') for i in range(1, len(row))] + row_fb = [row_fb[0]] + [float(f'{row_fb[i]:.2f}') for i in range(1, len(row_fb))] + row_tl = [row_tl[0]] + [float(f'{row_tl[i]:.2f}') for i in range(1, len(row_tl))] + row_tw = [row_tw[0]] + [float(f'{row_tw[i]:.2f}') for i in range(1, len(row_tw))] + + row_center_fb = "\\tiny FB & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_fb[i] - row[i]) < 0.05 else ("red" if row_fb[i] - row[i] < 0 else "blue")}}}{{{row_fb[i]:.2f} \\tiny {row_fb[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + row_center_tl = "\\tiny TL & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tl[i] - row[i]) < 0.05 else ("red" if row_tl[i] - row[i] < 0 else "blue")}}}{{{row_tl[i]:.2f} \\tiny {row_tl[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + row_center_tw = "\\tiny TW & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tw[i] - row[i]) < 0.05 else ("red" if row_tw[i] - row[i] < 0 else "blue")}}}{{{row_tw[i]:.2f} \\tiny {row_tw[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + + row_tail_middle = ' \\\ \n' + row_tail_end = ' \\\ \hline \n' + row = row_head + row_center_fb + row_tail_middle + row_pad + row_center_tl + row_tail_middle + row_pad + row_center_tw + row_tail_end + f.write(row) + + footer = f'\\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table}}\n\n' + f.write(footer) + +def to_latex_table_all_trans(df, df_fb, df_tl, df_tw, caption, train_key): + df = df.set_index('Detector') + df = df.T.reset_index(names=['Data']) + + df_fb = 
df_fb.set_index('Detector') + df_fb = df_fb.T.reset_index(names=['Data']) + + df_tl = df_tl.set_index('Detector') + df_tl = df_tl.T.reset_index(names=['Data']) + + df_tw = df_tw.set_index('Detector') + df_tw = df_tw.T.reset_index(names=['Data']) + + first_line = df.columns.to_list() + + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{rows = {{abovesep=1pt,belowsep=1pt}}, colsep = 2pt, colspec = {{{"X[1,l,m]X[-1,l,m]"+"X[1,c,m]"*(len(first_line)-1)}}}}}\n' + + + f.write(header) + + header = f'\SetCell[c=2]{{c}} {first_line[0]}' + ' & & '+ f'{" & ".join([first_line[i] for i in range(1, len(first_line))])} \\\ \hline \hline \n'.replace("R50_TF", "Ours").replace("R50_nodown", "R50-ND") + f.write(header) + + for (i, row), (_, row_fb), (_, row_tl), (_, row_tw)in zip(df.iterrows(), df_fb.iterrows(), df_tl.iterrows(), df_tw.iterrows()): + row_head = f'\SetCell[r=4]{{r}} \\footnotesize {name_mapper(row[0].split(":")[0])} & ' + row_pad = f' & ' + + row = [row[0]] + [float(f'{row[i]:.2f}') for i in range(1, len(row))] + row_fb = [row_fb[0]] + [float(f'{row_fb[i]:.2f}') for i in range(1, len(row_fb))] + row_tl = [row_tl[0]] + [float(f'{row_tl[i]:.2f}') for i in range(1, len(row_tl))] + row_tw = [row_tw[0]] + [float(f'{row_tw[i]:.2f}') for i in range(1, len(row_tw))] + + row_center = "\\scriptsize \\textbf{Pre} & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black"}}}{{{row[i]:.2f}}}' for i in range(1, len(row))]) + row_center_fb = "\\scriptsize FB & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_fb[i] - row[i]) < 0.01 else ("red" if row_fb[i] - row[i] < 0 else "blue")}}}{{{row_fb[i]:.2f} \\tiny {row_fb[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + row_center_tl = "\\scriptsize TL & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tl[i] - row[i]) < 0.01 else ("red" if row_tl[i] - row[i] < 0 else "blue")}}}{{{row_tl[i]:.2f} \\tiny {row_tl[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + row_center_tw = "\\scriptsize TW & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tw[i] - row[i]) < 0.01 else ("red" if row_tw[i] - row[i] < 0 else "blue")}}}{{{row_tw[i]:.2f} \\tiny {row_tw[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + + row_tail_middle = ' \\\ \n' + row_tail_end = ' \\\ \hline \n' + row = row_head + row_center + row_tail_middle + row_pad + row_center_fb + row_tail_middle + row_pad + row_center_tl + row_tail_middle + row_pad + row_center_tw + row_tail_end + f.write(row) + + footer = f'\\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table}}\n\n' + f.write(footer) + +def to_latex_table_all_trans2(df, df_fb, df_tl, df_tw, caption, train_key): + df = df.set_index('Detector') + df = df.T.reset_index(names=['Data']) + + df_fb = df_fb.set_index('Detector') + df_fb = df_fb.T.reset_index(names=['Data']) + + df_tl = df_tl.set_index('Detector') + df_tl = df_tl.T.reset_index(names=['Data']) + + df_tw = df_tw.set_index('Detector') + df_tw = df_tw.T.reset_index(names=['Data']) + + first_line = df.columns.to_list() + + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{rows = {{abovesep=1pt,belowsep=1pt}}, colsep = 2pt, colspec = {{{"X[1,l,m]"+"X[1,r,m]X[1,l,m]"*(len(first_line)-1)}}}}}\n' + f.write(header) + + header = f'{first_line[0]} & \SetCell[c=2]{{c}} ' + f'{" && TMP ".join([first_line[i] for i in range(1, len(first_line))])} \\\ \hline \hline 
\n'.replace("R50_TF", "Ours").replace("R50_nodown", "R50-ND").replace('TMP', '\SetCell[c=2]{c}') + f.write(header) + + for (i, row), (_, row_fb), (_, row_tl), (_, row_tw)in zip(df.iterrows(), df_fb.iterrows(), df_tl.iterrows(), df_tw.iterrows()): + + row = [row[0]] + [float(f'{row[i]:.2f}') for i in range(1, len(row))] + row_fb = [row_fb[0]] + [float(f'{row_fb[i]:.2f}') for i in range(1, len(row_fb))] + row_tl = [row_tl[0]] + [float(f'{row_tl[i]:.2f}') for i in range(1, len(row_tl))] + row_tw = [row_tw[0]] + [float(f'{row_tw[i]:.2f}') for i in range(1, len(row_tw))] + + first_row = f'\SetCell[r=3]{{l}} \\footnotesize {name_mapper(row[0].split(":")[0])} & ' + ' & '.join([f'\SetCell[r=3]{{r}} \\textcolor{{{"black"}}}{{{row[i]:.2f}}} & ' + f'\\tiny FB \\scriptsize \\textcolor{{{"black" if abs(row_fb[i] - row[i]) < 0.01 else ("red" if row_fb[i] - row[i] < 0 else "blue")}}}{{{row_fb[i]:.2f}}}' for i in range(1, len(row))]) + ' \\\ \n' + # first_row = f'\SetCell[r=3]{{r}} \\footnotesize {name_mapper(row[0].split(":")[0])} & ' + ' & '.join([f'\SetCell[r=3]{{r}} \\textcolor{{{"black"}}}{{{row[i]:.2f}}} & ' + f'\\textcolor{{{"black" if abs(row_fb[i] - row[i]) < 0.01 else ("red" if row_fb[i] - row[i] < 0 else "blue")}}}{{{row_fb[i]:.2f} \\tiny {row_fb[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + ' \\\ \n' + second_row = ' & ' + ' & '.join([' & ' + f'\\tiny TL \\scriptsize \\textcolor{{{"black" if abs(row_tl[i] - row[i]) < 0.01 else ("red" if row_tl[i] - row[i] < 0 else "blue")}}}{{{row_tl[i]:.2f}}}' for i in range(1, len(row))]) + ' \\\ \n' + # second_row = ' & ' + ' & '.join([' & ' + f'\\textcolor{{{"black" if abs(row_tl[i] - row[i]) < 0.01 else ("red" if row_tl[i] - row[i] < 0 else "blue")}}}{{{row_tl[i]:.2f} \\tiny {row_tl[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + ' \\\ \n' + third_row = ' & ' + ' & '.join([' & ' + f'\\tiny TW \\scriptsize\\textcolor{{{"black" if abs(row_tw[i] - row[i]) < 0.01 else ("red" if row_tw[i] - row[i] < 0 else "blue")}}}{{{row_tw[i]:.2f}}}' for i in range(1, len(row))]) + ' \\\ \hline \n' + # third_row = ' & ' + ' & '.join([' & ' + f'\\textcolor{{{"black" if abs(row_tw[i] - row[i]) < 0.01 else ("red" if row_tw[i] - row[i] < 0 else "blue")}}}{{{row_tw[i]:.2f} \\tiny {row_tw[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + ' \\\ \hline \n' + row = first_row + second_row + third_row + f.write(row) + + footer = f'\\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table}}\n\n' + f.write(footer) + +def to_latex_table_all(df, df_fb, df_tl, df_tw, caption, train_key): + first_line = df.columns.to_list() + + + with open(f'latex/{train_key}.txt', 'a') as f: + header = f'\\begin{{table*}}[]\n\\caption{{{caption}}}\n\\begin{{tblr}}{{rows = {{abovesep=1pt,belowsep=1pt}}, colsep = 2pt, colspec = {{{"X[1,l,m]X[1,l,m]"+"X[1,c,m]"*(len(first_line)-1)}}}}}\n' + + + f.write(header) + header = f'{first_line[0]} & & ' + " & ".join([f'{name_mapper(first_line[i].split(":")[0])} ' for i in range(1, len(first_line))]) + ' \\\ \hline \hline \n' + + #header = f'{first_line[0]}' + ' & & '+ f'{" & ".join([first_line[i] for i in range(1, len(first_line))])} \\\ \hline \hline \n'.replace("R50_TF", "Ours").replace("_", "\_").replace("nodown", "ND") + f.write(header) + + for (i, row), (_, row_fb), (_, row_tl), (_, row_tw)in zip(df.iterrows(), df_fb.iterrows(), df_tl.iterrows(), df_tw.iterrows()): + row_head = f'\SetCell[r=4]{{r}} \\footnotesize ' + f'{row[0]} & '.replace('R50_TF', 'Ours').replace('R50_nodown', 'R50-ND') + row_pad = f' & ' + + + row = [row[0]] + [float(f'{row[i]:.2f}') 
for i in range(1, len(row))] + row_fb = [row_fb[0]] + [float(f'{row_fb[i]:.2f}') for i in range(1, len(row_fb))] + row_tl = [row_tl[0]] + [float(f'{row_tl[i]:.2f}') for i in range(1, len(row_tl))] + row_tw = [row_tw[0]] + [float(f'{row_tw[i]:.2f}') for i in range(1, len(row_tw))] + + # row_center = "\\tiny \\textbf{PreSocial} & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black"}}}{{{row[i]:.2f}}}' for i in range(1, len(row))]) + # row_center_fb = "\\tiny FB & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_fb[i] - row[i]) < 0.01 else ("red" if row_fb[i] - row[i] < 0 else "blue")}}}{{{row_fb[i]:.2f} \\tiny {row_fb[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + # row_center_tl = "\\tiny TL & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tl[i] - row[i]) < 0.01 else ("red" if row_tl[i] - row[i] < 0 else "blue")}}}{{{row_tl[i]:.2f} \\tiny {row_tl[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + # row_center_tw = "\\tiny TW & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tw[i] - row[i]) < 0.01 else ("red" if row_tw[i] - row[i] < 0 else "blue")}}}{{{row_tw[i]:.2f} \\tiny {row_tw[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + + row_tail_middle = ' \\\ \n' + row_tail_end = ' \\\ \hline \n' + # row = row_head + row_center + row_tail_middle + row_pad + row_center_fb + row_tail_middle + row_pad + row_center_tl + row_tail_middle + row_pad + row_center_tw + row_tail_end + + row_center = "\\tiny Pre & \\scriptsize " + " & \\scriptsize ".join([f'{row[i]:.2f}' for i in range(1, len(row))]) + row_center_fb = "\\tiny FB & \\scriptsize " + " & \\scriptsize ".join([f'{row_fb[i]:.2f}' for i in range(1, len(row))]) + row_center_tl = "\\tiny TL & \\scriptsize " + " & \\scriptsize ".join([f'{row_tl[i]:.2f}' for i in range(1, len(row))]) + row_center_tw = "\\tiny TW & \\scriptsize " + " & \\scriptsize ".join([f'{row_tw[i]:.2f}' for i in range(1, len(row))]) + + # row_center = "\\tiny Pre & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black"}}}{{{row[i]:.2f}}}' for i in range(1, len(row))]) + # row_center_fb = "\\tiny FB & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_fb[i] - row[i]) < 0.01 else ("red" if row_fb[i] - row[i] < 0 else "blue")}}}{{{row_fb[i]:.2f} \\tiny {row_fb[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + # row_center_tl = "\\tiny TL & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tl[i] - row[i]) < 0.01 else ("red" if row_tl[i] - row[i] < 0 else "blue")}}}{{{row_tl[i]:.2f} \\tiny {row_tl[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + # row_center_tw = "\\tiny TW & \\scriptsize " + " & \\scriptsize ".join([f'\\textcolor{{{"black" if abs(row_tw[i] - row[i]) < 0.01 else ("red" if row_tw[i] - row[i] < 0 else "blue")}}}{{{row_tw[i]:.2f} \\tiny {row_tw[i] - row[i]:+.2f}}}' for i in range(1, len(row))]) + + row = row_head + row_center + row_tail_middle + row_pad + row_center_fb + row_tail_middle + row_pad + row_center_tl + row_tail_middle + row_pad + row_center_tw + row_tail_end + f.write(row) + + footer = f'\\end{{tblr}}\n\\label{{tab:dataset}}\n\\end{{table*}}\n\n' + f.write(footer) + +def column_sorter(list_of_columns): + columns_real = sorted([column for column in list_of_columns if 'real' in column]) + columns_gan = sorted([column for column in list_of_columns if 'gan' in column]) + columns_sd = sorted([column for column in list_of_columns if 'sd' in column]) + columns_flux = sorted([column for 
column in list_of_columns if 'flux' in column]) + return columns_real + columns_gan + columns_sd + columns_flux + +detectors = ['CLIP-D', 'MISLNet', 'NPR', 'P2G', 'R50_nodown', 'R50_TF'] +detectors = ['R50_TF', 'R50_nodown', 'MISLNet', 'NPR', 'CLIP-D', 'P2G'] +# detectors = ['R50_TF', 'R50_nodown', 'CLIP-D', 'P2G'] + +train = {} + + +for detector in detectors: + runs = os.listdir(os.path.join('..', 'detectors', detector, 'train')) + for run in runs: + train_set = run + if train_set not in train: + train[train_set] = {} + + results = glob.glob(os.path.join('..', 'detectors', detector, 'train', run, 'data', '**', '*.csv')) + for result in results: + test_set = result.split('/')[-2] + df = pd.read_csv(result) + + try: + y_pred = df['pro'].values + except KeyError: + y_pred = df['pro_mix'].values + + y_pred = y_pred > 0 + + y_true = df['flag'].values + + recall = np.sum(y_true == y_pred) / len(y_true) + + if test_set not in train[train_set]: + train[train_set][test_set] = [{'detector': detector, 'result': recall}] + else: + train[train_set][test_set].append({'detector': detector, 'result': recall}) + + +for train_key, train_dicts in train.items(): + df_train = pd.DataFrame(columns=['Detector', *train_dicts.keys()]) + + test_dict = {} + for test_key, test_dicts in train_dicts.items(): + detectors = [test_dict['detector'] for test_dict in test_dicts] + results = [test_dict['result'] for test_dict in test_dicts] + test_dict[test_key] = dict(zip(detectors, results)) + + for detector in detectors: + scores = [test_dict[test_key][detector] for test_key in test_dict.keys()] + df_train = df_train._append({'Detector': detector, **dict(zip(list(train_dicts.keys()), scores))}, ignore_index=True) + + try: + os.remove(f'latex/{train_key}.txt') + os.remove(f'latex/diff_{train_key}.txt') + except FileNotFoundError: + pass + df_train_pre = df_train[['Detector'] + column_sorter([column for column in df_train.columns if 'pre' in column])] + + df_train_fb = df_train[['Detector'] + column_sorter([column for column in df_train.columns if 'fb' in column])] + # to_latex_table_trans(df_train_fb, f'Facebook trained on {train_key}', train_key) + # to_latex_table_trans_diff(df_train_pre, df_train_fb, f'Delta Pre-FB trained on {train_key}'.replace('&', '\&'), train_key) + # quit() + + df_train_tl = df_train[['Detector'] + column_sorter([column for column in df_train.columns if 'tl' in column])] + # to_latex_table_trans(df_train_tl, f'Telegram trained on {train_key}', train_key) + # to_latex_table_trans_diff(df_train_pre, df_train_tl, f'Delta Pre-TL trained on {train_key}'.replace('&', '\&'), train_key) + + df_train_tw = df_train[['Detector'] + column_sorter([column for column in df_train.columns if 'tw' in column])] + # to_latex_table_trans(df_train_tw, f'Twitter trained on {train_key}', train_key) + # to_latex_table_trans_diff(df_train_pre, df_train_tw, f'Delta Pre-TW trained on {train_key}'.replace('&', '\&'), train_key) + + if 'gan' in train_key and 'sd' in train_key: + to_latex_table_trans(df_train_pre, f'PreSocial, trained on {train_key}'.replace('&', '\&'), train_key) + to_latex_table_diffs(df_train_pre, df_train_fb, df_train_tl, df_train_tw, f'Deltas, trained on {train_key}'.replace('&', '\&'), train_key) + + else: + to_latex_table_all_trans2(df_train_pre, df_train_fb, df_train_tl, df_train_tw, f'All, trained on {train_key}'.replace('&', '\&'), train_key) + + if 'freeze' in train_key or True: + df_train_pre.to_csv(f'results/{train_key}_pre.csv', index=False, float_format='%.2f') + 
df_train_fb.to_csv(f'results/{train_key}_fb.csv', index=False, float_format='%.2f') + df_train_tl.to_csv(f'results/{train_key}_tl.csv', index=False, float_format='%.2f') + df_train_tw.to_csv(f'results/{train_key}_tw.csv', index=False, float_format='%.2f') + + lines = [] + for csv in ['pre', 'fb', 'tl', 'tw']: + with open(f'results/{train_key}_{csv}.csv', 'r') as f: + lines.extend(f.readlines()) + lines.append('\n') + + with open(f'results/{train_key}.csv', 'w') as f: + f.writelines(lines) + + diff --git a/test_result.json b/test_result.json new file mode 100644 index 0000000000000000000000000000000000000000..9b1c8c8a2fad3225a0626d0d4af22f2208b50a78 --- /dev/null +++ b/test_result.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fea69fffb9a7a21e88e339c260874fc816dbee11c0f7d91e27c34c17a88faa1 +size 75 diff --git a/test_result_cpu.json b/test_result_cpu.json new file mode 100644 index 0000000000000000000000000000000000000000..4f3a0e5a2f9dc2c56233164a171f6252e0f9e5b3 --- /dev/null +++ b/test_result_cpu.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5899cbac2eeb4a09dcd3c42a40330c006d1e2e44e96f129b3b4c83769e2a0c04 +size 75
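
Note on the single-image contract: `support/detect.py` shells out to `detectors/<NAME>/detect.py` with `--image`, `--device`, `--output` and `--model` flags and then reads the `prediction`, `confidence` and `elapsed_time` fields of the JSON it writes, but no per-detector `detect.py` appears in this change set. The sketch below is only an illustration of that contract, built on the `support/detect_utils` helpers shown above; the ResNet-50 placeholder backbone, the 0.5 sigmoid threshold, the `sys.path` handling and treating `--model` as a checkpoint path are assumptions, not the repository's actual implementation.

```python
# Hypothetical detectors/<NAME>/detect.py sketch (not part of this change set).
import argparse
import os
import sys
import time

import torch

# Assumption: the repository root (which contains support/) is two levels up.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from support.detect_utils import (load_image, get_device, load_model,
                                  format_result, save_result)


def build_model():
    """Placeholder backbone: a ResNet-50 with a single logit, mirroring the
    BCEWithLogitsLoss setup in training; a real detector builds its own network."""
    from torchvision.models import resnet50
    net = resnet50(weights=None)
    net.fc = torch.nn.Linear(net.fc.in_features, 1)
    return net


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image', required=True, help='path to the image to analyse')
    parser.add_argument('--model', default='checkpoint/pretrained/weights/best.pt',
                        help='checkpoint path (assumed interpretation of --model)')
    parser.add_argument('--device', default=None, help='e.g. cuda:0 or cpu')
    parser.add_argument('--output', default='result.json', help='where to write the JSON result')
    args = parser.parse_args()

    device = torch.device(args.device) if args.device else get_device()

    model = build_model()
    ckpt = load_model(args.model, device)         # dict with a 'model' state_dict (see save_networks)
    model.load_state_dict(ckpt['model'])          # assumes the checkpoint matches the placeholder net
    model.to(device).eval()

    tensor, _ = load_image(args.image)            # 1x3x224x224 tensor plus the original PIL image

    start = time.time()
    with torch.no_grad():
        logit = model(tensor.to(device)).squeeze()
        confidence = torch.sigmoid(logit).item()  # treated here as the probability of 'fake'
    elapsed = time.time() - start

    prediction = 'fake' if confidence > 0.5 else 'real'
    result = format_result(prediction, confidence, elapsed)
    save_result(result, args.output)              # support/detect.py reads these three fields back
    print(result)


if __name__ == '__main__':
    main()
```

When invoked through the launcher's detect mode, `support/detect.py` then prints these fields and stores the JSON under `detection_results/<detector>/detect/`.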