Spaces:
Running
Running
Commit
·
ef83142
0
Parent(s):
DetectAI API - Initial commit
Browse files- .gitignore +9 -0
- api.py +104 -0
- ensemble_image_detector.py +201 -0
- explain_model.py +267 -0
- requirements.txt +6 -0
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
*.pyd
|
| 5 |
+
.Python
|
| 6 |
+
env/
|
| 7 |
+
venv/
|
| 8 |
+
*.log
|
| 9 |
+
.DS_Store
|
api.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
import os
|
| 3 |
+
from flask import Flask, request, jsonify
|
| 4 |
+
from flask_cors import CORS
|
| 5 |
+
from explain_model import SentenceBasedTextDetector
|
| 6 |
+
from ensemble_image_detector import EnsembleImageDetector
|
| 7 |
+
import traceback
|
| 8 |
+
|
| 9 |
+
# Create the Flask application and enable CORS on it with flask_cors defaults.
app = Flask(__name__)
CORS(app)

# Load models
# Model identifiers reported by /health and by the startup banner.
# NOTE(review): IMAGE_MODEL is not passed to EnsembleImageDetector() below, so
# in this module it is informational only — confirm it matches what the
# detector actually loads.
TEXT_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"
IMAGE_MODEL = "Organika/sdxl-detector"

# Both detectors are constructed at import time, before any route is served.
print("Starting server and loading models...")
text_detector = SentenceBasedTextDetector(TEXT_MODEL)
image_detector = EnsembleImageDetector()
print("Server ready!")
|
| 20 |
+
|
| 21 |
+
@app.route('/', methods=['GET'])
def home():
    """Return a JSON summary of the service and the routes it exposes."""
    endpoint_map = {
        'health': '/health',
        'text': '/analyze',
        'image': '/analyze-image',
    }
    payload = {
        'status': 'ok',
        'message': 'DetectAI API is running',
        'endpoints': endpoint_map,
    }
    return jsonify(payload)
|
| 33 |
+
|
| 34 |
+
@app.route('/health', methods=['GET'])
def health():
    """Return a JSON liveness report that also names the configured models."""
    report = {
        'status': 'ok',
        'message': 'Server is running',
        'text_model': TEXT_MODEL,
        'image_model': IMAGE_MODEL,
    }
    return jsonify(report)
|
| 43 |
+
|
| 44 |
+
@app.route('/analyze', methods=['POST'])
def analyze_text():
    """Analyze text and return prediction.

    Expects a JSON object body with a string ``text`` field that is at least
    10 characters long after surrounding whitespace is stripped.

    Returns:
        The detector's result as JSON on success. Invalid input yields a JSON
        ``error`` message with status 400; a failure during analysis yields
        status 500.
    """
    # silent=True makes a missing, non-JSON or malformed body come back as
    # None, so client mistakes are answered with 400 below instead of raising
    # and being reported as a server error.
    data = request.get_json(silent=True)

    if not isinstance(data, dict) or 'text' not in data:
        return jsonify({'error': 'No text provided'}), 400

    raw_text = data['text']
    if not isinstance(raw_text, str):
        return jsonify({'error': 'Text must be a string'}), 400

    text = raw_text.strip()

    if not text:
        return jsonify({'error': 'Text is empty'}), 400

    if len(text) < 10:
        return jsonify({'error': 'Text is too short (minimum 10 characters)'}), 400

    try:
        print(f"Analyzing text ({len(text)} characters)...")
        result = text_detector.explain(text)
        print(f"Result: {result['prediction']} ({result['ai_probability']}%)")

        return jsonify(result)

    except Exception as e:
        # Top-level boundary: log the details server-side and keep the
        # response generic.
        print(f"Error: {str(e)}")
        traceback.print_exc()
        return jsonify({'error': 'Analysis failed'}), 500
|
| 71 |
+
|
| 72 |
+
@app.route('/analyze-image', methods=['POST'])
def analyze_image():
    """Analyze image and return prediction.

    Expects a JSON object body with a non-empty string ``image`` field, which
    is handed to ``image_detector.detect_from_base64``.

    Returns:
        The detector's result as JSON on success. Invalid input yields a JSON
        ``error`` message with status 400; a failure during analysis yields
        status 500.
    """
    # silent=True makes a missing, non-JSON or malformed body come back as
    # None, so client mistakes are answered with 400 below instead of raising
    # and being reported as a server error.
    data = request.get_json(silent=True)

    if not isinstance(data, dict) or 'image' not in data:
        return jsonify({'error': 'No image provided'}), 400

    image_base64 = data['image']
    if not isinstance(image_base64, str) or not image_base64.strip():
        return jsonify({'error': 'Image must be a non-empty base64 string'}), 400

    try:
        print("Analyzing image...")
        result = image_detector.detect_from_base64(image_base64)
        print(f"Result: {result['prediction']} ({result['ai_probability']}%)")

        return jsonify(result)

    except Exception as e:
        # Top-level boundary: log the details server-side and keep the
        # response generic.
        print(f"Error: {str(e)}")
        traceback.print_exc()
        return jsonify({'error': 'Analysis failed'}), 500
|
| 93 |
+
|
| 94 |
+
if __name__ == '__main__':
    # Script entry point: read the port from the environment (default 5000),
    # print a startup banner, then start the Flask server on all interfaces.
    PORT = int(os.environ.get('PORT', 5000))

    rule = "=" * 70
    banner = (
        "\n" + rule,
        "DetectAI API Server",
        rule,
        f"Text Model: {TEXT_MODEL}",
        f"Image Model: {IMAGE_MODEL}",
        f"Server running on port: {PORT}",
        rule + "\n",
    )
    for line in banner:
        print(line)

    app.run(host='0.0.0.0', port=PORT, debug=False)
|
ensemble_image_detector.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
from transformers import AutoImageProcessor, AutoModelForImageClassification
|
| 3 |
+
from PIL import Image
|
| 4 |
+
import torch
|
| 5 |
+
import io
|
| 6 |
+
import base64
|
| 7 |
+
|
| 8 |
+
class EnsembleImageDetector:
    """Detect AI-generated images with an ensemble of image classifiers.

    Every loaded model contributes an "AI" probability. The probabilities are
    averaged and the average is then adjusted by heuristics based on EXIF
    metadata, image dimensions and resolution.
    """

    # Lower-case label tokens that identify a model's "AI-generated" class.
    _AI_LABEL_TOKENS = frozenset(
        {'artificial', 'ai', 'fake', 'generated', 'synthetic'}
    )

    def __init__(self):
        """Load multiple models for better accuracy.

        A model that fails to load is reported and skipped.

        Raises:
            Exception: If none of the models could be loaded.
        """
        print("Loading ensemble image detectors...")

        self.models = []
        model_names = [
            "umm-maybe/AI-image-detector",
            "Organika/sdxl-detector",
        ]

        for model_name in model_names:
            try:
                print(f" Loading {model_name}...")
                processor = AutoImageProcessor.from_pretrained(model_name)
                model = AutoModelForImageClassification.from_pretrained(model_name)
                model.eval()
                self.models.append({
                    'name': model_name,
                    'processor': processor,
                    'model': model,
                })
                print(f" ✓ {model_name} loaded")
            except Exception as e:
                # Best effort: one unavailable model must not stop the others
                # from loading.
                print(f" ✗ Failed to load {model_name}: {e}")

        if not self.models:
            raise Exception("Failed to load any models!")

        print(f"Loaded {len(self.models)} models for ensemble\n")

    @classmethod
    def _ai_class_index(cls, id2label, num_classes):
        """Return the logits column that represents the AI-generated class.

        The label order is model-specific, so the column is looked up in the
        model's ``id2label`` mapping when one of its labels contains a known
        "AI" token. When no label matches (or no mapping is available) the
        historical rule is used: column 1 for binary models, column 0
        otherwise.

        Args:
            id2label: Mapping of class index to label name, or None.
            num_classes: Number of columns in the model's output.

        Returns:
            The zero-based column index to read the AI probability from.
        """
        if isinstance(id2label, dict):
            for raw_index, label in id2label.items():
                tokens = (
                    str(label).lower().replace('-', ' ').replace('_', ' ').split()
                )
                if not cls._AI_LABEL_TOKENS.intersection(tokens):
                    continue
                try:
                    index = int(raw_index)
                except (TypeError, ValueError):
                    continue
                if 0 <= index < num_classes:
                    return index
        return 1 if num_classes == 2 else 0

    def detect_from_base64(self, base64_string):
        """Decode a base64 image (optionally a data URL) and run detection.

        Args:
            base64_string: Base64 text, with or without a ``...,`` data-URL
                prefix.

        Returns:
            The result dictionary produced by ``detect_from_image``.

        Raises:
            Exception: Any decoding or detection error is printed and
                re-raised unchanged.
        """
        try:
            if ',' in base64_string:
                # Drop a data-URL header: everything up to the first comma.
                base64_string = base64_string.split(',', 1)[1]

            image_data = base64.b64decode(base64_string)
            image = Image.open(io.BytesIO(image_data)).convert('RGB')
            return self.detect_from_image(image)
        except Exception as e:
            print(f"Error decoding image: {e}")
            raise

    def detect_from_image(self, image):
        """Ensemble detection with voting and metadata analysis.

        Args:
            image: An image object exposing ``size`` and ``getexif()`` that
                the loaded processors accept.

        Returns:
            dict with ``prediction`` ('AI' or 'Real'), ``ai_probability`` and
            ``real_probability`` (percentages rounded to 2 decimals),
            ``confidence`` ('High', 'Medium' or 'Low') and ``explanations``.

        Raises:
            Exception: If every model fails to produce a prediction.
        """
        width, height = image.size
        total_pixels = width * height
        megapixels = total_pixels / 1000000

        print(f"Analyzing: {width}x{height} ({megapixels:.1f}MP)")

        # Get predictions from all models
        predictions = []
        for model_info in self.models:
            try:
                inputs = model_info['processor'](images=image, return_tensors="pt")

                with torch.no_grad():
                    outputs = model_info['model'](**inputs)
                    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

                # Read the AI column named by the model's own label map rather
                # than assuming a fixed position.
                config = getattr(model_info['model'], 'config', None)
                ai_index = self._ai_class_index(
                    getattr(config, 'id2label', None), probs.shape[1]
                )
                ai_prob = probs[0][ai_index].item()

                predictions.append(ai_prob)
                print(f" Model prediction: {ai_prob*100:.1f}% AI")
            except Exception as e:
                # A failing model is skipped; the remaining ones still vote.
                print(f" Model error: {e}")

        if not predictions:
            raise Exception("All models failed!")

        # Average predictions
        avg_ai_prob = sum(predictions) / len(predictions)

        # Metadata analysis
        has_exif = False
        exif_count = 0
        try:
            exif = image.getexif()
            if exif:
                exif_count = len(exif)
                # More than 8 tags counts as "extensive" metadata.
                has_exif = exif_count > 8
        except Exception as e:
            # Metadata is a best-effort signal; an unreadable EXIF block is
            # treated as absent.
            print(f" EXIF read failed: {e}")

        # Check AI characteristics
        aspect_ratio = width / height if height else 0.0
        is_square = 0.95 < aspect_ratio < 1.05
        common_ai_sizes = [512, 768, 1024, 1536, 2048]
        is_ai_size = width in common_ai_sizes and height in common_ai_sizes

        # Strong indicators
        strong_real = sum([has_exif, megapixels > 8, not is_ai_size])
        strong_ai = sum([exif_count == 0, is_square, is_ai_size])

        # Apply calibration
        final_prob = avg_ai_prob

        if strong_real >= 2:
            final_prob = final_prob * 0.5
        elif has_exif:
            final_prob = final_prob * 0.6

        if strong_ai >= 2:
            final_prob = final_prob * 1.3

        final_prob = final_prob * 0.9
        # Never report full certainty in either direction.
        final_prob = max(0.05, min(0.95, final_prob))

        print(f"Final: {final_prob*100:.1f}% AI")

        # Generate explanations
        explanations = self._generate_explanations(
            has_exif, is_square, is_ai_size, megapixels, width, height, final_prob
        )

        distance = abs(final_prob - 0.5)
        confidence = "High" if distance > 0.3 else "Medium" if distance > 0.2 else "Low"

        return {
            'prediction': 'AI' if final_prob > 0.5 else 'Real',
            'ai_probability': round(final_prob * 100, 2),
            'real_probability': round((1 - final_prob) * 100, 2),
            'confidence': confidence,
            'explanations': explanations,
        }

    def _generate_explanations(self, has_exif, is_square, is_ai_size, mp, w, h, prob):
        """Generate user-friendly explanations.

        Args:
            has_exif: Whether extensive EXIF metadata was found.
            is_square: Whether the image is roughly square (not used in the
                generated text).
            is_ai_size: Whether both dimensions are common AI output sizes.
            mp: Resolution in megapixels.
            w: Image width in pixels.
            h: Image height in pixels.
            prob: Final AI probability in the range 0-1.

        Returns:
            A list of at most 5 dicts with ``indicator``, ``description`` and
            ``type`` ('AI', 'Real' or 'Neutral').
        """
        explanations = []

        if has_exif:
            explanations.append({
                'indicator': 'Camera Metadata Detected',
                'description': 'Image contains extensive EXIF data with camera settings, strongly suggesting authentic photograph.',
                'type': 'Real',
            })
        else:
            explanations.append({
                'indicator': 'No Camera Metadata',
                'description': 'Missing EXIF data normally present in photos from cameras and smartphones.',
                'type': 'AI',
            })

        if is_ai_size:
            explanations.append({
                'indicator': 'AI-Standard Dimensions',
                'description': f'Image size ({w}x{h}) matches common AI generation formats.',
                'type': 'AI',
            })
        else:
            explanations.append({
                'indicator': 'Unique Dimensions',
                'description': f'Non-standard dimensions ({w}x{h}) typical of real camera sensors.',
                'type': 'Real',
            })

        if mp > 8:
            explanations.append({
                'indicator': 'High Camera Resolution',
                'description': f'Very high resolution ({mp:.1f}MP) typical of modern cameras.',
                'type': 'Real',
            })
        elif mp < 2:
            explanations.append({
                'indicator': 'Low Resolution',
                'description': f'Low resolution ({mp:.1f}MP) common in AI-generated images.',
                'type': 'AI',
            })

        if prob > 0.7:
            explanations.append({
                'indicator': 'Strong AI Patterns',
                'description': 'Multiple models detected characteristic AI generation patterns.',
                'type': 'AI',
            })
        elif prob < 0.3:
            explanations.append({
                'indicator': 'Authentic Photography',
                'description': 'Multiple models confirmed natural photographic characteristics.',
                'type': 'Real',
            })
        else:
            explanations.append({
                'indicator': 'Uncertain',
                'description': 'Modern AI generation is extremely realistic. Consider other evidence.',
                'type': 'Neutral',
            })

        return explanations[:5]
|
explain_model.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
+
import torch
|
| 4 |
+
import re
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Switch stdout to UTF-8 on Windows. The hasattr guard covers sys.stdout being
# None or a replacement stream that has no reconfigure() method, either of
# which would otherwise make importing this module fail.
if sys.platform == 'win32' and hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
|
| 10 |
+
|
| 11 |
+
class SentenceBasedTextDetector:
    """Sentence-level AI-text detector built on a sequence classifier.

    The text is split into sentences, each sentence is scored by the model,
    the score is adjusted by keyword heuristics, and the sentence scores are
    combined into an overall verdict.
    """

    # Marker lists used to calibrate the raw model score.
    _CAL_INFORMAL = ('lol', 'haha', 'omg', 'btw', 'tbh', 'gonna', 'wanna',
                     'kinda', 'sorta', 'yeah', 'nah', 'idk', 'u', 'ngl',
                     '???', '!!!', '...')
    _CAL_GREETINGS = ('hey', 'hi', 'yo', 'sup')
    _CAL_FORMAL = ('furthermore', 'moreover', 'additionally', 'consequently',
                   'therefore', 'thus', 'hence', 'nevertheless', 'nonetheless')
    _CAL_BUZZWORDS = ('facilitate', 'utilize', 'leverage', 'comprehensive',
                      'optimize', 'strategic', 'framework', 'methodology')

    # Shared by calibration and explanations so the two always agree.
    _CONTRACTIONS = ("can't", "won't", "ain't", "shouldn't", "wouldn't",
                     "i'm", "we're", "they're", "it's", "that's")

    # Marker lists used only to word the per-sentence explanation.
    _EXP_FORMAL = ('furthermore', 'moreover', 'additionally', 'consequently',
                   'therefore', 'thus', 'hence', 'nevertheless', 'nonetheless',
                   'in conclusion', 'to summarize', 'it is important to note')
    _EXP_BUZZWORDS = ('delve', 'utilize', 'leverage', 'facilitate', 'implement',
                      'comprehensive', 'robust', 'seamless', 'streamline',
                      'optimize', 'strategic', 'framework', 'methodology',
                      'paramount')
    _EXP_PASSIVE = ('is known', 'are made', 'was created', 'were developed',
                    'can be found', 'has been', 'have been', 'will be')
    _EXP_INFORMAL = ('lol', 'haha', 'omg', 'btw', 'tbh', 'gonna', 'wanna',
                     'kinda', 'sorta', 'yeah', 'nah', 'idk', 'u', 'ngl',
                     '...', '!!', '??', 'bruh', 'fr', 'lowkey')

    def __init__(self, model_name="Hello-SimpleAI/chatgpt-detector-roberta"):
        """Load the tokenizer and classification model.

        Args:
            model_name: Value passed to ``from_pretrained``. If it is an
                existing filesystem path the "local path" message is printed,
                otherwise the "Downloading from Hugging Face" message.
        """
        print(f"Loading model: {model_name}")

        if os.path.exists(str(model_name)):
            print("[*] Loading from local path...")
        else:
            print("[*] Downloading from Hugging Face (first time only)...")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.eval()
        self.model_name = model_name
        print("[OK] Model loaded successfully")

    @staticmethod
    def _normalize(sentence):
        """Lower-case the sentence and map curly apostrophes to straight ones."""
        return sentence.lower().replace('\u2019', "'")

    @staticmethod
    def _count_markers(text, markers, whole_word=True):
        """Count how many of ``markers`` occur in ``text``.

        Markers containing letters or digits must start on a word boundary
        and, when ``whole_word`` is true, also end on one, so 'hi' does not
        match "this". With ``whole_word`` false the marker may be the start of
        a longer word ('utilize' matches "utilized"). Punctuation-only markers
        such as '...' are matched as plain substrings.
        """
        count = 0
        for marker in markers:
            token = marker.strip()
            if any(ch.isalnum() for ch in token):
                pattern = r'(?<!\w)' + re.escape(token)
                if whole_word:
                    pattern += r'(?!\w)'
                found = re.search(pattern, text) is not None
            else:
                found = token in text
            if found:
                count += 1
        return count

    @staticmethod
    def _confidence(ai_prob):
        """Map the distance of ``ai_prob`` from 0.5 to 'High'/'Medium'/'Low'."""
        distance = abs(ai_prob - 0.5)
        return "High" if distance > 0.25 else "Medium" if distance > 0.15 else "Low"

    def split_into_sentences(self, text):
        """Split text into sentences.

        A split happens at whitespace that follows '.', '!' or '?' and
        precedes an upper-case ASCII letter. Sentences of 10 characters or
        fewer (after stripping) are dropped.
        """
        sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)
        return [s.strip() for s in sentences if len(s.strip()) > 10]

    def analyze_sentence(self, sentence):
        """Get AI probability for a sentence with calibration.

        Returns:
            The calibrated AI probability, clamped to the range 0.05-0.95.
        """
        inputs = self.tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        )

        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=-1)

        # Column 1 is read as the AI class for binary models, column 0
        # otherwise.
        if probs.shape[1] == 2:
            ai_prob = probs[0][1].item()
        else:
            ai_prob = probs[0][0].item()

        return self._calibrate(sentence, ai_prob)

    def _calibrate(self, sentence, ai_prob):
        """Adjust a raw AI probability using keyword heuristics.

        Informal markers, contractions, short questions and short greetings
        lower the score; formal transitions and buzzwords raise it. The result
        is clamped to the range 0.05-0.95.
        """
        sentence_lower = self._normalize(sentence)
        word_count = len(sentence.split())

        # Strong human indicators - reduce AI score
        informal_count = self._count_markers(sentence_lower, self._CAL_INFORMAL)
        if informal_count >= 3:
            ai_prob *= 0.3  # Very informal
        elif informal_count >= 2:
            ai_prob *= 0.5  # Somewhat informal
        elif informal_count >= 1:
            ai_prob *= 0.7  # Some informality

        # Check for contractions (human trait)
        if self._count_markers(sentence_lower, self._CONTRACTIONS):
            ai_prob *= 0.8

        # Short questions are treated as a human trait
        if '?' in sentence and word_count < 15:
            ai_prob *= 0.8

        # Very short sentences containing a greeting word
        if word_count < 10 and self._count_markers(sentence_lower, self._CAL_GREETINGS):
            ai_prob *= 0.5

        # Strong AI indicators - increase score
        if self._count_markers(sentence_lower, self._CAL_FORMAL):
            ai_prob = min(ai_prob * 1.3, 0.95)

        # Buzzwords match as word prefixes so inflected forms still count.
        buzzword_count = self._count_markers(
            sentence_lower, self._CAL_BUZZWORDS, whole_word=False
        )
        if buzzword_count >= 2:
            ai_prob = min(ai_prob * 1.4, 0.95)
        elif buzzword_count >= 1:
            ai_prob = min(ai_prob * 1.2, 0.95)

        # Clamp between 5% and 95%
        return max(0.05, min(0.95, ai_prob))

    def get_sentence_explanation(self, sentence, ai_score):
        """Generate explanation for sentence classification.

        Args:
            sentence: The sentence to describe.
            ai_score: AI probability in the range 0-1; used only to pick a
                default reason when no pattern matches.

        Returns:
            Up to two reasons joined by ". " and terminated with ".".
        """
        sentence_lower = self._normalize(sentence)
        reasons = []

        # Check patterns, human indicators first
        if self._count_markers(sentence_lower, self._EXP_INFORMAL):
            reasons.append("Informal conversational language")

        if self._count_markers(sentence_lower, self._CONTRACTIONS):
            reasons.append("Natural contractions")

        if self._count_markers(sentence_lower, self._EXP_FORMAL):
            reasons.append("Formal tone and structure")

        if self._count_markers(sentence_lower, self._EXP_BUZZWORDS, whole_word=False):
            reasons.append("Technical/corporate vocabulary")

        if self._count_markers(sentence_lower, self._EXP_PASSIVE):
            reasons.append("Passive voice construction")

        if len(sentence.split()) > 25:
            reasons.append("Very long, complex sentence")

        if sentence.count(',') >= 3:
            reasons.append("Multiple clauses")

        if '?' in sentence:
            reasons.append("Direct question")

        # Default reasons
        if not reasons:
            if ai_score > 0.7:
                reasons.append("Formulaic structure")
            elif ai_score < 0.3:
                reasons.append("Natural expression")
            else:
                reasons.append("Mixed characteristics")

        return ". ".join(reasons[:2]) + "."

    def explain(self, text):
        """Analyze text and return sentence-level explanations.

        Returns:
            dict with ``prediction`` ('AI' or 'Human'), ``ai_probability`` and
            ``human_probability`` (percentages rounded to 2 decimals),
            ``confidence`` and up to five ``ai_indicators`` and
            ``human_indicators``. Inside each indicator ``ai_probability`` is
            a 0-1 fraction, not a percentage.
        """
        sentences = self.split_into_sentences(text)

        if not sentences:
            return self._analyze_whole_text(text)

        sentence_results = []

        for sentence in sentences:
            score = self.analyze_sentence(sentence)
            reason = self.get_sentence_explanation(sentence, score)

            sentence_results.append({
                'sentence': sentence,
                'ai_probability': score,
                'reason': reason
            })

        # Calculate overall score as weighted average; sentences whose score
        # is further from 0.5 get more weight (0.5 to 1.0).
        total_weight = 0
        weighted_sum = 0

        for result in sentence_results:
            weight = abs(result['ai_probability'] - 0.5) + 0.5
            weighted_sum += result['ai_probability'] * weight
            total_weight += weight

        overall_ai_prob = weighted_sum / total_weight if total_weight > 0 else 0.5

        # Sort by AI probability
        sentence_results.sort(key=lambda x: x['ai_probability'], reverse=True)

        # Get indicators
        ai_indicators = [s for s in sentence_results if s['ai_probability'] > 0.55][:5]
        human_indicators = [s for s in sentence_results if s['ai_probability'] < 0.45][:5]

        return {
            'prediction': 'AI' if overall_ai_prob > 0.5 else 'Human',
            'ai_probability': round(overall_ai_prob * 100, 2),
            'human_probability': round((1 - overall_ai_prob) * 100, 2),
            'confidence': self._confidence(overall_ai_prob),
            'ai_indicators': ai_indicators,
            'human_indicators': human_indicators
        }

    def _analyze_whole_text(self, text):
        """Fallback for short text: score the whole text in one model call.

        No keyword calibration is applied here. The single indicator entry
        carries the score under both ``ai_probability`` (the key ``explain``
        uses) and ``score`` (kept for existing consumers).
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=-1)
            overall_ai_prob = probs[0][1].item() if probs.shape[1] == 2 else probs[0][0].item()

        is_ai = overall_ai_prob > 0.5
        indicator = {
            'sentence': text,
            'ai_probability': overall_ai_prob,
            'score': overall_ai_prob,
            'reason': self.get_sentence_explanation(text, overall_ai_prob)
        }

        return {
            'prediction': 'AI' if is_ai else 'Human',
            'ai_probability': round(overall_ai_prob * 100, 2),
            'human_probability': round((1 - overall_ai_prob) * 100, 2),
            'confidence': self._confidence(overall_ai_prob),
            'ai_indicators': [indicator] if is_ai else [],
            'human_indicators': [] if is_ai else [indicator]
        }
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
if __name__ == "__main__":
    # Manual smoke test: load the default model, classify a few sample texts
    # and print whether each prediction matches the expected label.
    divider = "="*70
    print("\n" + divider)
    print("AI Text Detection - Testing with Calibration")
    print(divider)

    MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"

    detector = SentenceBasedTextDetector(MODEL_NAME)

    # Test cases: (sample text, expected prediction)
    tests = [
        ("omg i cant believe what happened yesterday lol", "Human"),
        ("Furthermore, it is important to note that comprehensive analysis", "AI"),
        ("hey whats up? wanna hang out later?", "Human"),
        ("The strategic framework facilitates optimal outcomes", "AI")
    ]

    for sample, expected_label in tests:
        outcome = detector.explain(sample)
        predicted = outcome['prediction']
        if predicted == expected_label:
            status = "[OK]"
        else:
            status = "[FAIL]"
        print(f"\n{status} {sample[:50]}...")
        print(f"Expected: {expected_label}, Got: {predicted} ({outcome['ai_probability']:.1f}%)")
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==3.0.0
|
| 2 |
+
Flask-CORS==4.0.0
|
| 3 |
+
transformers==4.35.0
|
| 4 |
+
torch==2.1.0
|
| 5 |
+
torchvision==0.16.0
|
| 6 |
+
Pillow==10.1.0
|