Spaces:

kushalkv
/

detectai-api

Running

App Files Files Community

kushalkv committed 16 days ago

Commit

ef83142

0 Parent(s):

DetectAI API - Initial commit

Browse files

Files changed (5) hide show

.gitignore +9 -0
api.py +104 -0
ensemble_image_detector.py +201 -0
explain_model.py +267 -0
requirements.txt +6 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+env/
+venv/
+*.log
+.DS_Store

api.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# -*- coding: utf-8 -*-
+import os
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from explain_model import SentenceBasedTextDetector
+from ensemble_image_detector import EnsembleImageDetector
+import traceback
+# Application object; CORS(app) is called with no options, i.e. flask-cors defaults.
+app = Flask(__name__)
+CORS(app)
+# Load models
+# Both detectors are constructed once here, at import time, and the instances
+# are reused by every request handler below.
+TEXT_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"
+# NOTE(review): IMAGE_MODEL is only echoed by /health and the startup banner;
+# it is not passed to EnsembleImageDetector(), which picks its own checkpoints.
+IMAGE_MODEL = "Organika/sdxl-detector"
+print("Starting server and loading models...")
+text_detector = SentenceBasedTextDetector(TEXT_MODEL)
+image_detector = EnsembleImageDetector()
+print("Server ready!")
+@app.route('/', methods=['GET'])
+def home():
+    """Describe the service and list the routes it exposes."""
+    routes = {
+        'health': '/health',
+        'text': '/analyze',
+        'image': '/analyze-image',
+    }
+    payload = {
+        'status': 'ok',
+        'message': 'DetectAI API is running',
+        'endpoints': routes,
+    }
+    return jsonify(payload)
+@app.route('/health', methods=['GET'])
+def health():
+    """Liveness probe that also reports the configured model names."""
+    body = dict(
+        status='ok',
+        message='Server is running',
+        text_model=TEXT_MODEL,
+        image_model=IMAGE_MODEL,
+    )
+    return jsonify(body)
+@app.route('/analyze', methods=['POST'])
+def analyze_text():
+    """Classify the posted text as AI- or human-written.
+
+    Expects a JSON object with a string ``text`` field holding at least 10
+    characters after trimming.
+
+    Returns:
+        The detector result as JSON; a 400 response when the payload is
+        missing or invalid; a 500 response when the detector itself fails.
+    """
+    # silent=True turns a missing or malformed JSON body into None instead of
+    # an exception, so bad client input is answered with 400 below rather
+    # than being caught by the generic handler and reported as a 500.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'text' not in data:
+        return jsonify({'error': 'No text provided'}), 400
+    text = data['text']
+    if not isinstance(text, str):
+        # e.g. {"text": 123} or {"text": null}; .strip() would raise otherwise.
+        return jsonify({'error': 'Text must be a string'}), 400
+    text = text.strip()
+    if not text:
+        return jsonify({'error': 'Text is empty'}), 400
+    if len(text) < 10:
+        return jsonify({'error': 'Text is too short (minimum 10 characters)'}), 400
+    # Only the detector call is guarded: anything raised here is a server fault.
+    try:
+        print(f"Analyzing text ({len(text)} characters)...")
+        result = text_detector.explain(text)
+        print(f"Result: {result['prediction']} ({result['ai_probability']}%)")
+        return jsonify(result)
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        traceback.print_exc()
+        return jsonify({'error': 'Analysis failed'}), 500
+@app.route('/analyze-image', methods=['POST'])
+def analyze_image():
+    """Classify the posted image as AI-generated or real.
+
+    Expects a JSON object whose ``image`` field is a base64 string (a
+    ``data:`` URL prefix is accepted by the detector).
+
+    Returns:
+        The detector result as JSON; a 400 response when the payload is
+        missing, of the wrong type, or cannot be decoded as an image; a 500
+        response for any other failure.
+    """
+    # silent=True: a missing/malformed JSON body becomes None (-> 400 below)
+    # instead of an exception that the generic handler would report as 500.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'image' not in data:
+        return jsonify({'error': 'No image provided'}), 400
+    image_base64 = data['image']
+    if not isinstance(image_base64, str) or not image_base64.strip():
+        return jsonify({'error': 'Image must be a non-empty base64 string'}), 400
+    try:
+        print("Analyzing image...")
+        result = image_detector.detect_from_base64(image_base64)
+        print(f"Result: {result['prediction']} ({result['ai_probability']}%)")
+        return jsonify(result)
+    except (ValueError, OSError) as e:
+        # base64 decoding errors derive from ValueError and Pillow reports
+        # unreadable image bytes as OSError, so these are treated as bad input.
+        # NOTE(review): an OSError from another source would also land here.
+        print(f"Error: {str(e)}")
+        return jsonify({'error': 'Invalid image data'}), 400
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        traceback.print_exc()
+        return jsonify({'error': 'Analysis failed'}), 500
+if __name__ == '__main__':
+    # Listening port is taken from the PORT environment variable (default 5000).
+    PORT = int(os.environ.get('PORT', 5000))
+    rule = "=" * 70
+    # Startup banner, one entry per printed line.
+    for line in (
+        "\n" + rule,
+        "DetectAI API Server",
+        rule,
+        f"Text Model: {TEXT_MODEL}",
+        f"Image Model: {IMAGE_MODEL}",
+        f"Server running on port: {PORT}",
+        rule + "\n",
+    ):
+        print(line)
+    app.run(host='0.0.0.0', port=PORT, debug=False)

ensemble_image_detector.py ADDED Viewed

	@@ -0,0 +1,201 @@

+# -*- coding: utf-8 -*-
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+from PIL import Image
+import torch
+import io
+import base64
+class EnsembleImageDetector:
+    """Image AI-vs-real detector that averages several classifiers.
+
+    Each loaded image-classification checkpoint contributes one AI
+    probability; the mean is then adjusted with simple metadata heuristics
+    (EXIF tag count, resolution, typical generator dimensions).
+    """
+    # Checkpoints loaded when the caller does not supply its own list.
+    DEFAULT_MODEL_NAMES = (
+        "umm-maybe/AI-image-detector",
+        "Organika/sdxl-detector",
+    )
+    # Edge lengths treated as typical generator output sizes.
+    COMMON_AI_SIZES = (512, 768, 1024, 1536, 2048)
+    # Substrings used to recognise the "AI" / "real" class in config.id2label.
+    _AI_LABEL_HINTS = ('artificial', 'fake', 'generated', 'synthetic')
+    _REAL_LABEL_HINTS = ('human', 'real')
+    def __init__(self, model_names=None):
+        """Load every checkpoint that can be loaded.
+
+        Args:
+            model_names: Optional iterable of model ids or local paths.
+                Defaults to ``DEFAULT_MODEL_NAMES``.
+
+        Raises:
+            RuntimeError: If none of the checkpoints could be loaded.
+        """
+        print("Loading ensemble image detectors...")
+        self.models = []
+        if model_names is None:
+            model_names = self.DEFAULT_MODEL_NAMES
+        for model_name in model_names:
+            try:
+                print(f"  Loading {model_name}...")
+                processor = AutoImageProcessor.from_pretrained(model_name)
+                model = AutoModelForImageClassification.from_pretrained(model_name)
+                model.eval()
+            except Exception as e:
+                # A single broken checkpoint must not take the ensemble down.
+                print(f"  ✗ Failed to load {model_name}: {e}")
+                continue
+            self.models.append({
+                'name': model_name,
+                'processor': processor,
+                'model': model
+            })
+            print(f"  ✓ {model_name} loaded")
+        if not self.models:
+            raise RuntimeError("Failed to load any models!")
+        print(f"Loaded {len(self.models)} models for ensemble\n")
+    def detect_from_base64(self, base64_string):
+        """Decode a base64 image (optionally a data URL) and classify it.
+
+        Returns:
+            The result dict produced by ``detect_from_image``.
+
+        Raises:
+            Whatever base64 decoding or Pillow raises for unreadable input;
+            the error is logged and re-raised unchanged.
+        """
+        try:
+            # Drop a "data:...;base64," prefix; maxsplit keeps the payload whole.
+            if ',' in base64_string:
+                base64_string = base64_string.split(',', 1)[1]
+            image_data = base64.b64decode(base64_string)
+            image = Image.open(io.BytesIO(image_data)).convert('RGB')
+        except Exception as e:
+            print(f"Error decoding image: {e}")
+            raise
+        # Kept outside the try so model failures are not logged as decode errors.
+        return self.detect_from_image(image)
+    @classmethod
+    def _ai_class_index(cls, model, num_classes):
+        """Return the logit index that stands for the "AI" class.
+
+        Checkpoints do not agree on label order, so the model's own
+        ``config.id2label`` mapping is consulted first. When the labels give
+        no hint the previous rule applies: index 1 for a binary head,
+        index 0 otherwise.
+        """
+        fallback = 1 if num_classes == 2 else 0
+        config = getattr(model, 'config', None)
+        id2label = getattr(config, 'id2label', None) or {}
+        labels = {}
+        for idx, label in id2label.items():
+            try:
+                labels[int(idx)] = str(label).lower()
+            except (TypeError, ValueError):
+                continue
+        for idx in sorted(labels):
+            label = labels[idx]
+            names_ai = label == 'ai' or any(h in label for h in cls._AI_LABEL_HINTS)
+            if names_ai and 0 <= idx < num_classes:
+                return idx
+        if num_classes == 2:
+            # No label names the AI class; use the complement of the real class.
+            for idx in sorted(labels):
+                if idx in (0, 1) and any(h in labels[idx] for h in cls._REAL_LABEL_HINTS):
+                    return 1 - idx
+        return fallback
+    def _collect_predictions(self, image):
+        """Run every loaded model and return the list of AI probabilities."""
+        predictions = []
+        for model_info in self.models:
+            try:
+                inputs = model_info['processor'](images=image, return_tensors="pt")
+                with torch.no_grad():
+                    outputs = model_info['model'](**inputs)
+                    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+                ai_index = self._ai_class_index(model_info['model'], probs.shape[1])
+                ai_prob = probs[0][ai_index].item()
+            except Exception as e:
+                # Best effort: the remaining models can still vote.
+                print(f"  Model error: {e}")
+                continue
+            predictions.append(ai_prob)
+            print(f"  Model prediction: {ai_prob*100:.1f}% AI")
+        return predictions
+    @staticmethod
+    def _count_exif_tags(image):
+        """Return the number of EXIF tags on *image*, or 0 if unreadable."""
+        try:
+            exif = image.getexif()
+        except Exception as e:
+            # Metadata is only a secondary signal; report and carry on.
+            print(f"  EXIF read failed: {e}")
+            return 0
+        return len(exif) if exif else 0
+    def detect_from_image(self, image):
+        """Classify an already opened image.
+
+        Args:
+            image: Object exposing ``size`` and ``getexif()`` that the loaded
+                processors accept (a PIL image in ``detect_from_base64``).
+
+        Returns:
+            Dict with ``prediction`` ('AI' or 'Real'), ``ai_probability`` and
+            ``real_probability`` (percent, two decimals), ``confidence``
+            ('High', 'Medium' or 'Low') and ``explanations``.
+
+        Raises:
+            RuntimeError: If no model produced a prediction.
+        """
+        width, height = image.size
+        megapixels = (width * height) / 1000000
+        print(f"Analyzing: {width}x{height} ({megapixels:.1f}MP)")
+        predictions = self._collect_predictions(image)
+        if not predictions:
+            raise RuntimeError("All models failed!")
+        avg_ai_prob = sum(predictions) / len(predictions)
+        # Metadata analysis: more than 8 tags counts as real camera metadata.
+        exif_count = self._count_exif_tags(image)
+        has_exif = exif_count > 8
+        # Shape heuristics; a zero height is treated as "not square".
+        aspect_ratio = width / height if height else 0
+        is_square = 0.95 < aspect_ratio < 1.05
+        is_ai_size = width in self.COMMON_AI_SIZES and height in self.COMMON_AI_SIZES
+        strong_real = sum([has_exif, megapixels > 8, not is_ai_size])
+        strong_ai = sum([exif_count == 0, is_square, is_ai_size])
+        # Calibration: scale the model average by the metadata evidence.
+        final_prob = avg_ai_prob
+        if strong_real >= 2:
+            final_prob *= 0.5
+        elif has_exif:
+            final_prob *= 0.6
+        if strong_ai >= 2:
+            final_prob *= 1.3
+        final_prob *= 0.9
+        # Never report certainty: keep the score within 5%..95%.
+        final_prob = max(0.05, min(0.95, final_prob))
+        print(f"Final: {final_prob*100:.1f}% AI")
+        explanations = self._generate_explanations(
+            has_exif, is_square, is_ai_size, megapixels, width, height, final_prob
+        )
+        distance = abs(final_prob - 0.5)
+        confidence = "High" if distance > 0.3 else "Medium" if distance > 0.2 else "Low"
+        return {
+            'prediction': 'AI' if final_prob > 0.5 else 'Real',
+            'ai_probability': round(final_prob * 100, 2),
+            'real_probability': round((1 - final_prob) * 100, 2),
+            'confidence': confidence,
+            'explanations': explanations
+        }
+    def _generate_explanations(self, has_exif, is_square, is_ai_size, mp, w, h, prob):
+        """Build the user-facing list of reasons behind a verdict.
+
+        Emits one entry each for metadata, dimensions and overall score, plus
+        a resolution entry when ``mp`` is above 8 or below 2. ``is_square``
+        is accepted for signature compatibility but not used.
+
+        Returns:
+            At most five dicts with ``indicator``, ``description`` and
+            ``type`` ('AI', 'Real' or 'Neutral').
+        """
+        explanations = []
+        if has_exif:
+            explanations.append({
+                'indicator': 'Camera Metadata Detected',
+                'description': 'Image contains extensive EXIF data with camera settings, strongly suggesting authentic photograph.',
+                'type': 'Real'
+            })
+        else:
+            explanations.append({
+                'indicator': 'No Camera Metadata',
+                'description': 'Missing EXIF data normally present in photos from cameras and smartphones.',
+                'type': 'AI'
+            })
+        if is_ai_size:
+            explanations.append({
+                'indicator': 'AI-Standard Dimensions',
+                'description': f'Image size ({w}x{h}) matches common AI generation formats.',
+                'type': 'AI'
+            })
+        else:
+            explanations.append({
+                'indicator': 'Unique Dimensions',
+                'description': f'Non-standard dimensions ({w}x{h}) typical of real camera sensors.',
+                'type': 'Real'
+            })
+        if mp > 8:
+            explanations.append({
+                'indicator': 'High Camera Resolution',
+                'description': f'Very high resolution ({mp:.1f}MP) typical of modern cameras.',
+                'type': 'Real'
+            })
+        elif mp < 2:
+            explanations.append({
+                'indicator': 'Low Resolution',
+                'description': f'Low resolution ({mp:.1f}MP) common in AI-generated images.',
+                'type': 'AI'
+            })
+        if prob > 0.7:
+            explanations.append({
+                'indicator': 'Strong AI Patterns',
+                'description': 'Multiple models detected characteristic AI generation patterns.',
+                'type': 'AI'
+            })
+        elif prob < 0.3:
+            explanations.append({
+                'indicator': 'Authentic Photography',
+                'description': 'Multiple models confirmed natural photographic characteristics.',
+                'type': 'Real'
+            })
+        else:
+            explanations.append({
+                'indicator': 'Uncertain',
+                'description': 'Modern AI generation is extremely realistic. Consider other evidence.',
+                'type': 'Neutral'
+            })
+        return explanations[:5]

explain_model.py ADDED Viewed

	@@ -0,0 +1,267 @@

+# -*- coding: utf-8 -*-
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import re
+import sys
+import os
+# On Windows, switch stdout to UTF-8 (presumably so the console can print
+# non-ASCII text). The hasattr guard covers stdout being None or replaced by
+# a stream object that has no reconfigure() method.
+if sys.platform == 'win32' and hasattr(sys.stdout, 'reconfigure'):
+    sys.stdout.reconfigure(encoding='utf-8')
+class SentenceBasedTextDetector:
+    """Sentence-level AI-text detector built on a sequence classifier.
+
+    Every sentence is scored by the model, the score is nudged by lexical
+    heuristics, and the sentence scores are combined into one verdict.
+    """
+    def __init__(self, model_name="Hello-SimpleAI/chatgpt-detector-roberta"):
+        """Load the tokenizer and classifier for ``model_name``.
+
+        Args:
+            model_name: Hugging Face model id, or a path to a locally saved
+                model directory.
+        """
+        print(f"Loading model: {model_name}")
+        if os.path.exists(str(model_name)):
+            print("[*] Loading from local path...")
+        else:
+            print("[*] Downloading from Hugging Face (first time only)...")
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
+        self.model.eval()
+        self.model_name = model_name
+        print("[OK] Model loaded successfully")
+    @staticmethod
+    def _has_marker(text_lower, marker):
+        """Return True if *marker* occurs in *text_lower* as a word.
+
+        Purely alphabetic markers must start at a word boundary so that 'hi'
+        no longer fires on 'this', 'fr' on 'from' or 'thus' on 'enthusiasm'.
+        Markers of up to three letters must also end at one; longer markers
+        may be followed by more letters so elongated spellings ('hahaha',
+        'yeahhh') still count. Markers containing punctuation or spaces
+        ('...', ' u ', 'in conclusion') keep plain substring matching.
+        """
+        if not marker.isalpha():
+            return marker in text_lower
+        pattern = r'\b' + re.escape(marker)
+        if len(marker) <= 3:
+            pattern += r'\b'
+        return re.search(pattern, text_lower) is not None
+    def _model_ai_probability(self, text):
+        """Return the classifier's raw AI probability for *text*.
+
+        Input is truncated to 512 tokens.
+        NOTE(review): index 1 is taken as the AI class for binary heads and
+        index 0 otherwise; confirm against the checkpoint's id2label.
+        """
+        inputs = self.tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512,
+            padding=True
+        )
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            probs = torch.softmax(outputs.logits, dim=-1)
+        if probs.shape[1] == 2:
+            return probs[0][1].item()
+        return probs[0][0].item()
+    def split_into_sentences(self, text):
+        """Split *text* after ., ! or ? when a capital letter follows.
+
+        Returns:
+            The stripped sentences longer than 10 characters.
+        """
+        sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', text)
+        return [s.strip() for s in sentences if len(s.strip()) > 10]
+    def analyze_sentence(self, sentence):
+        """Return the calibrated AI probability (0.05..0.95) of one sentence.
+
+        The model score is scaled down for informal markers, contractions,
+        short questions and greetings, and scaled up for formal transitions
+        and buzzwords.
+        """
+        ai_prob = self._model_ai_probability(sentence)
+        sentence_lower = sentence.lower()
+        word_count = len(sentence.split())
+        # Strong human indicators - reduce AI score
+        informal_markers = ['lol', 'haha', 'omg', 'btw', 'tbh', 'gonna', 'wanna',
+                           'kinda', 'sorta', 'yeah', 'nah', 'idk', ' u ', 'ngl',
+                           '???', '!!!', '...']
+        informal_count = sum(
+            1 for marker in informal_markers
+            if self._has_marker(sentence_lower, marker)
+        )
+        if informal_count >= 3:
+            ai_prob *= 0.3  # Very informal
+        elif informal_count >= 2:
+            ai_prob *= 0.5  # Somewhat informal
+        elif informal_count >= 1:
+            ai_prob *= 0.7  # Some informality
+        # Contractions are matched as substrings (they contain an apostrophe).
+        contractions = ["can't", "won't", "ain't", "shouldn't", "wouldn't",
+                       "i'm", "we're", "they're", "it's", "that's"]
+        if any(c in sentence_lower for c in contractions):
+            ai_prob *= 0.8
+        # Short questions
+        if '?' in sentence and word_count < 15:
+            ai_prob *= 0.8
+        # Very short sentences containing a greeting word
+        greetings = ['hey', 'hi', 'yo', 'sup']
+        if word_count < 10 and any(self._has_marker(sentence_lower, g) for g in greetings):
+            ai_prob *= 0.5
+        # Strong AI indicators - increase score
+        formal_transitions = ['furthermore', 'moreover', 'additionally', 'consequently',
+                            'therefore', 'thus', 'hence', 'nevertheless', 'nonetheless']
+        if any(self._has_marker(sentence_lower, t) for t in formal_transitions):
+            ai_prob = min(ai_prob * 1.3, 0.95)
+        # Buzzwords stay substring matches so inflected forms still count
+        # ('facilitates', 'optimized').
+        ai_buzzwords = ['facilitate', 'utilize', 'leverage', 'comprehensive',
+                       'optimize', 'strategic', 'framework', 'methodology']
+        buzzword_count = sum(1 for word in ai_buzzwords if word in sentence_lower)
+        if buzzword_count >= 2:
+            ai_prob = min(ai_prob * 1.4, 0.95)
+        elif buzzword_count >= 1:
+            ai_prob = min(ai_prob * 1.2, 0.95)
+        # Clamp between 5% and 95%
+        return max(0.05, min(0.95, ai_prob))
+    def get_sentence_explanation(self, sentence, ai_score):
+        """Return a short human-readable reason for a sentence's score.
+
+        Args:
+            sentence: The sentence text.
+            ai_score: Its AI probability in 0..1; only used to choose a
+                default reason when no lexical pattern matches.
+
+        Returns:
+            The first two matching reasons joined by '. ' and ending in '.'.
+        """
+        sentence_lower = sentence.lower()
+        reasons = []
+        # AI Indicators
+        formal_transitions = ['furthermore', 'moreover', 'additionally', 'consequently',
+                            'therefore', 'thus', 'hence', 'nevertheless', 'nonetheless',
+                            'in conclusion', 'to summarize', 'it is important to note']
+        ai_buzzwords = ['delve', 'utilize', 'leverage', 'facilitate', 'implement',
+                       'comprehensive', 'robust', 'seamless', 'streamline', 'optimize',
+                       'strategic', 'framework', 'methodology', 'paramount']
+        passive_voice = ['is known', 'are made', 'was created', 'were developed',
+                        'can be found', 'has been', 'have been', 'will be']
+        # Human Indicators
+        informal_markers = ['lol', 'haha', 'omg', 'btw', 'tbh', 'gonna', 'wanna',
+                           'kinda', 'sorta', 'yeah', 'nah', 'idk', ' u ', 'ngl',
+                           '...', '!!', '??', 'bruh', 'fr', 'lowkey']
+        contractions = ["can't", "won't", "ain't", "shouldn't", "wouldn't",
+                       "i'm", "we're", "they're", "it's"]
+        # Check patterns; the order fixes which two reasons are reported.
+        if any(self._has_marker(sentence_lower, m) for m in informal_markers):
+            reasons.append("Informal conversational language")
+        if any(c in sentence_lower for c in contractions):
+            reasons.append("Natural contractions")
+        if any(self._has_marker(sentence_lower, t) for t in formal_transitions):
+            reasons.append("Formal tone and structure")
+        if any(w in sentence_lower for w in ai_buzzwords):
+            reasons.append("Technical/corporate vocabulary")
+        if any(p in sentence_lower for p in passive_voice):
+            reasons.append("Passive voice construction")
+        if len(sentence.split()) > 25:
+            reasons.append("Very long, complex sentence")
+        if sentence.count(',') >= 3:
+            reasons.append("Multiple clauses")
+        if '?' in sentence:
+            reasons.append("Direct question")
+        # Default reasons
+        if not reasons:
+            if ai_score > 0.7:
+                reasons.append("Formulaic structure")
+            elif ai_score < 0.3:
+                reasons.append("Natural expression")
+            else:
+                reasons.append("Mixed characteristics")
+        return ". ".join(reasons[:2]) + "."
+    def explain(self, text):
+        """Analyze *text* and return the verdict with sentence-level evidence.
+
+        Returns:
+            Dict with ``prediction`` ('AI' or 'Human'), ``ai_probability``
+            and ``human_probability`` (percent, two decimals),
+            ``confidence``, ``ai_indicators`` (up to five sentences above
+            0.55, most AI-like first) and ``human_indicators`` (up to five
+            sentences below 0.45, most human-like first). Falls back to
+            whole-text analysis when no sentence can be extracted.
+        """
+        sentences = self.split_into_sentences(text)
+        if not sentences:
+            return self._analyze_whole_text(text)
+        sentence_results = []
+        for sentence in sentences:
+            score = self.analyze_sentence(sentence)
+            sentence_results.append({
+                'sentence': sentence,
+                'ai_probability': score,
+                'reason': self.get_sentence_explanation(sentence, score)
+            })
+        # Weighted average: sentences far from 0.5 carry more weight.
+        total_weight = 0
+        weighted_sum = 0
+        for result in sentence_results:
+            weight = abs(result['ai_probability'] - 0.5) + 0.5
+            weighted_sum += result['ai_probability'] * weight
+            total_weight += weight
+        overall_ai_prob = weighted_sum / total_weight if total_weight > 0 else 0.5
+        by_ai_desc = sorted(sentence_results, key=lambda r: r['ai_probability'], reverse=True)
+        ai_indicators = [s for s in by_ai_desc if s['ai_probability'] > 0.55][:5]
+        # Walk the ranking from the human end; slicing the descending list
+        # would return the five LEAST human-like sentences instead.
+        human_indicators = [s for s in reversed(by_ai_desc) if s['ai_probability'] < 0.45][:5]
+        distance = abs(overall_ai_prob - 0.5)
+        confidence = "High" if distance > 0.25 else "Medium" if distance > 0.15 else "Low"
+        return {
+            'prediction': 'AI' if overall_ai_prob > 0.5 else 'Human',
+            'ai_probability': round(overall_ai_prob * 100, 2),
+            'human_probability': round((1 - overall_ai_prob) * 100, 2),
+            'confidence': confidence,
+            'ai_indicators': ai_indicators,
+            'human_indicators': human_indicators
+        }
+    def _analyze_whole_text(self, text):
+        """Fallback for text with no usable sentences: score it as one unit.
+
+        Uses the raw model probability (no lexical calibration). Returns the
+        same dict shape as ``explain``.
+        """
+        overall_ai_prob = self._model_ai_probability(text)
+        distance = abs(overall_ai_prob - 0.5)
+        confidence = "High" if distance > 0.25 else "Medium" if distance > 0.15 else "Low"
+        # 'ai_probability' matches the entries built by explain(); 'score' is
+        # kept so existing consumers of the old key keep working.
+        indicator = {
+            'sentence': text,
+            'ai_probability': overall_ai_prob,
+            'score': overall_ai_prob,
+            'reason': self.get_sentence_explanation(text, overall_ai_prob)
+        }
+        is_ai = overall_ai_prob > 0.5
+        return {
+            'prediction': 'AI' if is_ai else 'Human',
+            'ai_probability': round(overall_ai_prob * 100, 2),
+            'human_probability': round((1 - overall_ai_prob) * 100, 2),
+            'confidence': confidence,
+            'ai_indicators': [indicator] if is_ai else [],
+            'human_indicators': [] if is_ai else [indicator]
+        }
+if __name__ == "__main__":
+    # Manual smoke test: run a few labelled samples through the detector and
+    # print whether each prediction matches the expected label.
+    banner = "=" * 70
+    print("\n" + banner)
+    print("AI Text Detection - Testing with Calibration")
+    print(banner)
+    MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"
+    detector = SentenceBasedTextDetector(MODEL_NAME)
+    # (sample text, expected prediction)
+    samples = (
+        ("omg i cant believe what happened yesterday lol", "Human"),
+        ("Furthermore, it is important to note that comprehensive analysis", "AI"),
+        ("hey whats up? wanna hang out later?", "Human"),
+        ("The strategic framework facilitates optimal outcomes", "AI"),
+    )
+    for sample, wanted in samples:
+        outcome = detector.explain(sample)
+        got = outcome['prediction']
+        if got == wanted:
+            tag = "[OK]"
+        else:
+            tag = "[FAIL]"
+        print(f"\n{tag} {sample[:50]}...")
+        print(f"Expected: {wanted}, Got: {got} ({outcome['ai_probability']:.1f}%)")

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+Flask==3.0.0
+Flask-CORS==4.0.0
+transformers==4.35.0
+torch==2.1.0
+torchvision==0.16.0
+Pillow==10.1.0