""" Backlinks Profile Module using RapidAPI endpoints Combines 3 RapidAPI endpoints: Best Backlink Checker, Majestic, and Domain Metrics Check """ import os import requests import time from typing import Dict, Any, List, Optional from urllib.parse import urlparse from datetime import datetime, timedelta from utils import safe_pct class ModuleResult: """Standard result object for SEO modules""" def __init__(self, success: bool, data: Dict[str, Any], error: str = None): self.success = success self.data = data self.error = error class BacklinksModule: def __init__(self): self.rapidapi_key = os.getenv('RAPIDAPI_KEY') self.timeout = int(os.getenv('RAPIDAPI_TIMEOUT', '30')) self.max_retries = int(os.getenv('BACKLINKS_MAX_RETRIES', '3')) # RapidAPI endpoints self.backlink_checker_url = "https://best-backlink-checker-api.p.rapidapi.com/excatbacklinks_noneng.php" self.majestic_url = "https://majestic1.p.rapidapi.com/url_metrics" self.domain_metrics_url = "https://domain-metrics-check.p.rapidapi.com/domain-metrics" # Common headers self.headers = { 'x-rapidapi-key': self.rapidapi_key, 'Accept': 'application/json' } def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult: """ Analyze backlink profile using multiple RapidAPI endpoints Args: url: Target website URL quick_scan: If True, use cached data or limited analysis Returns: ModuleResult with comprehensive backlinks data """ try: if not self.rapidapi_key: return self._generate_no_api_data(url) domain = self._extract_domain(url) # Call all 3 APIs with retry logic and track status api_status = { 'working_apis': [], 'failed_apis': [], 'failed_messages': [] } print("🔄 Trying Best Backlink Checker API...") individual_backlinks = self._get_individual_backlinks(domain, quick_scan) if individual_backlinks: api_status['working_apis'].append('Best Backlink Checker') print("✅ Best Backlink Checker API - SUCCESS") else: api_status['failed_apis'].append('Best Backlink Checker') api_status['failed_messages'].append("❌ Best Backlink Checker API failed - using mock data") print("❌ Best Backlink Checker API - FAILED") print("🔄 Trying Majestic API...") majestic_metrics = self._get_majestic_metrics(domain) if majestic_metrics: api_status['working_apis'].append('Majestic') print("✅ Majestic API - SUCCESS") else: api_status['failed_apis'].append('Majestic') api_status['failed_messages'].append("❌ Majestic API failed - using mock data") print("❌ Majestic API - FAILED") print("🔄 Trying Domain Metrics Check API...") domain_metrics = self._get_domain_metrics(domain) if domain_metrics: api_status['working_apis'].append('Domain Metrics Check') print("✅ Domain Metrics Check API - SUCCESS") else: api_status['failed_apis'].append('Domain Metrics Check') api_status['failed_messages'].append("❌ Domain Metrics Check API failed - using mock data") print("❌ Domain Metrics Check API - FAILED") # Combine and process all data combined_data = self._combine_backlink_data( domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan, api_status ) return ModuleResult(success=True, data=combined_data) except Exception as e: return ModuleResult( success=False, data={}, error=f"Backlinks analysis failed: {str(e)}" ) def _extract_domain(self, url: str) -> str: if not url.startswith(('http://', 'https://')): url = 'https://' + url domain = urlparse(url).netloc.replace('www.', '') return domain def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]: if headers is None: headers = self.headers.copy() for attempt in 
            try:
                response = requests.get(url, params=params, headers=headers,
                                        timeout=self.timeout)
                if response.status_code == 200:
                    return response.json()
                elif response.status_code == 429:
                    # Wait longer on each rate-limited attempt
                    wait_time = (attempt + 1) * 2
                    print(f"Rate limited, waiting {wait_time}s...")
                    time.sleep(wait_time)
                    continue
                else:
                    print(f"API error {response.status_code}: {response.text}")
            except requests.exceptions.Timeout:
                print(f"Timeout on attempt {attempt + 1}")
                if attempt < self.max_retries - 1:
                    time.sleep(2)
            except Exception as e:
                print(f"Request error: {str(e)}")
                if attempt < self.max_retries - 1:
                    time.sleep(2)
        return None

    def _get_individual_backlinks(self, domain: str, quick_scan: bool = False) -> List[Dict]:
        """Get individual backlinks data"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'best-backlink-checker-api.p.rapidapi.com'
            params = {'domain': f'https://{domain}'}
            data = self._api_request_with_retry(self.backlink_checker_url, params, headers)
            if data and isinstance(data, list):
                # Limit results for quick scan
                return data[:50] if quick_scan else data[:500]
        except Exception as e:
            print(f"Individual backlinks API error: {str(e)}")
        return []

    def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
        """Get domain-level metrics from the Majestic RapidAPI"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
            params = {'url': domain}
            data = self._api_request_with_retry(self.majestic_url, params, headers)
            if data and data.get('status') == 'success':
                return data
        except Exception as e:
            print(f"Majestic RapidAPI error: {str(e)}")
        return {}

    def _get_domain_metrics(self, domain: str) -> Dict[str, Any]:
        """Get comprehensive domain metrics"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'domain-metrics-check.p.rapidapi.com'
            # API expects the domain with a trailing slash
            url = f"{self.domain_metrics_url}/{domain}/"
            data = self._api_request_with_retry(url, headers=headers)
            if data and data.get('domain'):
                return data
        except Exception as e:
            print(f"Domain metrics API error: {str(e)}")
        return {}

    def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
                               majestic_metrics: Dict, domain_metrics: Dict,
                               quick_scan: bool, api_status: Dict) -> Dict[str, Any]:
        """Combine data from all 3 APIs into a comprehensive backlinks profile"""
        # Primary metrics (prefer Domain Metrics Check, fall back to Majestic);
        # `or 0` guards against explicit nulls in the API responses
        total_backlinks = (
            int(domain_metrics.get('ahrefsBacklinks') or 0)
            or int(domain_metrics.get('majesticLinks') or 0)
            or int(majestic_metrics.get('majesticLinks') or 0)
            or len(individual_backlinks)
        )

        total_ref_domains = (
            int(domain_metrics.get('ahrefsRefDomains') or 0)
            or int(domain_metrics.get('majesticRefDomains') or 0)
            or int(majestic_metrics.get('majesticRefDomains') or 0)
            or len(set(urlparse(link['url_from']).netloc
                       for link in individual_backlinks if link.get('url_from')))
        )

        # Authority scores (multiple sources for validation)
        domain_rating = (
            int(domain_metrics.get('ahrefsDR') or 0)
            or int(domain_metrics.get('majesticTF') or 0)
            or int(majestic_metrics.get('majesticTF') or 0)
        )

        # Process individual backlinks for detailed analysis
        referring_domains = self._extract_referring_domains(individual_backlinks)
        anchor_distribution = self._extract_anchor_distribution(individual_backlinks)
        monthly_changes = self._calculate_monthly_changes(individual_backlinks)
        top_backlinks = self._get_top_backlinks(individual_backlinks)

        # Link quality analysis
        quality_metrics = self._analyze_link_quality(individual_backlinks, domain_metrics)

        # Comprehensive backlinks data
        backlinks_data = {
            'ref_domains': total_ref_domains,  # Match expected key name
            'new_backlinks_30d': monthly_changes.get('new_backlinks', 0),
            'lost_backlinks_30d': None,  # Explicit N/A placeholder
            'total_backlinks': total_backlinks,
            'total_ref_domains': total_ref_domains,
            'domain_rating': domain_rating,

            # Authority scores from multiple sources
            'authority_scores': {
                'ahrefs_dr': int(domain_metrics.get('ahrefsDR') or 0),
                'moz_da': int(domain_metrics.get('mozDA') or 0),
                'moz_pa': int(domain_metrics.get('mozPA') or 0),
                'majestic_tf': int(domain_metrics.get('majesticTF')
                                   or majestic_metrics.get('majesticTF') or 0),
                'majestic_cf': int(domain_metrics.get('majesticCF')
                                   or majestic_metrics.get('majesticCF') or 0)
            },

            # Detailed analysis
            'referring_domains': referring_domains,
            'anchor_distribution': anchor_distribution,
            'monthly_changes': monthly_changes,
            'top_backlinks': top_backlinks,
            'quality_metrics': quality_metrics,

            # Educational and government links (high-quality indicators)
            'edu_links': int(domain_metrics.get('majesticRefEDU')
                             or majestic_metrics.get('majesticRefEDU') or 0),
            'gov_links': int(domain_metrics.get('majesticRefGov')
                             or majestic_metrics.get('majesticRefGov') or 0),

            # Traffic estimates (if available)
            'estimated_organic_traffic': float(domain_metrics.get('ahrefsTraffic') or 0),
            'organic_keywords': int(domain_metrics.get('ahrefsOrganicKeywords') or 0),

            # Data sources and metadata
            'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
            'data_source': self._get_primary_data_source(individual_backlinks, majestic_metrics, domain_metrics),
            'api_status': api_status,
            'last_updated': datetime.now().isoformat(),
            'quick_scan': quick_scan,
            'analysis_depth': 'basic' if quick_scan else 'comprehensive'
        }

        return backlinks_data

    def _extract_referring_domains(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Extract and analyze referring domains"""
        domain_stats = {}
        for link in backlinks:
            if not link.get('url_from'):
                continue
            try:
                source_domain = urlparse(link['url_from']).netloc
                if source_domain not in domain_stats:
                    domain_stats[source_domain] = {
                        'domain': source_domain,
                        'backlinks': 0,
                        'first_seen': link.get('first_seen', ''),
                        'domain_authority': link.get('domain_inlink_rank', 0),
                        'follow_links': 0,
                        'nofollow_links': 0
                    }
                domain_stats[source_domain]['backlinks'] += 1
                if link.get('nofollow'):
                    domain_stats[source_domain]['nofollow_links'] += 1
                else:
                    domain_stats[source_domain]['follow_links'] += 1
            except Exception:
                continue

        # Sort by backlinks count and return top domains
        top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
        return top_domains[:20]

    def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Analyze anchor text distribution"""
        anchor_stats = {}
        for link in backlinks:
            anchor = link.get('anchor', '').strip()
            # Skip empty and implausibly long anchors
            if not anchor or len(anchor) > 100:
                continue
            if anchor not in anchor_stats:
                anchor_stats[anchor] = {
                    'anchor_text': anchor,
                    'backlinks': 0,
                    'follow_links': 0,
                    'nofollow_links': 0,
                    'unique_domains': set()
                }
            anchor_stats[anchor]['backlinks'] += 1
            if link.get('nofollow'):
                anchor_stats[anchor]['nofollow_links'] += 1
            else:
                anchor_stats[anchor]['follow_links'] += 1

            # Track unique domains for this anchor
            try:
                domain = urlparse(link.get('url_from', '')).netloc
                anchor_stats[anchor]['unique_domains'].add(domain)
            except Exception:
                pass

        # Convert sets to counts and sort
        anchor_distribution = []
        for anchor_data in anchor_stats.values():
            anchor_data['unique_domains'] = len(anchor_data['unique_domains'])
            anchor_distribution.append(anchor_data)

        # Sort by backlinks count
        anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
        return anchor_distribution[:15]

    def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, Any]:
        """Calculate monthly backlinks changes"""
        now = datetime.now()
        last_month = now - timedelta(days=30)

        new_links = 0
        recent_links = 0
        for link in backlinks:
            first_seen = link.get('first_seen', '')
            if not first_seen:
                continue
            try:
                link_date = datetime.strptime(first_seen, '%Y-%m-%d')
                if link_date >= last_month:
                    new_links += 1
                if link_date >= now - timedelta(days=90):
                    recent_links += 1
            except Exception:
                continue

        return {
            'new_backlinks': new_links,
            'lost_backlinks_30d': None,  # Explicit N/A placeholder
            'net_change': new_links,
            'recent_backlinks_3m': recent_links
        }

    def _get_top_backlinks(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Get top-quality backlinks"""
        # Sort by inlink_rank (higher is better); treat missing/null ranks as 0
        sorted_links = sorted(
            backlinks,
            key=lambda x: x.get('inlink_rank') or 0,
            reverse=True
        )

        top_links = []
        for link in sorted_links[:10]:
            top_links.append({
                'source_url': link.get('url_from', ''),
                'source_title': link.get('title', ''),
                'anchor_text': link.get('anchor', ''),
                'is_follow': not link.get('nofollow', True),
                'authority_score': link.get('inlink_rank', 0),
                'first_seen': link.get('first_seen', '')
            })
        return top_links

    def _analyze_link_quality(self, backlinks: List[Dict], domain_metrics: Dict) -> Dict[str, Any]:
        """Analyze overall link quality metrics"""
        if not backlinks:
            return {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0}

        follow_count = sum(1 for link in backlinks if not link.get('nofollow', True))
        total_links = len(backlinks)
        follow_ratio = (follow_count / total_links * 100) if total_links > 0 else 0

        # Average authority score
        authority_scores = [link.get('inlink_rank', 0) for link in backlinks
                            if link.get('inlink_rank')]
        avg_authority = sum(authority_scores) / len(authority_scores) if authority_scores else 0

        # Referring-domain diversity (urlparse avoids the IndexError that
        # naive '/'-splitting raises on scheme-less URLs)
        unique_domains = len(set(urlparse(link['url_from']).netloc
                                 for link in backlinks if link.get('url_from')))

        # Quality score (0-100): weighted blend of follow ratio,
        # average authority, and domain diversity (capped at 20)
        quality_score = min(100, (
            (follow_ratio * 0.4)
            + (avg_authority * 2)
            + (min(20, unique_domains) * 1)
        ))

        return {
            'follow_ratio': round(follow_ratio, 1),
            'avg_authority': round(avg_authority, 1),
            'quality_score': round(quality_score, 1),
            'total_analyzed': total_links,
            'edu_gov_count': (int(domain_metrics.get('majesticRefEDU') or 0)
                              + int(domain_metrics.get('majesticRefGov') or 0))
        }

    def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict,
                          domain_metrics: Dict) -> List[str]:
        """List every API that returned usable data"""
        sources = []
        if individual_backlinks:
            sources.append('Best Backlink Checker API')
        if majestic_metrics:
            sources.append('Majestic RapidAPI')
        if domain_metrics:
            sources.append('Domain Metrics Check API')
        return sources or ['No data sources available']

    def _get_primary_data_source(self, individual_backlinks: List, majestic_metrics: Dict,
                                 domain_metrics: Dict) -> str:
        """Get primary data source for labeling"""
        if domain_metrics:
            return 'Domain Metrics Check API'
        elif majestic_metrics:
            return 'Majestic RapidAPI'
        elif individual_backlinks:
            return 'Best Backlink Checker API'
        else:
            return 'No API credentials available'

    def _generate_no_api_data(self, url: str) -> ModuleResult:
        """Return zeroed placeholder data when no RAPIDAPI_KEY is configured"""
        no_api_data = {
            'total_backlinks': 0,
            'total_ref_domains': 0,
            'domain_rating': 0,
            'authority_scores': {
                'ahrefs_dr': 0,
                'moz_da': 0,
                'moz_pa': 0,
                'majestic_tf': 0,
                'majestic_cf': 0
            },
            'referring_domains': [],
            'anchor_distribution': [],
            'monthly_changes': {
                'new_backlinks': 0,
                'lost_backlinks_30d': None,  # Explicit N/A
                'net_change': 0
            },
            'ref_domains': 0,
            'new_backlinks_30d': 0,
            'lost_backlinks_30d': None,
            'top_backlinks': [],
            'quality_metrics': {
                'follow_ratio': 0,
                'avg_authority': 0,
                'quality_score': 0
            },
            'edu_links': 0,
            'gov_links': 0,
            'estimated_organic_traffic': 0,
            'organic_keywords': 0,
            'data_sources': ['No API credentials available'],
            'data_source': 'No API credentials available',
            'api_status': {
                'working_apis': [],
                'failed_apis': ['Best Backlink Checker', 'Majestic', 'Domain Metrics Check'],
                'failed_messages': [
                    '❌ Best Backlink Checker API failed - no RAPIDAPI_KEY',
                    '❌ Majestic API failed - no RAPIDAPI_KEY',
                    '❌ Domain Metrics Check API failed - no RAPIDAPI_KEY'
                ]
            },
            'last_updated': datetime.now().isoformat(),
            'placeholder': True,
            'message': ('Add RAPIDAPI_KEY to your .env file to unlock comprehensive '
                        'backlinks analysis using Best Backlink Checker, Majestic, '
                        'and Domain Metrics Check RapidAPIs.')
        }
        return ModuleResult(success=True, data=no_api_data)
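

# Minimal usage sketch (an illustrative addition, not part of the module's
# public contract): assumes RAPIDAPI_KEY is set in the environment and uses
# a placeholder target URL. All keys read below exist in both the live and
# the no-API placeholder result.
if __name__ == '__main__':
    module = BacklinksModule()
    result = module.analyze('https://example.com', quick_scan=True)
    if result.success:
        data = result.data
        print(f"Total backlinks:   {data['total_backlinks']}")
        print(f"Referring domains: {data['total_ref_domains']}")
        print(f"Domain rating:     {data['domain_rating']}")
        print(f"Data sources:      {', '.join(data['data_sources'])}")
    else:
        print(f"Analysis failed: {result.error}")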