"""
Backlinks Profile Module using RapidAPI endpoints
Combines 3 RapidAPI endpoints: Best Backlink Checker, Majestic, and Domain Metrics Check
"""
import os
import requests
import time
from typing import Dict, Any, List, Optional
from urllib.parse import urlparse
from datetime import datetime, timedelta
class ModuleResult:
"""Standard result object for SEO modules"""
    def __init__(self, success: bool, data: Dict[str, Any], error: Optional[str] = None):
self.success = success
self.data = data
self.error = error
class BacklinksModule:
def __init__(self):
self.rapidapi_key = os.getenv('RAPIDAPI_KEY')
self.timeout = int(os.getenv('RAPIDAPI_TIMEOUT', '30'))
self.max_retries = int(os.getenv('BACKLINKS_MAX_RETRIES', '3'))
# RapidAPI endpoints
self.backlink_checker_url = "https://best-backlink-checker-api.p.rapidapi.com/excatbacklinks_noneng.php"
self.majestic_url = "https://majestic1.p.rapidapi.com/url_metrics"
self.domain_metrics_url = "https://domain-metrics-check.p.rapidapi.com/domain-metrics"
# Common headers
self.headers = {
'x-rapidapi-key': self.rapidapi_key,
'Accept': 'application/json'
}
def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
"""
Analyze backlink profile using multiple RapidAPI endpoints
Args:
url: Target website URL
            quick_scan: If True, fetch fewer backlinks for a faster, limited analysis
Returns:
ModuleResult with comprehensive backlinks data
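        Example (illustrative; assumes RAPIDAPI_KEY is set in the environment):
            result = BacklinksModule().analyze('https://example.com')
            if result.success:
                print(result.data['total_backlinks'])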
"""
try:
if not self.rapidapi_key:
return self._generate_no_api_data(url)
domain = self._extract_domain(url)
# Call all 3 APIs with retry logic
individual_backlinks = self._get_individual_backlinks(domain, quick_scan)
majestic_metrics = self._get_majestic_metrics(domain)
domain_metrics = self._get_domain_metrics(domain)
# Combine and process all data
combined_data = self._combine_backlink_data(
domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan
)
return ModuleResult(success=True, data=combined_data)
except Exception as e:
return ModuleResult(
success=False,
data={},
error=f"Backlinks analysis failed: {str(e)}"
)
def _extract_domain(self, url: str) -> str:
"""Extract clean domain from URL"""
if not url.startswith(('http://', 'https://')):
url = 'https://' + url
        domain = urlparse(url).netloc
        # Strip only a leading 'www.' so embedded substrings elsewhere in the host are left intact
        if domain.startswith('www.'):
            domain = domain[4:]
        return domain
def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
"""Make API request with retry logic"""
if headers is None:
headers = self.headers.copy()
for attempt in range(self.max_retries):
try:
response = requests.get(url, params=params, headers=headers, timeout=self.timeout)
if response.status_code == 200:
return response.json()
                elif response.status_code == 429:  # Rate limited
                    wait_time = 2 ** (attempt + 1)  # Exponential backoff: 2s, 4s, 8s...
print(f"Rate limited, waiting {wait_time}s...")
time.sleep(wait_time)
continue
else:
print(f"API error {response.status_code}: {response.text}")
except requests.exceptions.Timeout:
print(f"Timeout on attempt {attempt + 1}")
if attempt < self.max_retries - 1:
time.sleep(2)
except Exception as e:
print(f"Request error: {str(e)}")
if attempt < self.max_retries - 1:
time.sleep(2)
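        # All retries exhausted (or a non-retryable error occurred); callers treat None as "no data"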
return None
def _get_individual_backlinks(self, domain: str, quick_scan: bool = False) -> List[Dict]:
"""Get individual backlinks data"""
try:
headers = self.headers.copy()
headers['x-rapidapi-host'] = 'best-backlink-checker-api.p.rapidapi.com'
params = {'domain': f'https://{domain}'}
data = self._api_request_with_retry(self.backlink_checker_url, params, headers)
if data and isinstance(data, list):
# Limit results for quick scan
if quick_scan:
return data[:50]
return data[:500] # Reasonable limit to avoid memory issues
except Exception as e:
print(f"Individual backlinks API error: {str(e)}")
return []
def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
"""Get Majestic domain metrics via RapidAPI"""
try:
headers = self.headers.copy()
headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
params = {'url': domain}
data = self._api_request_with_retry(self.majestic_url, params, headers)
if data and data.get('status') == 'success':
return data
except Exception as e:
print(f"Majestic RapidAPI error: {str(e)}")
return {}
def _get_domain_metrics(self, domain: str) -> Dict[str, Any]:
"""Get comprehensive domain metrics"""
try:
headers = self.headers.copy()
headers['x-rapidapi-host'] = 'domain-metrics-check.p.rapidapi.com'
# API expects domain with trailing slash
url = f"{self.domain_metrics_url}/{domain}/"
data = self._api_request_with_retry(url, headers=headers)
if data and data.get('domain'):
return data
except Exception as e:
print(f"Domain metrics API error: {str(e)}")
return {}
def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
majestic_metrics: Dict, domain_metrics: Dict, quick_scan: bool) -> Dict[str, Any]:
"""Combine data from all 3 APIs into comprehensive backlinks profile"""
# Primary metrics (prefer Domain Metrics Check, fallback to Majestic)
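        # Each int(...) in the `or` chains below is falsy when it equals 0, so
        # evaluation falls through to the next provider until a non-zero value is found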
total_backlinks = (
int(domain_metrics.get('ahrefsBacklinks', 0)) or
int(domain_metrics.get('majesticLinks', 0)) or
int(majestic_metrics.get('majesticLinks', 0)) or
len(individual_backlinks)
)
total_ref_domains = (
int(domain_metrics.get('ahrefsRefDomains', 0)) or
int(domain_metrics.get('majesticRefDomains', 0)) or
int(majestic_metrics.get('majesticRefDomains', 0)) or
            len({urlparse(link['url_from']).netloc for link in individual_backlinks if link.get('url_from')})
)
# Authority scores (multiple sources for validation)
domain_rating = (
int(domain_metrics.get('ahrefsDR', 0)) or
int(domain_metrics.get('majesticTF', 0)) or
int(majestic_metrics.get('majesticTF', 0))
)
# Process individual backlinks for detailed analysis
referring_domains = self._extract_referring_domains(individual_backlinks)
anchor_distribution = self._extract_anchor_distribution(individual_backlinks)
monthly_changes = self._calculate_monthly_changes(individual_backlinks)
top_backlinks = self._get_top_backlinks(individual_backlinks)
# Link quality analysis
quality_metrics = self._analyze_link_quality(individual_backlinks, domain_metrics)
# Comprehensive backlinks data
backlinks_data = {
'total_backlinks': total_backlinks,
'total_ref_domains': total_ref_domains,
'domain_rating': domain_rating,
# Authority scores from multiple sources
'authority_scores': {
'ahrefs_dr': int(domain_metrics.get('ahrefsDR', 0)),
'moz_da': int(domain_metrics.get('mozDA', 0)),
'moz_pa': int(domain_metrics.get('mozPA', 0)),
'majestic_tf': int(domain_metrics.get('majesticTF', 0) or majestic_metrics.get('majesticTF', 0)),
'majestic_cf': int(domain_metrics.get('majesticCF', 0) or majestic_metrics.get('majesticCF', 0))
},
# Detailed analysis
'referring_domains': referring_domains,
'anchor_distribution': anchor_distribution,
'monthly_changes': monthly_changes,
'top_backlinks': top_backlinks,
'quality_metrics': quality_metrics,
# Educational and government links (high-quality indicators)
'edu_links': int(domain_metrics.get('majesticRefEDU', 0) or majestic_metrics.get('majesticRefEDU', 0)),
'gov_links': int(domain_metrics.get('majesticRefGov', 0) or majestic_metrics.get('majesticRefGov', 0)),
# Traffic estimates (if available)
'estimated_organic_traffic': float(domain_metrics.get('ahrefsTraffic', 0)),
'organic_keywords': int(domain_metrics.get('ahrefsOrganicKeywords', 0)),
# Data sources and metadata
'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
'last_updated': datetime.now().isoformat(),
'quick_scan': quick_scan,
'analysis_depth': 'comprehensive' if not quick_scan else 'basic'
}
return backlinks_data
def _extract_referring_domains(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
"""Extract and analyze referring domains"""
domain_stats = {}
for link in backlinks:
if not link.get('url_from'):
continue
try:
source_domain = urlparse(link['url_from']).netloc
if source_domain not in domain_stats:
domain_stats[source_domain] = {
'domain': source_domain,
'backlinks': 0,
'first_seen': link.get('first_seen', ''),
'domain_authority': link.get('domain_inlink_rank', 0),
'follow_links': 0,
'nofollow_links': 0
}
domain_stats[source_domain]['backlinks'] += 1
if link.get('nofollow'):
domain_stats[source_domain]['nofollow_links'] += 1
else:
domain_stats[source_domain]['follow_links'] += 1
except Exception:
continue
# Sort by backlinks count and return top domains
top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
return top_domains[:20] # Top 20 referring domains
def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
"""Analyze anchor text distribution"""
anchor_stats = {}
for link in backlinks:
anchor = link.get('anchor', '').strip()
if not anchor or len(anchor) > 100: # Skip very long anchors
continue
if anchor not in anchor_stats:
anchor_stats[anchor] = {
'anchor_text': anchor,
'backlinks': 0,
'follow_links': 0,
'nofollow_links': 0,
'unique_domains': set()
}
anchor_stats[anchor]['backlinks'] += 1
if link.get('nofollow'):
anchor_stats[anchor]['nofollow_links'] += 1
else:
anchor_stats[anchor]['follow_links'] += 1
            # Track unique domains for this anchor, skipping links with no usable source URL
            try:
                domain = urlparse(link.get('url_from', '')).netloc
                if domain:
                    anchor_stats[anchor]['unique_domains'].add(domain)
            except Exception:
                pass
# Convert sets to counts and sort
anchor_distribution = []
for anchor_data in anchor_stats.values():
anchor_data['unique_domains'] = len(anchor_data['unique_domains'])
anchor_distribution.append(anchor_data)
# Sort by backlinks count
anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
return anchor_distribution[:15] # Top 15 anchor texts
def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
"""Calculate monthly backlinks changes"""
now = datetime.now()
last_month = now - timedelta(days=30)
new_links = 0
recent_links = 0
for link in backlinks:
first_seen = link.get('first_seen', '')
if not first_seen:
continue
try:
link_date = datetime.strptime(first_seen, '%Y-%m-%d')
if link_date >= last_month:
new_links += 1
if link_date >= now - timedelta(days=90): # 3 months
recent_links += 1
except Exception:
continue
return {
'new_backlinks': new_links,
'lost_backlinks': 0, # Can't calculate without historical data
'net_change': new_links,
'recent_backlinks_3m': recent_links
}
def _get_top_backlinks(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
"""Get top-quality backlinks"""
# Sort by inlink_rank (higher is better)
        sorted_links = sorted(
            backlinks,
            key=lambda x: x.get('inlink_rank') or 0,  # treat missing/None ranks as 0
            reverse=True
        )
top_links = []
for link in sorted_links[:10]:
top_links.append({
'source_url': link.get('url_from', ''),
'source_title': link.get('title', ''),
'anchor_text': link.get('anchor', ''),
                'is_follow': not link.get('nofollow', False),  # missing flag counts as follow, matching the domain stats
'authority_score': link.get('inlink_rank', 0),
'first_seen': link.get('first_seen', '')
})
return top_links
def _analyze_link_quality(self, backlinks: List[Dict], domain_metrics: Dict) -> Dict[str, Any]:
"""Analyze overall link quality metrics"""
if not backlinks:
return {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0}
        # A missing 'nofollow' flag is treated as a follow link, consistent with the per-domain counts
        follow_count = sum(1 for link in backlinks if not link.get('nofollow', False))
total_links = len(backlinks)
follow_ratio = (follow_count / total_links * 100) if total_links > 0 else 0
# Average authority score
authority_scores = [link.get('inlink_rank', 0) for link in backlinks if link.get('inlink_rank')]
avg_authority = sum(authority_scores) / len(authority_scores) if authority_scores else 0
        # Composite quality score, capped at 100: up to 40 points from the follow
        # ratio, a scaled authority contribution, and up to 20 points for
        # referring-domain diversity
        unique_source_domains = {
            urlparse(link['url_from']).netloc
            for link in backlinks if link.get('url_from')
        }
        quality_score = min(100, (
            (follow_ratio * 0.4) +                # follow ratio: max 40 points
            (avg_authority * 2) +                 # authority, scaled from inlink_rank
            min(20, len(unique_source_domains))   # domain diversity: max 20 points
        ))
return {
'follow_ratio': round(follow_ratio, 1),
'avg_authority': round(avg_authority, 1),
'quality_score': round(quality_score, 1),
'total_analyzed': total_links,
'edu_gov_count': int(domain_metrics.get('majesticRefEDU', 0)) + int(domain_metrics.get('majesticRefGov', 0))
}
def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
"""Track which data sources provided information"""
sources = []
if individual_backlinks:
sources.append('Best Backlink Checker API')
if majestic_metrics:
sources.append('Majestic RapidAPI')
if domain_metrics:
sources.append('Domain Metrics Check API')
return sources or ['No data sources available']
def _generate_no_api_data(self, url: str) -> ModuleResult:
"""Generate response when no API key is available"""
domain = self._extract_domain(url)
no_api_data = {
'total_backlinks': 0,
'total_ref_domains': 0,
'domain_rating': 0,
'authority_scores': {
'ahrefs_dr': 0,
'moz_da': 0,
'moz_pa': 0,
'majestic_tf': 0,
'majestic_cf': 0
},
'referring_domains': [],
'anchor_distribution': [],
'monthly_changes': {
'new_backlinks': 0,
'lost_backlinks': 0,
'net_change': 0
},
'top_backlinks': [],
'quality_metrics': {
'follow_ratio': 0,
'avg_authority': 0,
'quality_score': 0
},
'edu_links': 0,
'gov_links': 0,
'estimated_organic_traffic': 0,
'organic_keywords': 0,
'data_sources': ['No API credentials available'],
'last_updated': datetime.now().isoformat(),
'placeholder': True,
'message': 'Add RAPIDAPI_KEY to your .env file to unlock comprehensive backlinks analysis using Best Backlink Checker, Majestic, and Domain Metrics Check RapidAPIs.'
}
return ModuleResult(success=True, data=no_api_data)
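

if __name__ == '__main__':
    # Minimal smoke test (illustrative only): 'example.com' is a placeholder target,
    # and live results require RAPIDAPI_KEY to be set in the environment.
    module = BacklinksModule()
    result = module.analyze('example.com', quick_scan=True)
    if result.success:
        print(f"Backlinks: {result.data['total_backlinks']}, "
              f"referring domains: {result.data['total_ref_domains']}, "
              f"sources: {', '.join(result.data['data_sources'])}")
    else:
        print(f"Analysis failed: {result.error}")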