buildborderless
/

CommunityForensics-DeepfakeDet-ViT

Image Classification

deepfake_detection

Model card Files Files and versions

CommunityForensics-DeepfakeDet-ViT / scripts /quick_analysis.py

LPX55's picture

Create quick_analysis.py

83314dd verified 12 months ago

history blame contribute delete

3.81 kB

	import os
	from PIL import Image
	from collections import Counter

	def analyze_images(directory):
	    """Collect per-folder image statistics for everything under *directory*.

	    Walks *directory* recursively with ``os.walk``. Every folder that
	    directly contains at least one file gets an entry keyed by the folder's
	    base name; folders that share a base name are merged into one entry.

	    Args:
	        directory: Root directory to scan. A path that does not exist
	            yields an empty result, because ``os.walk`` produces nothing
	            for it.

	    Returns:
	        A dict mapping folder name to a dict with the keys
	        ``'image_count'`` (number of files PIL opened successfully),
	        ``'total_size'`` (combined size in bytes of those files) and
	        ``'resolutions'`` (a ``Counter`` of ``(width, height)`` tuples).
	    """
	    analysis_results = {}

	    for root, _dirs, files in os.walk(directory):
	        if not files:
	            continue

	        # normpath drops a trailing separator, so a root given as 'real/'
	        # is keyed as 'real' instead of the empty string.
	        model_folder_name = os.path.basename(os.path.normpath(root))
	        stats = analysis_results.setdefault(
	            model_folder_name,
	            {'image_count': 0, 'total_size': 0, 'resolutions': Counter()},
	        )

	        for file_name in files:
	            file_path = os.path.join(root, file_name)

	            try:
	                with Image.open(file_path) as img:
	                    width, height = img.size
	                file_size = os.path.getsize(file_path)
	            except Exception as e:
	                # Deliberately broad: one unreadable or non-image file must
	                # not abort the scan of the whole dataset.
	                print(f"Error reading file {file_path}: {e}")
	                continue

	            # Update all three statistics together, and only for files that
	            # were actually readable as images, so the count always agrees
	            # with the size total and the resolution histogram.
	            stats['image_count'] += 1
	            stats['total_size'] += file_size
	            stats['resolutions'][(width, height)] += 1

	    return analysis_results

	def print_and_log_analysis_results(analysis_results, dataset_name, log_file):
	    """Print a per-folder summary table and append it to *log_file*.

	    Args:
	        analysis_results: Dict mapping a folder name to a dict with the keys
	            ``'image_count'``, ``'total_size'`` (bytes) and ``'resolutions'``
	            (an object with ``most_common``, e.g. a ``Counter`` keyed by
	            ``(width, height)``). May be empty, in which case only the
	            title, header and separator are emitted.
	        dataset_name: Label used in the "Analysis for ..." title line.
	        log_file: Path of the text file the table is appended to.
	    """
	    # default=0 keeps an empty result set (e.g. a missing dataset directory)
	    # from raising ValueError in max().
	    longest_name = max((len(model) for model in analysis_results), default=0)
	    model_column_width = max(longest_name, 20)  # Ensure at least 20 characters

	    # Column widths; the size column is as wide as its 'Total Size (MB)'
	    # label so the header and the data rows line up.
	    image_count_width = 12
	    total_size_width = 15
	    resolution_width = 25

	    header = (
	        f"{'Model':<{model_column_width}} | "
	        f"{'Image Count':>{image_count_width}} | "
	        f"{'Total Size (MB)':>{total_size_width}} | "
	        f"{'Most Common Resolution':<{resolution_width}}"
	    )
	    # Derive the rule from the header itself instead of hand-counting the
	    # separator characters.
	    separator = "-" * len(header)

	    result_lines = [
	        f"Analysis for {dataset_name}:\n",
	        header + "\n",
	        separator + "\n",
	    ]

	    for model, data in analysis_results.items():
	        total_size_mb = data['total_size'] / (1024 * 1024)
	        most_common_resolution = data['resolutions'].most_common(1)

	        if most_common_resolution:
	            (width, height), count = most_common_resolution[0]
	            common_res = f"{width}x{height} ({count} images)"
	        else:
	            common_res = "None"

	        result_lines.append(
	            f"{model:<{model_column_width}} | "
	            f"{data['image_count']:>{image_count_width}} | "
	            f"{total_size_mb:>{total_size_width}.2f} | "
	            f"{common_res:<{resolution_width}}\n"
	        )

	    result_lines.append("\n")

	    # Print to console (every line already carries its own newline)
	    print("".join(result_lines), end='')

	    # Append to the log file; explicit UTF-8 so folder names with non-ASCII
	    # characters are written the same way on every platform.
	    with open(log_file, 'a', encoding='utf-8') as f:
	        f.writelines(result_lines)

	def main():
	    """Analyze the generated and the real image sets and log both reports.

	    The log file is truncated first; each dataset is then analyzed and its
	    report is printed and appended to the log, generated images first.
	    """
	    log_file = 'analysis_results.txt'

	    # (directory to scan, label shown in the report), in reporting order
	    datasets = (
	        ('resampledEvalSet', "Generated Images"),
	        ('real', "Real Images"),
	    )

	    # Start from an empty log (optional, drop this line to keep appending
	    # to the results of earlier runs)
	    open(log_file, 'w').close()

	    for image_dir, label in datasets:
	        folder_stats = analyze_images(image_dir)
	        print_and_log_analysis_results(folder_stats, label, log_file)

	# Run the analysis only when this file is executed as a script, so that
	# importing it for its functions has no side effects.
	if __name__ == "__main__":
	    main()