MnemoCore / tests /benchmark_encoding.py
Granis87's picture
Initial upload of MnemoCore
dbb04e4 verified
import time
import numpy as np
import sys
import os
# Add src to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from mnemocore.core.engine import HAIMEngine
from mnemocore.core.binary_hdv import TextEncoder
def benchmark():
print("Initializing Engine...")
engine = HAIMEngine(dimension=10000) # Slightly smaller for quick bench if needed, but 10k is realistic
encoded_text = "The quick brown fox jumps over the lazy dog " * 50 # 450 words
print(f"Text length: {len(encoded_text.split())} tokens")
# 1. Benchmark Legacy Encoding
print("\n--- Legacy Encoding (Float/Numpy) ---")
# Force legacy mode if possible or just call the method directly to be sure
start_time = time.time()
iterations = 50
for _ in range(iterations):
_ = engine._legacy_encode_content_numpy(encoded_text)
end_time = time.time()
avg_time = (end_time - start_time) / iterations
print(f"Average time per document: {avg_time*1000:.2f} ms")
print(f"Total time for {iterations} docs: {end_time - start_time:.4f} s")
# 2. Benchmark Binary Encoding
print("\n--- Binary Encoding (BinaryHDV) ---")
encoder = TextEncoder(dimension=10000)
start_time = time.time()
iterations = 50
for _ in range(iterations):
# Clear cache to measure raw encoding speed?
# Or measure with cache to see benefit?
# Real usage is with cache, but "first load" matters too.
# Let's keep cache for now as it's the default behavior.
_ = encoder.encode(encoded_text)
end_time = time.time()
avg_time = (end_time - start_time) / iterations
print(f"Average time per document (with cache): {avg_time*1000:.2f} ms")
# Measure without cache hit (force new tokens)
print("\n--- Binary Encoding (Cold Cache equivalent) ---")
start_time = time.time()
iterations = 10
for i in range(iterations):
# Unique tokens every time
unique_text = f"{encoded_text} {i}"
_ = encoder.encode(unique_text)
end_time = time.time()
avg_time = (end_time - start_time) / iterations
print(f"Average time per document (unique suffix): {avg_time*1000:.2f} ms")
if __name__ == "__main__":
benchmark()