# Benchmark script: compares legacy float/numpy document encoding against
# BinaryHDV text encoding (warm-cache and cold-cache-equivalent runs).
import time
import numpy as np
import sys
import os
# Add src to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from mnemocore.core.engine import HAIMEngine
from mnemocore.core.binary_hdv import TextEncoder
def _time_iterations(fn, iterations):
    """Call *fn* ``iterations`` times; return ``(total_seconds, avg_seconds)``.

    Uses ``time.perf_counter()`` — the monotonic, high-resolution clock
    intended for benchmarking — rather than wall-clock ``time.time()``.
    """
    start = time.perf_counter()
    for _ in range(iterations):
        fn()
    total = time.perf_counter() - start
    return total, total / iterations


def benchmark():
    """Benchmark legacy float/numpy encoding vs. BinaryHDV text encoding.

    Prints average per-document encoding times for three scenarios:
    legacy numpy encoding, binary encoding with a warm token cache, and
    binary encoding with unique tokens (cold-cache equivalent).
    """
    print("Initializing Engine...")
    engine = HAIMEngine(dimension=10000)  # Slightly smaller for quick bench if needed, but 10k is realistic
    encoded_text = "The quick brown fox jumps over the lazy dog " * 50  # 450 words
    print(f"Text length: {len(encoded_text.split())} tokens")

    # 1. Benchmark Legacy Encoding
    print("\n--- Legacy Encoding (Float/Numpy) ---")
    # Force legacy mode if possible or just call the method directly to be sure
    iterations = 50
    total, avg_time = _time_iterations(
        lambda: engine._legacy_encode_content_numpy(encoded_text), iterations
    )
    print(f"Average time per document: {avg_time*1000:.2f} ms")
    print(f"Total time for {iterations} docs: {total:.4f} s")

    # 2. Benchmark Binary Encoding
    # Real usage hits the token cache, so we keep it (default behavior);
    # the cold-cache run below covers the "first load" cost.
    print("\n--- Binary Encoding (BinaryHDV) ---")
    encoder = TextEncoder(dimension=10000)
    iterations = 50
    _, avg_time = _time_iterations(lambda: encoder.encode(encoded_text), iterations)
    print(f"Average time per document (with cache): {avg_time*1000:.2f} ms")

    # Measure without cache hit (force new tokens)
    print("\n--- Binary Encoding (Cold Cache equivalent) ---")
    iterations = 10
    start_time = time.perf_counter()
    for i in range(iterations):
        # Unique suffix every iteration so at least one token is always new.
        unique_text = f"{encoded_text} {i}"
        _ = encoder.encode(unique_text)
    avg_time = (time.perf_counter() - start_time) / iterations
    print(f"Average time per document (unique suffix): {avg_time*1000:.2f} ms")
# Entry point: run the benchmark only when executed as a script.
if __name__ == "__main__":
    benchmark()