"""Bootstrap script for the MinerU REST API.

Steps performed when run as a script:

1. Resolve the config-file and model-directory paths under ``$HOME``.
2. Download the ``opendatalab/PDF-Extract-Kit-1.0`` snapshot if the model
   directory referenced by the config is not present yet.
3. Write ``magic-pdf.json`` pointing at the downloaded models.
4. Launch ``mineru-api`` and exit with that process's exit status.
"""

import json
import os
import subprocess
import sys

# 1. Setup Configuration Paths
HOME = os.environ.get("HOME", "/home/user")
CONFIG_FILE = os.path.join(HOME, "magic-pdf.json")
MODEL_DIR = os.path.join(HOME, "models")
# The config points at the "models" sub-folder of the download directory,
# not at the download directory itself.
REAL_MODEL_DIR = os.path.join(MODEL_DIR, "models")


def build_config(models_dir: str) -> dict:
    """Return the magic-pdf configuration that points at *models_dir*."""
    return {
        "models-dir": models_dir,
        "device-mode": "cpu",
        "table-config": {
            "model": "TableMaster",
            "is_table_recog_enable": False,
            "max_time": 400,
        },
    }


def ensure_models(model_dir: str = MODEL_DIR, ready_dir: str = REAL_MODEL_DIR) -> bool:
    """Download the model snapshot into *model_dir* unless *ready_dir* exists.

    *ready_dir* is the directory the generated config refers to; testing it
    (rather than only its parent) avoids skipping the download when just an
    empty *model_dir* is present.

    Returns:
        True if a download was performed, False if it was skipped.

    Raises:
        Exception: whatever the import of ``huggingface_hub`` or the
            download itself raises; the caller decides how to report it.
    """
    # NOTE: a directory check cannot detect an interrupted download that
    # already created ready_dir; that case still needs manual cleanup.
    if os.path.isdir(ready_dir):
        print("Models found. Skipping download.")
        return False

    print(f"Downloading models to {model_dir}...")
    # Imported lazily: the package is only needed when a download actually
    # happens, so a start with models already present does not depend on it.
    from huggingface_hub import snapshot_download

    snapshot_download(
        "opendatalab/PDF-Extract-Kit-1.0",
        local_dir=model_dir,
        max_workers=4,
    )
    print("Model download complete.")
    return True


def write_config(path: str, data: dict) -> None:
    """Serialize *data* as indented JSON to *path*, overwriting any old file."""
    print(f"Writing configuration to {path}...")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)


def launch_api(cmd: list) -> int:
    """Run *cmd*, wait for it to finish, and return its exit status.

    Returns 127 (the shell's "command not found" status) when the
    executable cannot be located, instead of raising.
    """
    # Flush our own buffered output first so it is not emitted after (or
    # interleaved with) the child's output when stdout is a pipe.
    sys.stdout.flush()
    try:
        return subprocess.run(cmd, check=False).returncode
    except FileNotFoundError:
        print(f"Error: command not found: {cmd[0]}", file=sys.stderr)
        return 127


# 3. magic-pdf.json contents
config_data = build_config(REAL_MODEL_DIR)

# 4. Command line used to start the MinerU REST API
command = [
    "mineru-api",
    "--host", "0.0.0.0",
    "--port", "7860",
]


def main() -> int:
    """Run the full setup sequence and return the process exit status."""
    print("--- Starting MinerU Setup ---")

    # 2. Download Models (if not present)
    try:
        ensure_models(MODEL_DIR, REAL_MODEL_DIR)
    except Exception as e:  # top-level boundary: report and exit non-zero
        print(f"Error downloading models: {e}", file=sys.stderr)
        return 1

    # 3. Generate magic-pdf.json Config
    write_config(CONFIG_FILE, config_data)

    # 4. Launch the MinerU REST API and propagate its exit status
    print("Launching MinerU REST API...")
    return launch_api(command)


if __name__ == "__main__":
    sys.exit(main())