yangluo committed on
Commit
fafdd6c
·
verified ·
1 Parent(s): 72b4aee

Upload merge_hf_cloud.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. merge_hf_cloud.py +134 -0
merge_hf_cloud.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = [
5
+ # "torch",
6
+ # "peft>=0.18.0",
7
+ # "transformers>=5.1.0",
8
+ # "accelerate>=1.6.0",
9
+ # "huggingface_hub",
10
+ # ]
11
+ # ///
12
+ """
13
+ Merge LoRA Adapter and Push to Hub (HF Cloud)
14
+
15
+ This script is designed to run on Hugging Face Jobs infrastructure.
16
+ It loads a base model + LoRA adapter, merges them, and pushes the
17
+ standalone merged model to the Hub.
18
+
19
+ Usage:
20
+ # Submit via Makefile
21
+ make merge-cloud
22
+
23
+ # Or submit directly
24
+ python scripts/submit_cloud_training.py \
25
+ --flavor a100x4 --timeout 2h \
26
+ --script scripts/merge_hf_cloud.py
27
+ """
28
+
29
+ import os
30
+
31
+ import torch
32
+ from huggingface_hub import login
33
+ from peft import PeftModel
34
+ from transformers import AutoModelForCausalLM, AutoTokenizer
35
+
36
# ═══════════════════════════════════════════════════════════════
# CONFIGURATION
# ═══════════════════════════════════════════════════════════════

# Base checkpoint the LoRA adapter was trained on.
BASE_MODEL = "Qwen/Qwen3-Coder-Next"
# Hub repo holding the trained LoRA adapter weights.
ADAPTER_MODEL = "yangluo/univer-api-qwen3-next-coder"
# Destination Hub repo for the merged, standalone model.
MERGED_MODEL_ID = "yangluo/univer-api-qwen3-next-coder-merged"

# ═══════════════════════════════════════════════════════════════
# MERGE SCRIPT
# ═══════════════════════════════════════════════════════════════

48
+
49
def main():
    """Merge the LoRA adapter into the base model and push the result to the Hub.

    Steps: authenticate with HF_TOKEN, load tokenizer and base model in
    bfloat16, attach the LoRA adapter, merge it into the base weights, then
    push the merged model and tokenizer to MERGED_MODEL_ID.

    Returns:
        0 on success, 1 when HF_TOKEN is missing (pushing requires auth).
    """
    # Login to Hugging Face Hub — the push_to_hub calls below require a token.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        print("Logging in to Hugging Face Hub...")
        login(token=hf_token, add_to_git_credential=False)
        print(" Login successful!")
    else:
        print("ERROR: HF_TOKEN not found in environment!")
        print("Cannot push merged model without authentication.")
        return 1

    print("=" * 60)
    print(" MERGE LoRA ADAPTER & PUSH TO HUB")
    print("=" * 60)
    print(f"Base model: {BASE_MODEL}")
    print(f"LoRA adapter: {ADAPTER_MODEL}")
    print(f"Merged output: {MERGED_MODEL_ID}")
    # Fixed: this was an f-string with no placeholders (F541).
    print("Dtype: bfloat16 (full precision, no quantization)")
    print("=" * 60)

    # ─────────────────────────────────────────────────────
    # Load Tokenizer
    # ─────────────────────────────────────────────────────
    print("\n[1/5] Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    print(f" Tokenizer loaded: {tokenizer.__class__.__name__}")

    # ─────────────────────────────────────────────────────
    # Load Base Model (bf16, no quantization)
    # ─────────────────────────────────────────────────────
    print(f"\n[2/5] Loading base model: {BASE_MODEL}")
    print(" Loading in bfloat16 with device_map='auto'...")

    # device_map='auto' shards the model across all visible GPUs.
    n_gpus = torch.cuda.device_count()
    print(f" Detected {n_gpus} GPU(s)")

    # NOTE(review): transformers deprecated `torch_dtype` in favor of `dtype`;
    # with the pinned `transformers>=5.1.0`, confirm `torch_dtype` is still
    # accepted and switch to `dtype=torch.bfloat16` if not.
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )

    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        print(f" Base model loaded, GPU memory: {allocated:.1f} GB")

    # ─────────────────────────────────────────────────────
    # Load LoRA Adapter
    # ─────────────────────────────────────────────────────
    print(f"\n[3/5] Loading LoRA adapter: {ADAPTER_MODEL}")
    model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
    print(" Adapter loaded successfully")

    # ─────────────────────────────────────────────────────
    # Merge and Unload
    # ─────────────────────────────────────────────────────
    print("\n[4/5] Merging adapter into base model...")
    # merge_and_unload folds the LoRA deltas into the base weights and
    # returns a plain (non-PEFT) model suitable for standalone upload.
    model = model.merge_and_unload()
    print(" Merge complete!")

    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        print(f" GPU memory after merge: {allocated:.1f} GB")

    # ─────────────────────────────────────────────────────
    # Push to Hub
    # ─────────────────────────────────────────────────────
    print(f"\n[5/5] Pushing merged model to Hub: {MERGED_MODEL_ID}")
    print(" Pushing model (this may take a while for large models)...")
    model.push_to_hub(MERGED_MODEL_ID)
    print(" Model pushed!")

    print(" Pushing tokenizer...")
    tokenizer.push_to_hub(MERGED_MODEL_ID)
    print(" Tokenizer pushed!")

    print("\n" + "=" * 60)
    print(" MERGE COMPLETE!")
    print("=" * 60)
    print(f"Merged model: https://huggingface.co/{MERGED_MODEL_ID}")
    # Fixed: return an explicit 0 instead of falling off the end (None), so
    # main() consistently yields an int exit status on every path.
    return 0
if __name__ == "__main__":
    # main() returns 1 on auth failure and 0 (or None in older revisions) on
    # success; `or 0` normalises either success value to exit status 0.
    # Fixed: use `raise SystemExit` instead of the `exit()` builtin, which is
    # injected by the site module and not guaranteed to exist (e.g. `python -S`).
    raise SystemExit(main() or 0)