| import argparse |
| import os |
|
|
| import torch |
| from transformers import AutoTokenizer |
| from transformers import AutoModelForSeq2SeqLM |
| from peft import PeftModel |
|
|
| from prompting import encode_prompt |
|
|
|
|
def main() -> None:
    """Generate SQL for one question with the RLHF-tuned text-to-SQL model.

    Parses the command line, loads the base seq2seq model and the PEFT
    adapter, merges the adapter into the base weights, greedily decodes the
    encoded prompt, and prints the decoded SQL to stdout.

    The base model name/path is read from the ``BASE_MODEL`` environment
    variable (default ``t5-small``). When that value is not a local directory
    and ``<project_root>/models/t5_spider_sft`` exists, the latter is used
    instead. The adapter directory defaults to
    ``<project_root>/outputs/rlhf_text2sql``.
    """
    args = _parse_args()
    device = _pick_device()

    # The project root is two directory levels above this file.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    adapter_dir = args.model_dir or os.path.join(project_root, "outputs", "rlhf_text2sql")
    base_model = os.environ.get("BASE_MODEL", "t5-small")
    fallback_base_model = os.path.join(project_root, "models", "t5_spider_sft")
    if not os.path.isdir(base_model) and os.path.isdir(fallback_base_model):
        base_model = fallback_base_model

    # Sources that are not local directories are loaded with
    # local_files_only=True, i.e. without downloading anything
    # (presumably to keep inference usable offline -- confirm).
    tokenizer_source = adapter_dir if os.path.isdir(adapter_dir) else base_model
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_source,
        local_files_only=not os.path.isdir(tokenizer_source),
    )
    base = AutoModelForSeq2SeqLM.from_pretrained(
        base_model,
        local_files_only=not os.path.isdir(base_model),
    )

    # Fold the adapter into the base weights, then move the resulting plain
    # model to the target device once instead of moving every intermediate.
    model = PeftModel.from_pretrained(base, adapter_dir)
    model = model.merge_and_unload().to(device)
    model.config.use_cache = False
    model.eval()  # inference only

    if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
        tokenizer.pad_token = tokenizer.eos_token

    # NOTE(review): --use_schema and --max_schema_chars are parsed but are not
    # forwarded to encode_prompt here; confirm against encode_prompt's
    # signature whether they should be passed through.
    input_ids = encode_prompt(
        tokenizer,
        args.question,
        args.db_id,
        device=device,
        max_input_tokens=512,
    )

    # Greedy decoding. early_stopping is deliberately not passed: it only
    # applies to beam search and merely triggers a warning with num_beams=1.
    with torch.no_grad():
        out = model.generate(
            # encode_prompt's result is given a leading batch dimension here.
            input_ids=input_ids.unsqueeze(0),
            max_new_tokens=args.max_new_tokens,
            do_sample=False,
            num_beams=1,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    sql = tokenizer.decode(out[0], skip_special_tokens=True).strip()
    print(sql)


def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description="Generate SQL from a question + db_id using the RLHF model."
    )
    parser.add_argument("--question", type=str, required=True)
    parser.add_argument("--db_id", type=str, required=True)
    parser.add_argument("--model_dir", type=str, default=None, help="Defaults to outputs/rlhf_text2sql")
    parser.add_argument(
        "--use_schema",
        action="store_true",
        help="Include schema in the prompt (must match training).",
    )
    parser.add_argument("--max_schema_chars", type=int, default=1500)
    parser.add_argument("--max_new_tokens", type=int, default=80)
    return parser.parse_args()


def _pick_device() -> str:
    """Return the torch device string to use: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        return "cuda"
    # Probe defensively: not every torch build exposes torch.backends.mps.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"
|
|
|
|
# Script entry point: run inference only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|