Update app.py
app.py CHANGED
@@ -23,10 +23,7 @@ PPO_SUBFOLDER = "ppo_aligned_final"
 # Set device for inference (GPU if available, otherwise CPU)
 DEVICE = 0 if torch.cuda.is_available() else -1
 
-#
-print("Loading models... This may take a moment.")
-
-# Load the Supervised Fine-Tuned (SFT) model - our "before" model
+# 2. Load the Supervised Fine-Tuned (SFT) model - our "before" model
 sft_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 
@@ -34,8 +31,6 @@ sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 ppo_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, subfolder=PPO_SUBFOLDER)
 ppo_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=PPO_SUBFOLDER)
 
-print("Models loaded successfully!")
-
 # --- 3. Create Text Generation Pipelines ---
 # Create a pipeline for each model to simplify text generation
 sft_pipeline = pipeline("text-generation", model=sft_model, tokenizer=sft_tokenizer, device=DEVICE)
@@ -47,7 +42,9 @@ def generate_responses(prompt):
     """
     Generates responses from both the SFT and PPO models for a given prompt.
     """
-
+
+    # Format the user's prompt into the required structure
+    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:"
 
     # Common generation parameters
     generation_kwargs = {
@@ -60,15 +57,14 @@ def generate_responses(prompt):
     }
 
     # Generate from SFT model
-
-
+    sft_output_full = sft_pipeline(formatted_prompt, **generation_kwargs)
+    # The pipeline returns the prompt + generated text, so we strip the prompt part
+    sft_response = sft_output_full[0]['generated_text'][len(formatted_prompt):].strip()
 
     # Generate from PPO model
-
-
-
-    print(f"SFT Response: {sft_response}")
-    print(f"PPO Response: {ppo_response}")
+    ppo_output_full = ppo_pipeline(formatted_prompt, **generation_kwargs)
+    # The pipeline returns the prompt + generated text, so we strip the prompt part
+    ppo_response = ppo_output_full[0]['generated_text'][len(formatted_prompt):].strip()
 
     return sft_response, ppo_response
 
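The core pattern the added code relies on is that a transformers "text-generation" pipeline returns the prompt concatenated with the completion, which is why the diff slices off len(formatted_prompt) characters before returning. Below is a minimal standalone sketch of that format-then-strip pattern; the tiny stand-in model id and the generation settings are illustrative assumptions, not the Space's actual SFT/PPO checkpoints or parameters.

from transformers import pipeline

# Illustrative stand-in model, NOT the Space's SFT/PPO checkpoints.
gen = pipeline("text-generation", model="sshleifer/tiny-gpt2", device=-1)

prompt = "Explain what RLHF is in one sentence."
# Same instruction format the diff introduces in generate_responses()
formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:"

# Assumed generation settings for this sketch; the real kwargs sit outside the shown hunks.
output = gen(formatted_prompt, max_new_tokens=30, do_sample=False)

# The pipeline echoes the prompt, so strip it off to keep only the completion.
response = output[0]["generated_text"][len(formatted_prompt):].strip()
print(response)

An alternative worth noting is the pipeline's return_full_text=False argument, which makes it return only the newly generated text and would remove the need for the manual slice; the commit keeps the explicit slicing instead.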