Update app.py
app.py CHANGED
@@ -23,10 +23,7 @@ PPO_SUBFOLDER = "ppo_aligned_final"
 # Set device for inference (GPU if available, otherwise CPU)
 DEVICE = 0 if torch.cuda.is_available() else -1
 
-#
-print("Loading models... This may take a moment.")
-
-# Load the Supervised Fine-Tuned (SFT) model - our "before" model
+# 2. Load the Supervised Fine-Tuned (SFT) model - our "before" model
 sft_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 
@@ -34,8 +31,6 @@ sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 ppo_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, subfolder=PPO_SUBFOLDER)
 ppo_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=PPO_SUBFOLDER)
 
-print("Models loaded successfully!")
-
 # --- 3. Create Text Generation Pipelines ---
 # Create a pipeline for each model to simplify text generation
 sft_pipeline = pipeline("text-generation", model=sft_model, tokenizer=sft_tokenizer, device=DEVICE)
@@ -47,7 +42,9 @@ def generate_responses(prompt):
     """
     Generates responses from both the SFT and PPO models for a given prompt.
     """
-
+
+    # Format the user's prompt into the required structure
+    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:"
 
     # Common generation parameters
     generation_kwargs = {
@@ -60,15 +57,14 @@ def generate_responses(prompt):
     }
 
     # Generate from SFT model
-
-
+    sft_output_full = sft_pipeline(formatted_prompt, **generation_kwargs)
+    # The pipeline returns the prompt + generated text, so we strip the prompt part
+    sft_response = sft_output_full[0]['generated_text'][len(formatted_prompt):].strip()
 
     # Generate from PPO model
-
-
-
-    print(f"SFT Response: {sft_response}")
-    print(f"PPO Response: {ppo_response}")
+    ppo_output_full = ppo_pipeline(formatted_prompt, **generation_kwargs)
+    # The pipeline returns the prompt + generated text, so we strip the prompt part
+    ppo_response = ppo_output_full[0]['generated_text'][len(formatted_prompt):].strip()
 
     return sft_response, ppo_response
 
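The core pattern the added code relies on is that a transformers "text-generation" pipeline returns the prompt concatenated with the completion, which is why the diff slices off len(formatted_prompt) characters before returning. Below is a minimal standalone sketch of that format-then-strip pattern; the tiny stand-in model id and the generation settings are illustrative assumptions, not the Space's actual SFT/PPO checkpoints or parameters.

from transformers import pipeline

# Illustrative stand-in model, NOT the Space's SFT/PPO checkpoints.
gen = pipeline("text-generation", model="sshleifer/tiny-gpt2", device=-1)

prompt = "Explain what RLHF is in one sentence."
# Same instruction format the diff introduces in generate_responses()
formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:"

# Assumed generation settings for this sketch; the real kwargs sit outside the shown hunks.
output = gen(formatted_prompt, max_new_tokens=30, do_sample=False)

# The pipeline echoes the prompt, so strip it off to keep only the completion.
response = output[0]["generated_text"][len(formatted_prompt):].strip()
print(response)

An alternative worth noting is the pipeline's return_full_text=False argument, which makes it return only the newly generated text and would remove the need for the manual slice; the commit keeps the explicit slicing instead.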