nabeelshan committed on
Commit 8f8386b · verified · 1 Parent(s): 30b3344

Update app.py

Files changed (1): app.py (+10 -14)
app.py CHANGED
@@ -23,10 +23,7 @@ PPO_SUBFOLDER = "ppo_aligned_final"
  # Set device for inference (GPU if available, otherwise CPU)
  DEVICE = 0 if torch.cuda.is_available() else -1
 
- # --- 2. Load Models and Tokenizers ---
- print("Loading models... This may take a moment.")
-
- # Load the Supervised Fine-Tuned (SFT) model - our "before" model
+ # 2. Load the Supervised Fine-Tuned (SFT) model - our "before" model
  sft_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
  sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
 
@@ -34,8 +31,6 @@ sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=SFT_SUBFOLDER)
  ppo_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, subfolder=PPO_SUBFOLDER)
  ppo_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder=PPO_SUBFOLDER)
 
- print("Models loaded successfully!")
-
  # --- 3. Create Text Generation Pipelines ---
  # Create a pipeline for each model to simplify text generation
  sft_pipeline = pipeline("text-generation", model=sft_model, tokenizer=sft_tokenizer, device=DEVICE)
@@ -47,7 +42,9 @@ def generate_responses(prompt):
  """
  Generates responses from both the SFT and PPO models for a given prompt.
  """
- print(f"Received prompt: {prompt}")
+
+ # Format the user's prompt into the required structure
+ formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:"
 
  # Common generation parameters
  generation_kwargs = {
@@ -60,15 +57,14 @@ def generate_responses(prompt):
  }
 
  # Generate from SFT model
- sft_output = sft_pipeline(prompt, **generation_kwargs)
- sft_response = sft_output[0]['generated_text']
+ sft_output_full = sft_pipeline(formatted_prompt, **generation_kwargs)
+ # The pipeline returns the prompt + generated text, so we strip the prompt part
+ sft_response = sft_output_full[0]['generated_text'][len(formatted_prompt):].strip()
 
  # Generate from PPO model
- ppo_output = ppo_pipeline(prompt, **generation_kwargs)
- ppo_response = ppo_output[0]['generated_text']
-
- print(f"SFT Response: {sft_response}")
- print(f"PPO Response: {ppo_response}")
+ ppo_output_full = ppo_pipeline(formatted_prompt, **generation_kwargs)
+ # The pipeline returns the prompt + generated text, so we strip the prompt part
+ ppo_response = ppo_output_full[0]['generated_text'][len(formatted_prompt):].strip()
 
  return sft_response, ppo_response
 
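Note on the prompt handling in this commit: the transformers text-generation pipeline can also return only the newly generated continuation via return_full_text=False, which makes the manual slicing with len(formatted_prompt) optional. A minimal sketch, assuming an illustrative "gpt2" checkpoint and made-up generation settings rather than this repo's models:

from transformers import pipeline

# Illustrative checkpoint; the SFT/PPO checkpoints above would be used the same way.
generator = pipeline("text-generation", model="gpt2", device=-1)

# Same instruction/response template as in the commit.
prompt = "### Instruction:\nSummarize what PPO alignment does.\n\n### Response:"

output = generator(
    prompt,
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    return_full_text=False,  # return only the generated continuation, not the echoed prompt
)
response = output[0]["generated_text"].strip()
print(response)

With the default return_full_text=True, generated_text begins with the prompt verbatim, which is why the slice [len(formatted_prompt):] in the commit recovers just the model's response.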