Spaces:

EmTpro01
/

Python-code-Genration

Runtime error

App Files Community

EmTpro01 committed on Nov 2, 2024

Commit

bd1c7d4

verified ·

1 Parent(s): 746a37e

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -40

app.py CHANGED Viewed

@@ -1,58 +1,116 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel, PeftConfig
-def load_model_with_lora(base_model_name, lora_path):
     """
-    Load base model and merge it with LoRA adapter
     """
-    # Load base model
-    base_model = AutoModelForCausalLM.from_pretrained(
-        base_model_name,
-        torch_dtype=torch.float16,
-        device_map="auto"
-    )
-    # Load and merge LoRA adapter
-    model = PeftModel.from_pretrained(base_model, lora_path)
-    model = model.merge_and_unload() # Merge adapter weights with base model
-    return model
-def load_tokenizer(base_model_name):
     """
-    Load tokenizer for the base model
     """
-    return AutoTokenizer.from_pretrained(base_model_name)
 def generate_code(prompt, model, tokenizer, max_length=512, temperature=0.7):
     """
     Generate code based on the prompt
     """
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(
-        **inputs,
-        max_length=max_length,
-        temperature=temperature,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
-    )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Initialize model and tokenizer
-BASE_MODEL_NAME = "unsloth/Llama-3.2-3B-bnb-4bit"  # Replace with your base model name
-LORA_PATH = "EmTpro01/Llama-3.2-3B-peft"  # Replace with your LoRA adapter path
-model = load_model_with_lora(BASE_MODEL_NAME, LORA_PATH)
-tokenizer = load_tokenizer(BASE_MODEL_NAME)
-# Create Gradio interface
 def gradio_generate(prompt, temperature, max_length):
-    return generate_code(prompt, model, tokenizer, max_length, temperature)
 demo = gr.Interface(
     fn=gradio_generate,
     inputs=[
@@ -76,9 +134,14 @@ demo = gr.Interface(
             label="Max Length"
         )
     ],
-    outputs=gr.Code(language="python", label="Generated Code"),
-    title="Code Generation with LoRA",
-    description="Enter a prompt to generate code using a fine-tuned model with LoRA adapters",
 )
 if __name__ == "__main__":

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from peft import PeftModel
+import logging
+import os
+from huggingface_hub import snapshot_download
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def download_lora_weights():
+    """Download LoRA weights from Hugging Face"""
+    return snapshot_download(
+        repo_id="EmTpro01/Llama-3.2-3B-peft",
+        allow_patterns=["adapter_config.json", "adapter_model.bin"],
+    )
+def load_model_with_lora():
     """
+    Load Llama model and merge it with LoRA adapter
     """
+    try:
+        # Configure quantization
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_compute_dtype=torch.float16
+        )
+        # Load base model
+        base_model = AutoModelForCausalLM.from_pretrained(
+            "unsloth/llama-3.2-3b-bnb-4bit",
+            quantization_config=bnb_config,
+            device_map="auto",
+            trust_remote_code=True
+        )
+        logger.info("Successfully loaded base model")
+        # Download and load LoRA adapter
+        lora_path = download_lora_weights()
+        logger.info(f"Downloaded LoRA weights to: {lora_path}")
+        # Load and merge LoRA adapter
+        model = PeftModel.from_pretrained(base_model, lora_path)
+        logger.info("Successfully loaded LoRA adapter")
+        # For inference, we can merge the LoRA weights with the base model
+        model = model.merge_and_unload()
+        logger.info("Successfully merged LoRA weights with base model")
+        return model
+    except Exception as e:
+        logger.error(f"Error loading model: {str(e)}")
+        raise RuntimeError(f"Failed to load model: {str(e)}")
+def load_tokenizer():
     """
+    Load tokenizer for the Llama model
     """
+    try:
+        tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3.2-3b-bnb-4bit")
+        logger.info("Successfully loaded tokenizer")
+        return tokenizer
+    except Exception as e:
+        logger.error(f"Error loading tokenizer: {str(e)}")
+        raise RuntimeError(f"Failed to load tokenizer: {str(e)}")
 def generate_code(prompt, model, tokenizer, max_length=512, temperature=0.7):
     """
     Generate code based on the prompt
     """
+    try:
+        # Add any specific prompt template if needed
+        formatted_prompt = f"### Instruction: Write code for the following task:\n{prompt}\n\n### Response:"
+        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(
+            **inputs,
+            max_length=max_length,
+            temperature=temperature,
+            do_sample=True,
+            top_p=0.95,
+            top_k=50,
+            repetition_penalty=1.1,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Extract only the response part
+        response = generated_text.split("### Response:")[-1].strip()
+        return response
+    except Exception as e:
+        logger.error(f"Error during code generation: {str(e)}")
+        return f"Error generating code: {str(e)}"
 # Initialize model and tokenizer
+logger.info("Starting model initialization...")
+model = load_model_with_lora()
+tokenizer = load_tokenizer()
+logger.info("Model initialization completed successfully")
+# Create Gradio interface with error handling
 def gradio_generate(prompt, temperature, max_length):
+    try:
+        return generate_code(prompt, model, tokenizer, max_length, temperature)
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Create the Gradio interface
 demo = gr.Interface(
     fn=gradio_generate,
     inputs=[
             label="Max Length"
         )
     ],
+    outputs=gr.Code(label="Generated Code"),
+    title="Llama Code Generation with LoRA",
+    description="Enter a prompt to generate code using Llama 3.2 3B model fine-tuned with LoRA",
+    examples=[
+        ["Write a Python function to sort a list of numbers in ascending order"],
+        ["Create a simple REST API using FastAPI that handles GET and POST requests"],
+        ["Write a function to check if a string is a palindrome"]
+    ]
 )
 if __name__ == "__main__":