import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Base checkpoint and the LoRA adapter fine-tuned on top of it.
BASE_MODEL = "bigcode/starcoder2-3b"
ADAPTER_REPO = "simnJS/autotrain-fxp6j-p5s8i"

# The tokenizer comes from the base model — a LoRA adapter does not
# change the vocabulary, only a subset of the weights.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Load the base model in fp16; device_map="auto" lets accelerate place
# layers on whatever devices are available (GPU first, CPU fallback).
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_REPO,
    torch_dtype=torch.float16,
)
| |
|
| | |
def generate_answer(user_message, history):
    """Generate a model reply for *user_message* and append it to the chat.

    Args:
        user_message: The latest message typed by the user.
        history: List of ``(user_message, model_answer)`` tuples; mutated
            in place.

    Returns:
        ``(history, history)`` — the same updated list twice, matching the
        ``outputs=[chatbot, state]`` wiring in the Gradio UI.
    """
    prompt = user_message

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only: no_grad avoids building an autograd graph during
    # generation, cutting memory use.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
        )

    # generate() returns prompt + completion concatenated. Decode only the
    # newly generated tokens; decoding outputs[0] wholesale would echo the
    # user's prompt back at the start of every answer.
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    history.append((user_message, answer))
    return history, history
| |
|
| | |
# Minimal chat UI: a Chatbot display, a Textbox for input, and a State
# holding the (user, answer) tuple history shared across turns.
with gr.Blocks() as demo:
    gr.Markdown("# Chat avec mon modèle LoRA Verse")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Tapez votre message ici...")
    state = gr.State([])

    # Thin wrapper so the submit callback signature matches Gradio's
    # (inputs, state) -> (chatbot display, state) contract.
    def submit_message(user_message, history):
        return generate_answer(user_message, history)

    # Pressing Enter in the textbox runs one generation turn and refreshes
    # both the visible chat log and the stored history.
    msg.submit(submit_message, inputs=[msg, state], outputs=[chatbot, state])

demo.launch()
| |
|