# car_damage / app.py
# rayeneouanes: "Update app.py" (commit eb62880, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub identifier of the checkpoint that load_model() downloads and serves.
MODEL_ID = "openbmb/MiniCPM-o-4_5"

# Lazily initialised module-level singletons: both remain None until the
# first call to load_model() populates them.
tokenizer = model = None
def load_model():
    """Populate the module-level ``tokenizer`` and ``model`` on first use.

    Subsequent calls are no-ops once ``model`` has been set. Both objects are
    fetched for ``MODEL_ID`` with ``trust_remote_code=True``; the model is
    loaded in float16 when CUDA is available and float32 otherwise, with
    ``device_map="auto"``, and is then switched to evaluation mode.

    NOTE(review): whether ``AutoModelForCausalLM`` is the right auto class for
    this checkpoint cannot be confirmed from this file - verify against the
    model card.
    """
    global tokenizer, model

    # Already initialised by an earlier call: nothing left to do.
    if model is not None:
        return

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    # GPU recommended: half precision on CUDA, full precision as CPU fallback.
    if torch.cuda.is_available():
        dtype = torch.float16
    else:
        dtype = torch.float32

    load_kwargs = dict(
        trust_remote_code=True,
        torch_dtype=dtype,
        device_map="auto",
    )
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
    model.eval()
def chat(user_msg, history):
    """Generate a reply to ``user_msg`` with the lazily loaded model.

    Args:
        user_msg: Text of the latest user message; it is used verbatim as the
            prompt.
        history: Earlier conversation turns. Accepted so the function fits a
            ``(message, history)`` chat callback, but it is not used when
            building the prompt.

    Returns:
        The decoded reply as a string. ``""`` is returned for an empty or
        whitespace-only message, in which case the model is neither loaded
        nor invoked.
    """
    # Nothing to answer: skip the expensive model load and generation.
    if not user_msg or (isinstance(user_msg, str) and not user_msg.strip()):
        return ""

    load_model()

    inputs = tokenizer(user_msg, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    generated = out[0]
    prompt_ids = inputs["input_ids"][0].to(generated.device)
    prompt_len = prompt_ids.shape[-1]

    # Return only the generated answer: a causal LM's generate() normally
    # yields the prompt tokens followed by the new ones, so decoding the whole
    # sequence would echo the user's message back. The prefix is compared
    # explicitly because a custom generate() may already return new tokens
    # only, and those must not be truncated.
    if generated.shape[-1] >= prompt_len and torch.equal(
        generated[:prompt_len], prompt_ids
    ):
        generated = generated[prompt_len:]

    return tokenizer.decode(generated, skip_special_tokens=True)
# Build the web UI around chat(). gr.ChatInterface invokes its callback as
# fn(message, history), which is exactly chat's signature. The previous
# gr.Interface(...) call did not parse and referenced names (predict, btn,
# inp) that are not defined in this file.
demo = gr.ChatInterface(fn=chat)

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()