import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
from threading import Thread
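
# Gradio chat playground for LiquidAI's LFM2 models: the selected checkpoint is
# loaded on demand, cached in memory, and replies are streamed token by token.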
# Model display names mapped to their Hugging Face repository IDs.
MODEL_NAMES = {
    "LFM2-350M": "LiquidAI/LFM2-350M",
    "LFM2-700M": "LiquidAI/LFM2-700M",
    "LFM2-1.2B": "LiquidAI/LFM2-1.2B",
    "LFM2-2.6B": "LiquidAI/LFM2-2.6B",
    "LFM2-8B-A1B": "LiquidAI/LFM2-8B-A1B",
}

# Cache of already-loaded (tokenizer, model) pairs, keyed by model name.
model_cache = {}


def load_model(model_key):
    """Load the requested model and tokenizer, reusing cached instances when available."""
    if model_key in model_cache:
        return model_cache[model_key]

    model_name = MODEL_NAMES[model_key]
    print(f"Loading {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)
    model_cache[model_key] = (tokenizer, model)
    return tokenizer, model


def chat_with_model(history, model_choice):
    """Stream an assistant reply for the latest user turn in `history`."""
    tokenizer, model = load_model(model_choice)
    device = model.device

    prompt = (
        "You are LFM2, an intelligent and conversational AI assistant designed to help users "
        "with questions, problem-solving, and creative tasks. You communicate clearly, reason "
        "carefully, and explain your thoughts in an easy-to-understand way. Stay friendly, "
        "professional, and curious. If the user's request is ambiguous, ask clarifying "
        "questions before proceeding.\n"
    )
    # Append the conversation so far; the last entry is the user message just submitted.
    for msg in history[:-1]:
        prompt += f"{msg['role'].capitalize()}: {msg['content']}\n"
    prompt += f"User: {history[-1]['content']}\nAssistant:"

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Run generation on a background thread so tokens can be yielded as they arrive.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield history + [{"role": "assistant", "content": partial_text}]


def create_demo():
    with gr.Blocks(title="LiquidAI Chat Playground") as demo:
        gr.Markdown("## 💧 LiquidAI Chat Playground")
        model_choice = gr.Dropdown(
            label="Select Model",
            choices=list(MODEL_NAMES.keys()),
            value="LFM2-1.2B",
        )
        chatbot = gr.Chatbot(
            label="Chat with LiquidAI",
            type="messages",
            height=450,
        )
        msg = gr.Textbox(label="Your message", placeholder="Type something...")
        clear = gr.Button("Clear")

        def add_user_message(user_message, chat_history):
            # Append the user's turn and clear the textbox; the streamed reply is
            # produced by the chained chat_with_model call below.
            chat_history = chat_history + [{"role": "user", "content": user_message}]
            return "", chat_history

        msg.submit(add_user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
            chat_with_model, [chatbot, model_choice], chatbot
        )
        clear.click(lambda: [], None, chatbot, queue=False)
    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)