"""Bot vs Bot: a Gradio app in which two user-chosen characters converse.

Both characters are voiced by the same instruction-tuned causal language
model. Each click on the conversation button appends one line for
Character B (stored under the "user" role) and one reply for Character A
(stored under the "assistant" role) to the chat history.
"""

import os
import time
from threading import Thread

# NOTE(review): `spaces` is kept ahead of `torch`, as in the original import
# order; presumably the ZeroGPU runtime needs to be imported before any
# CUDA-touching library -- confirm before reordering.
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from huggingface_hub import login
from icrawler.builtin import BingImageCrawler

MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
HF_TOKEN = os.environ.get("HF_TOKEN", None)
login(token=HF_TOKEN)

MODEL = "mistralai/Mistral-Nemo-Instruct-2407"

TITLE = """

Mistral-Nemo

"""

PLACEHOLDER = """

The Mistral-Nemo is a pretrained generative text model of 12B parameters trained jointly by Mistral AI and NVIDIA.

"""

# Implicit string concatenation keeps the stylesheet text byte-identical to
# the original single-line literal while staying readable here.
CSS = (
    " .duplicate-button { margin: auto !important; color: white !important; "
    "background: black !important; border-radius: 100vh !important; } "
    "h3 { text-align: center; } "
    "#output_video { display: block; margin-left: auto!important; "
    "margin-right: auto !important; width: 20vw !important; } "
    "footer{visibility: hidden} "
)

# Retained as a module-level name; generation itself places its inputs on
# `model.device` because the model is loaded with device_map="auto".
device = "cuda"  # or "cpu"

# Recommended flag for this tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL, fix_mistral_regex=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    dtype=torch.bfloat16,  # torch_dtype is deprecated in newer transformers
    device_map="auto",
    ignore_mismatched_sizes=True,
)


def _system_prompt_for(name: str) -> str:
    """Return the system prompt that tells the model to speak as *name*."""
    return (
        f"You should respond like {name}. "
        "You should have a meaningful conversation. Don't repeat yourself. "
        "You should only output your response. "
        "You don't need to put quotes around what you're saying. "
        "You don't need to put your name at the beginning of your response."
    )


def normalize_history(history: list[dict] | None) -> list[dict[str, str]]:
    """Flatten chat messages so every ``content`` is a plain string.

    Gradio may send messages where ``content`` is a list of rich parts::

        {"role": "assistant", "content": [{"type": "text", "text": "hello"}]}

    Each message is converted to ``{"role": ..., "content": "plain string"}``.
    Text parts contribute their ``text`` field; any other part is passed
    through ``str()``. Parts are joined with newlines. A missing role
    defaults to ``"user"`` and missing content to ``""``.

    Args:
        history: Messages in role/content dict form, or ``None``.

    Returns:
        A new list of ``{"role", "content"}`` dicts; empty when *history*
        is ``None``.
    """
    if history is None:
        return []

    normalized = []
    for msg in history:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        if isinstance(content, list):
            # e.g. [{"type": "text", "text": "..."}, ...]
            content = "\n".join(
                part.get("text", "")
                if isinstance(part, dict) and part.get("type") == "text"
                else str(part)
                for part in content
            )
        else:
            content = str(content)
        normalized.append({"role": role, "content": content})
    return normalized


@spaces.GPU()
def get_response(conversation: list[dict[str, str]]) -> str:
    """Generate one model reply for *conversation* and return it as text.

    Args:
        conversation: list of
            ``{"role": "system" | "user" | "assistant", "content": str}``.

    Returns:
        The generated text with the prompt and special tokens stripped.
    """
    temperature = 0.3
    max_new_tokens = 512
    top_p = 1.0
    top_k = 20
    penalty = 1.2

    input_text = tokenizer.apply_chat_template(conversation, tokenize=False)
    # The rendered template already carries the special tokens, so the
    # tokenizer must not add another set (it would duplicate BOS). The ids
    # go to wherever device_map="auto" put the model, not a fixed device.
    inputs = tokenizer.encode(
        input_text,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=60.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generate_kwargs = dict(
        input_ids=inputs,
        max_new_tokens=max_new_tokens,
        do_sample=temperature != 0,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        streamer=streamer,
        repetition_penalty=penalty,
        # NOTE(review): presumably the id of this tokenizer's pad token --
        # confirm against the tokenizer config.
        pad_token_id=10,
    )

    # No torch.no_grad() wrapper here: grad mode is thread-local, so a
    # context manager entered in this thread would not reach the worker.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    # Iterating the streamer blocks until generation signals its end.
    reply = "".join(streamer)
    thread.join()
    return reply


@spaces.GPU()
def stream_chat(
    history: list[dict] | None,
    character_a: str,
    character_b: str,
) -> list[dict[str, str]]:
    """Advance the conversation by one exchange and return the new history.

    In the UI:
      - user messages = Character B
      - assistant messages = Character A

    Each call:
      1. B says something new (stored as "user").
      2. A replies (stored as "assistant").

    Args:
        history: Current chat messages
            (``[{"role": "user" | "assistant", "content": ...}, ...]``).
        character_a: Name of the character shown on the assistant side.
        character_b: Name of the character shown on the user side.

    Returns:
        The normalized history with the two new messages appended.
    """
    # Normalize history coming from Gradio into plain strings.
    history = normalize_history(history)

    # ---------- B speaks (user side) ----------
    if not history:
        # First turn: B introduces themselves to A.
        b_user_prompt = (
            f"You are {character_b}. You are having a conversation with {character_a}. "
            "Introduce yourself and start the conversation."
        )
    else:
        # The most recent message is the one B responds to.
        last_text = history[-1]["content"]
        b_user_prompt = (
            f'{character_a} just said: "{last_text}". \n'
            f"Respond in character as {character_b} and continue the conversation."
        )

    conv_for_b = [
        {"role": "system", "content": _system_prompt_for(character_b)},
        *history,
        {"role": "user", "content": b_user_prompt},
    ]
    response_b = get_response(conv_for_b)
    print("response_b:", response_b)

    # ---------- A speaks (assistant side) ----------
    conv_for_a = [
        {"role": "system", "content": _system_prompt_for(character_a)},
        *history,
        {"role": "user", "content": response_b},
    ]
    response_a = get_response(conv_for_a)
    print("response_a:", response_a)

    # ---------- Append to chat history ----------
    new_history = history + [
        {"role": "user", "content": response_b},  # B's line
        {"role": "assistant", "content": response_a},  # A's line
    ]
    print("history:", new_history)
    return new_history


def get_img(keyword: str) -> str | None:
    """Download one Bing image for *keyword* and return its local path.

    The image is stored in a directory under the current working directory
    named after the keyword. Because the keyword is typed by the user, every
    character other than letters, digits, space, ``-`` and ``_`` is replaced
    with ``_`` before it is used as a directory name, so it cannot point
    outside the working directory.

    Args:
        keyword: Search phrase (a character name).

    Returns:
        Path of an image file in the keyword's directory, or ``None`` when
        the keyword is blank or no image file is present after the crawl.
    """
    if not keyword or not keyword.strip():
        # The original resolved "" to "./" and could return an unrelated
        # image from the working directory.
        return None

    safe_name = "".join(
        ch if ch.isalnum() or ch in " -_" else "_" for ch in keyword
    ).strip()
    if not safe_name:
        return None

    path = "./" + safe_name
    os.makedirs(path, exist_ok=True)

    bing_crawler = BingImageCrawler(storage={"root_dir": path})
    bing_crawler.crawl(keyword=keyword, max_num=1)

    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff")
    # Sorted so the pick is deterministic when the directory has several files.
    for file_name in sorted(os.listdir(path)):
        if file_name.lower().endswith(image_suffixes):
            return os.path.join(path, file_name)
    return None


def set_characters(a: str, b: str):
    """Fetch avatars for both characters and reset the chat.

    Args:
        a: Name of Character A.
        b: Name of Character B.

    Returns:
        ``(img_a, img_b, chatbot_update)`` where the update installs the new
        avatars and clears the chat history.
    """
    img_a = get_img(a)
    img_b = get_img(b)
    # avatar_images=(user_avatar, assistant_avatar) => (B, A)
    # also reset chat history when characters change
    return img_a, img_b, gr.update(avatar_images=(img_b, img_a), value=[])


# NOTE(review): this component is not referenced by the layout below; kept so
# the module-level name still exists.
chatbot = gr.Chatbot(height=600, show_label=False)

theme = gr.themes.Base().set(
    body_background_fill="#e1fceb",
    color_accent_soft="#ffffff",
    border_color_accent="#e1fceb",
    border_color_primary="#e1fceb",
    background_fill_secondary="#e1fceb",
    button_secondary_background_fill="#ffffff",
    button_primary_background_fill="#ffffff",
    button_primary_text_color="#1f2937",
    input_background_fill="#f8f8f8",
)

with gr.Blocks() as demo:
    gr.HTML(
        "\n\nBot vs Bot\n\n"
        "by Tony Assi\n\n"
        "Pick two icons and watch them have a conversation\n\n"
    )

    with gr.Row():
        character_a = gr.Textbox(
            label="Character A",
            info="Choose a person",
            placeholder="Socrates, Edgar Allen Poe, George Washington",
        )
        character_b = gr.Textbox(
            label="Character B",
            info="Choose a person",
            placeholder="Madonna, Paris Hilton, Liza Minnelli",
        )

    character_button = gr.Button("Initiate Characters")

    with gr.Row():
        image_a = gr.Image(show_label=False, interactive=False)
        gr.Markdown(" ")
        image_b = gr.Image(show_label=False, interactive=False)

    # No `type` kwarg is passed; the callbacks exchange history as a list of
    # {"role", "content"} message dicts.
    chat = gr.Chatbot(show_label=False)

    submit_button = gr.Button("Start Conversation")

    character_button.click(
        set_characters,
        inputs=[character_a, character_b],
        outputs=[image_a, image_b, chat],
    )

    submit_button.click(
        stream_chat,
        inputs=[chat, character_a, character_b],
        outputs=[chat],
    )

if __name__ == "__main__":
    demo.launch(css=CSS, theme=theme)