jorgeiv500 committed (verified)
Commit ac0510e · 1 Parent(s): 30bf3c9

Update app.py

Files changed (1)
  1. app.py +154 -116
app.py CHANGED
@@ -1,5 +1,8 @@
- # app.py — DeepSeek-OCR + Med42 (HF conversational) — Gradio 5
- # ZeroGPU-safe (no CUDA initialization in the main process)

  import os, re, json, tempfile, traceback
  import gradio as gr
@@ -7,39 +10,57 @@ import torch
  from PIL import Image
  from transformers import AutoModel, AutoTokenizer
  import spaces
- from huggingface_hub import InferenceClient
- import requests

  # =========================
  # CONFIG (env)
  # =========================
- LLM_MODEL_ID = os.getenv("BIO_MODEL_ID", "m42-health/Llama3-Med42-8B").strip()
- HF_TOKEN = os.getenv("HF_TOKEN")
-
- # Deterministic generation for instruction adherence
- GEN_TEMPERATURE = float(os.getenv("GEN_TEMPERATURE", "0.0"))
- GEN_TOP_P = float(os.getenv("GEN_TOP_P", "1.0"))
- GEN_MAX_NEW_TOKENS = int(os.getenv("GEN_MAX_NEW_TOKENS", "384"))
- GEN_TIMEOUT = int(os.getenv("GEN_TIMEOUT", "60"))  # seconds
- STOP_SEQS = ["\n###", "\nUser:", "\nAssistant:"]
-
- # (Optional) pin a stable revision of the DeepSeek-OCR repo to avoid unexpected changes
- DS_OCR_REV = os.getenv("DS_OCR_REV", None)  # e.g., a commit hash
-
- # Remote client (does not touch CUDA here)
- _hf_client = InferenceClient(model=LLM_MODEL_ID, token=HF_TOKEN, timeout=GEN_TIMEOUT)

  # =========================
- # Prompt helpers
  # =========================

  def _truncate(s: str, n=3000):
      s = (s or "")
      return s if len(s) <= n else s[:n]

  def _clean_ocr(s: str) -> str:
      if not s: return ""
-     s = re.sub(r'[^\S\r\n]+', ' ', s)           # collapse runs of spaces (keep newlines)
-     s = re.sub(r'(\{#Sec\d+\}|#+\w*)', ' ', s)  # strip stray anchors/headers
      s = re.sub(r'\s{2,}', ' ', s)
      lines = []
      for par in s.splitlines():
@@ -77,103 +98,87 @@ SALIDA_ES:
  - Evidencia OCR: "Indicaciones ilegibles"
  """.strip()

- def build_chat_messages(chat_msgs, ocr_md, ocr_txt, user_msg):
      raw = ocr_md if (ocr_md and ocr_md.strip()) else ocr_txt
      ctx = _truncate(_clean_ocr(raw), 3000)
-     # Build the user content from the OCR context plus the few-shot examples
      question = (user_msg or "Analiza el CONTEXTO_OCR y resume lo clínicamente relevante en viñetas.").strip()
-
-     user_content = (
-         f"{FEWSHOT}\n\n### CONTEXTO_OCR\n{(ctx if ctx else '—')}\n\n"
          f"### PREGUNTA\n{question}\n\n"
-         "### SALIDA_ES\n"
      )
-
-     msgs = [{"role": "system", "content": SYSTEM_INSTR}]
-     # optional: include the chat history as prior messages
-     for m in (chat_msgs or []):
-         r = m.get("role")
-         c = (m.get("content") or "").strip()
-         if not c:
-             continue
-         if r == "user":
-             msgs.append({"role": "user", "content": c})
-         elif r == "assistant":
-             msgs.append({"role": "assistant", "content": c})
-
-     msgs.append({"role": "user", "content": user_content})
-     return msgs
 
  # =========================
- # Remote LLM (Med42), conversational task
  # =========================
- def med42_remote_generate(messages) -> tuple[str, str]:
-     """
-     1) InferenceClient.chat.completions.create (conversational task)
-     2) Fallback to the HTTP router: /v1/chat/completions
-     """
      try:
-         resp = _hf_client.chat.completions.create(
-             model=LLM_MODEL_ID,
              messages=messages,
-             max_tokens=GEN_MAX_NEW_TOKENS,
-             temperature=GEN_TEMPERATURE,
-             top_p=GEN_TOP_P,
-             stop=STOP_SEQS,
          )
-         answer = (resp.choices[0].message.content or "").strip()
-         return answer, ""
-     except Exception as e1:
-         # Fall back to the new router
-         try:
-             headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-             payload = {
-                 "model": LLM_MODEL_ID,
-                 "messages": messages,
-                 "max_tokens": GEN_MAX_NEW_TOKENS,
-                 "temperature": GEN_TEMPERATURE,
-                 "top_p": GEN_TOP_P,
-                 "stop": STOP_SEQS,
-             }
-             for url in [
-                 "https://router.huggingface.co/v1/chat/completions",
-                 "https://router.huggingface.co/hf-inference/v1/chat/completions",
-             ]:
-                 r = requests.post(url, headers=headers, json=payload, timeout=GEN_TIMEOUT)
-                 if r.status_code == 200:
-                     data = r.json()
-                     if isinstance(data, dict) and "choices" in data and data["choices"]:
-                         msg = data["choices"][0].get("message") or {}
-                         text = (msg.get("content") or "").strip()
-                         return text, f"[Fallback router: {url}] {e1}"
-             raise RuntimeError(f"HTTP {r.status_code}: {r.text[:800]}")
-         except Exception as e2:
-             raise RuntimeError(
-                 f"Remote generation failed: {e1.__class__.__name__}: {e1} | HTTP fallback: {e2.__class__.__name__}: {e2}"
-             )
-
- def med42_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
-     try:
-         messages = build_chat_messages(chat_msgs, ocr_md, ocr_txt, user_msg)
-         answer, dbg = med42_remote_generate(messages)
-         updated = (chat_msgs or []) + [
-             {"role": "user", "content": user_msg or "(analizar solo OCR)"},
-             {"role": "assistant", "content": answer}
-         ]
-         return updated, "", gr.update(value=dbg)
      except Exception as e:
-         tb = traceback.format_exc(limit=2)
-         updated = (chat_msgs or []) + [
-             {"role": "user", "content": user_msg or ""},
-             {"role": "assistant", "content": f"⚠️ Error LLM: {e}"}
-         ]
-         return updated, "", gr.update(value=f"{e}\n{tb}")
-
- def clear_chat():
-     return [], "", gr.update(value="")

  # =========================
- # DeepSeek-OCR (no CUDA in main; GPU only inside the worker)
  # =========================
  def _load_ocr_model():
      model_name = "deepseek-ai/DeepSeek-OCR"
@@ -189,7 +194,6 @@ def _load_ocr_model():
          mdl = AutoModel.from_pretrained(model_name, **kwargs).eval()
          return tok, mdl
      except Exception as e:
-         # Fallback if FlashAttention-2 is unavailable
          if any(k in str(e).lower() for k in ["flash_attn", "flashattention2", "flash_attention_2"]):
              kwargs["_attn_implementation"] = "eager"
              mdl = AutoModel.from_pretrained(model_name, **kwargs).eval()
@@ -202,7 +206,6 @@ tokenizer, model = _load_ocr_model()
  def process_image(image, model_size, task_type, is_eval_mode):
      if image is None:
          return None, "Please upload an image first.", "Please upload an image first."
-
      # move to GPU ONLY inside the worker
      if torch.cuda.is_available():
          dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
@@ -253,15 +256,46 @@ def process_image(image, model_size, task_type, is_eval_mode):
      text_result = plain_text if plain_text else markdown_content
      return result_image, markdown_content, text_result
  # =========================
  # UI (Gradio 5)
  # =========================
- with gr.Blocks(title="DeepSeek-OCR + Med42 (Conversational)", theme=gr.themes.Soft()) as demo:
      gr.Markdown(
          """
-         # DeepSeek-OCR → Chat Clínico con **Med42**
          1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
-         2) **Chatea** con **Med42** usando automáticamente el **OCR** como contexto.
          *Uso educativo; no reemplaza consejo médico.*
          """
      )
@@ -283,6 +317,7 @@ with gr.Blocks(title="DeepSeek-OCR + Med42 (Conversational)", theme=gr.themes.So
              eval_mode_checkbox = gr.Checkbox(value=False, label="Enable Evaluation Mode",
                                               info="Solo texto (más rápido). Desmárcalo para ver imagen anotada y markdown.")
              submit_btn = gr.Button("Process Image", variant="primary")

          with gr.Column(scale=2):
              with gr.Tabs():
@@ -296,16 +331,16 @@ with gr.Blocks(title="DeepSeek-OCR + Med42 (Conversational)", theme=gr.themes.So
                  md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)
                  txt_preview = gr.Textbox(label="Snapshot Texto OCR", lines=10, interactive=False)

-     gr.Markdown("## Chat Clínico (Med42)")
      with gr.Row():
          with gr.Column(scale=2):
-             chatbot = gr.Chatbot(label="Asistente OCR (Med42)", type="messages", height=420)
              user_in = gr.Textbox(label="Mensaje", placeholder="Escribe tu consulta… (vacío = analiza solo el OCR)", lines=2)
              with gr.Row():
                  send_btn = gr.Button("Enviar", variant="primary")
                  clear_btn = gr.Button("Limpiar")
          with gr.Column(scale=1):
-             error_box = gr.Textbox(label="Debug (si hay error)", lines=8, interactive=False)

      # OCR
      submit_btn.click(
@@ -318,13 +353,16 @@ with gr.Blocks(title="DeepSeek-OCR + Med42 (Conversational)", theme=gr.themes.So
          outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
      )

      # Chat
      send_btn.click(
-         fn=med42_reply,
          inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
-         outputs=[chatbot, user_in, error_box]
      )
-     clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, error_box])

  if __name__ == "__main__":
      demo.queue(max_size=20)
 
+ # app.py — DeepSeek-OCR + BioMedLM-7B (GGUF via local llama.cpp, ZeroGPU-safe) — Gradio 5
+ # - OCR with DeepSeek-OCR (GPU only inside @spaces.GPU)
+ # - Chat with BioMedLM-7B GGUF through llama.cpp (GPU only inside @spaces.GPU)
+ # - Reinforced few-shot prompt and deterministic decoding
+ # - Configurable via env vars: GGUF_REPO, GGUF_FILE, N_CTX, N_BATCH, N_GPU_LAYERS

  import os, re, json, tempfile, traceback
  import gradio as gr
  import torch
  from PIL import Image
  from transformers import AutoModel, AutoTokenizer
  import spaces
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama

  # =========================
  # CONFIG (env)
  # =========================
+ # --- llama.cpp (BioMedLM-7B GGUF) ---
+ GGUF_REPO = os.getenv("GGUF_REPO", "").strip()  # e.g. "theuser/biomedlm-7b-gguf" (set your own)
+ GGUF_FILE = os.getenv("GGUF_FILE", "").strip()  # e.g. "BioMedLM-7B.Q4_K_M.gguf"
+ # default candidates when GGUF_FILE is not set
+ _GGUF_CANDIDATES = [
+     "BioMedLM-7B.Q4_K_M.gguf",
+     "BioMedLM-7B.Q5_K_M.gguf",
+     "BioMedLM-7B.Q8_0.gguf",
+     "BioMedLM-7B-f16.gguf",
+     "biomedlm-7b.Q4_K_M.gguf",
+     "biomedlm-7b.Q5_K_M.gguf",
+     "biomedlm-7b.Q8_0.gguf",
+     "biomedlm-7b-f16.gguf",
+ ]
+ GGUF_CANDIDATES = [GGUF_FILE] if GGUF_FILE else _GGUF_CANDIDATES
+
+ # performance / memory
+ N_CTX = int(os.getenv("N_CTX", "4096"))
+ N_THREADS = int(os.getenv("N_THREADS", str(os.cpu_count() or 4)))
+ N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "35"))  # a 7B model has ~32 layers; 35 ≈ "all of them"
+ N_BATCH = int(os.getenv("N_BATCH", "512"))           # raise to 1024 if your GPU allows it
+
+ # deterministic generation for instruction adherence
+ GEN_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.0"))
+ GEN_TOP_P = float(os.getenv("TOP_P", "1.0"))
+ GEN_MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "384"))
+ STOP_SEQS = ["\n###", "\nUser:", "\nAssistant:", "\nUsuario:", "\nAsistente:"]
+
+ # optional DeepSeek-OCR revision pin to avoid unexpected upstream changes
+ DS_OCR_REV = os.getenv("DS_OCR_REV", None)  # e.g. a commit hash
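Since every knob above is env-driven, a deployment can switch quantizations or context sizes without touching the code. A minimal sketch of overriding them for a local run (the values and repo id are illustrative placeholders, not defaults):

```python
import os

# Illustrative overrides — on a Space these would be repository variables/secrets.
# They must be set before app.py is imported, since the module reads them at import time.
os.environ["GGUF_REPO"] = "theuser/biomedlm-7b-gguf"  # placeholder repo id
os.environ["GGUF_FILE"] = "BioMedLM-7B.Q5_K_M.gguf"   # pin one file, skip the candidate scan
os.environ["N_CTX"] = "8192"                          # larger context window, more VRAM
os.environ["N_GPU_LAYERS"] = "0"                      # CPU-only fallback for local debugging
```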
 
  # =========================
+ # Global state (lives only inside GPU workers)
  # =========================
+ _llm = None
+ _llm_name = None
+
  def _truncate(s: str, n=3000):
      s = (s or "")
      return s if len(s) <= n else s[:n]

  def _clean_ocr(s: str) -> str:
      if not s: return ""
+     s = re.sub(r'[^\S\r\n]+', ' ', s)           # collapse runs of spaces (keep newlines)
+     s = re.sub(r'(\{#Sec\d+\}|#+\w*)', ' ', s)  # strip stray anchors/headers
      s = re.sub(r'\s{2,}', ' ', s)
      lines = []
      for par in s.splitlines():
 
  - Evidencia OCR: "Indicaciones ilegibles"
  """.strip()

+ def build_user_prompt(ocr_md, ocr_txt, user_msg):
      raw = ocr_md if (ocr_md and ocr_md.strip()) else ocr_txt
      ctx = _truncate(_clean_ocr(raw), 3000)
      question = (user_msg or "Analiza el CONTEXTO_OCR y resume lo clínicamente relevante en viñetas.").strip()
+     prompt = (
+         f"{FEWSHOT}\n\n"
+         f"### CONTEXTO_OCR\n{(ctx if ctx else '—')}\n\n"
          f"### PREGUNTA\n{question}\n\n"
+         f"### SALIDA_ES\n"
      )
+     return prompt
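For reference, with an empty OCR snapshot the string returned by build_user_prompt reduces to the skeleton below (FEWSHOT elided); the leading `### ` section markers are also what the `\n###` entry in STOP_SEQS cuts on:

```python
# Sketch of build_user_prompt("", "", None) — FEWSHOT elided:
#
#   <FEWSHOT examples>
#
#   ### CONTEXTO_OCR
#   —
#
#   ### PREGUNTA
#   Analiza el CONTEXTO_OCR y resume lo clínicamente relevante en viñetas.
#
#   ### SALIDA_ES
```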

  # =========================
+ # BioMedLM-7B GGUF — llama.cpp (GPU only in the worker)
  # =========================
+ def _download_gguf_path():
+     last_err = None
+     if GGUF_REPO:
+         for fname in GGUF_CANDIDATES:
+             try:
+                 path = hf_hub_download(repo_id=GGUF_REPO, filename=fname)
+                 return path, f"{GGUF_REPO}:{fname}"
+             except Exception as e:
+                 last_err = e
+     # fallback: a .gguf uploaded straight into the Space repo (working directory)
+     for fname in GGUF_CANDIDATES:
+         local_path = os.path.join(os.getcwd(), fname)
+         if os.path.exists(local_path):
+             return local_path, f"./{fname}"
+     raise RuntimeError(f"No se pudo localizar el GGUF. Configura GGUF_REPO/GGUF_FILE o sube el .gguf. Último error: {last_err}")
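If this lookup keeps failing, it can be reproduced in isolation outside the app; a minimal sketch (the repo id and filename are placeholders for your own GGUF export):

```python
from huggingface_hub import hf_hub_download

# Placeholder coordinates — substitute the repo/file you actually published.
path = hf_hub_download(repo_id="theuser/biomedlm-7b-gguf",
                       filename="BioMedLM-7B.Q4_K_M.gguf")
print(path)  # resolved file inside the local HF cache
```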
+
+ @spaces.GPU
+ def biomedlm_warmup():
+     """Initialize llama.cpp inside the GPU worker (avoids touching CUDA in main)."""
+     global _llm, _llm_name
+     if _llm is not None:
+         return f"OK::warm (reusing {_llm_name})"
+     gguf_path, used = _download_gguf_path()
+     _llm = Llama(
+         model_path=gguf_path,
+         n_ctx=N_CTX,
+         n_threads=N_THREADS,
+         n_gpu_layers=N_GPU_LAYERS,
+         n_batch=N_BATCH,
+         # default decoding: greedy (no sampling)
+         verbose=False,
+     )
+     _llm_name = used
+     return f"OK::loaded {used}"
+
+ def _to_chatml(system_prompt, user_prompt):
+     # simple ChatML-compatible message list for llama.cpp
+     return [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": user_prompt},
+     ]
+
+ @spaces.GPU
+ def biomedlm_chat(ocr_md, ocr_txt, user_msg, temperature=GEN_TEMPERATURE, top_p=GEN_TOP_P, max_tokens=GEN_MAX_NEW_TOKENS):
+     """Generate inside the GPU worker with the already-initialized LLM."""
+     global _llm
+     if _llm is None:
+         status = biomedlm_warmup()
+         if not str(status).startswith("OK::"):
+             return "ERR::No se pudo inicializar el modelo GGUF"
+     prompt_user = build_user_prompt(ocr_md, ocr_txt, user_msg)
+     messages = _to_chatml(SYSTEM_INSTR, prompt_user)
      try:
+         out = _llm.create_chat_completion(
              messages=messages,
+             temperature=temperature,
+             top_p=top_p,
+             max_tokens=max_tokens,
+             stop=STOP_SEQS,  # added here so STOP_SEQS (defined above) is actually applied
          )
+         ans = out["choices"][0]["message"]["content"]
+         return "OK::" + (ans or "").strip()
      except Exception as e:
+         return f"ERR::[{e.__class__.__name__}] {str(e) or repr(e)}"
  # =========================
+ # DeepSeek-OCR (GPU only inside the worker)
  # =========================
  def _load_ocr_model():
      model_name = "deepseek-ai/DeepSeek-OCR"

          mdl = AutoModel.from_pretrained(model_name, **kwargs).eval()
          return tok, mdl
      except Exception as e:
          if any(k in str(e).lower() for k in ["flash_attn", "flashattention2", "flash_attention_2"]):
              kwargs["_attn_implementation"] = "eager"
              mdl = AutoModel.from_pretrained(model_name, **kwargs).eval()
 
  def process_image(image, model_size, task_type, is_eval_mode):
      if image is None:
          return None, "Please upload an image first.", "Please upload an image first."
      # move to GPU ONLY inside the worker
      if torch.cuda.is_available():
          dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

      text_result = plain_text if plain_text else markdown_content
      return result_image, markdown_content, text_result

+ # =========================
+ # Chat wrapper for the UI
+ # =========================
+ def biomedlm_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
+     try:
+         res = biomedlm_chat(ocr_md, ocr_txt, user_msg, temperature=GEN_TEMPERATURE, top_p=GEN_TOP_P, max_tokens=GEN_MAX_NEW_TOKENS)
+         if str(res).startswith("OK::"):
+             answer = res[4:]
+             updated = (chat_msgs or []) + [
+                 {"role": "user", "content": user_msg or "(analizar solo OCR)"},
+                 {"role": "assistant", "content": answer}
+             ]
+             return updated, "", gr.update(value="")
+         else:
+             err_msg = res[5:] if str(res).startswith("ERR::") else str(res)
+             updated = (chat_msgs or []) + [
+                 {"role": "user", "content": user_msg or ""},
+                 {"role": "assistant", "content": "⚠️ Error LLM (local). Revisa el panel de debug."}
+             ]
+             return updated, "", gr.update(value=err_msg)
+     except Exception as e:
+         tb = traceback.format_exc(limit=2)
+         updated = (chat_msgs or []) + [
+             {"role": "user", "content": user_msg or ""},
+             {"role": "assistant", "content": f"⚠️ Error LLM: {e}"}
+         ]
+         return updated, "", gr.update(value=f"{e}\n{tb}")
+
+ def clear_chat():
+     return [], "", gr.update(value="")
+
  # =========================
  # UI (Gradio 5)
  # =========================
+ with gr.Blocks(title="OpScanIA — DeepSeek-OCR + BioMedLM-7B (GGUF)", theme=gr.themes.Soft()) as demo:
      gr.Markdown(
          """
+         # DeepSeek-OCR → Chat Clínico con **BioMedLM-7B (GGUF local)**
          1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
+         2) **Chatea** con **BioMedLM-7B GGUF (llama.cpp)** usando automáticamente el **OCR** como contexto.
          *Uso educativo; no reemplaza consejo médico.*
          """
      )
 
              eval_mode_checkbox = gr.Checkbox(value=False, label="Enable Evaluation Mode",
                                               info="Solo texto (más rápido). Desmárcalo para ver imagen anotada y markdown.")
              submit_btn = gr.Button("Process Image", variant="primary")
+             warm_btn = gr.Button("Warmup BioMedLM-7B (GGUF)")
 
          with gr.Column(scale=2):
              with gr.Tabs():

                  md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)
                  txt_preview = gr.Textbox(label="Snapshot Texto OCR", lines=10, interactive=False)

+     gr.Markdown("## Chat Clínico (BioMedLM-7B GGUF)")
      with gr.Row():
          with gr.Column(scale=2):
+             chatbot = gr.Chatbot(label="Asistente OCR (BioMedLM-7B GGUF)", type="messages", height=420)
              user_in = gr.Textbox(label="Mensaje", placeholder="Escribe tu consulta… (vacío = analiza solo el OCR)", lines=2)
              with gr.Row():
                  send_btn = gr.Button("Enviar", variant="primary")
                  clear_btn = gr.Button("Limpiar")
          with gr.Column(scale=1):
+             debug_box = gr.Textbox(label="Debug", lines=10, interactive=False)
 
      # OCR
      submit_btn.click(

          outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
      )

+     # Warmup LLM (downloads the GGUF and creates the Llama object on the GPU)
+     warm_btn.click(fn=biomedlm_warmup, outputs=[debug_box])
+
      # Chat
      send_btn.click(
+         fn=biomedlm_reply,
          inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
+         outputs=[chatbot, user_in, debug_box]
      )
+     clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, debug_box])

  if __name__ == "__main__":
      demo.queue(max_size=20)