Update app.py

app.py — CHANGED
@@ -1,10 +1,5 @@
-# app.py — DeepSeek-OCR + Med42
-#
-# - Chat LLM: Med42 Instruct over HTTP (InferenceClient + HF router).
-# - OCR: DeepSeek-OCR; GPU only inside @spaces.GPU (avoids CUDA in the main process).
-# - Reinforced few-shot prompt and deterministic generation (do_sample=False).
-# - Fixed the typo: gr.Textbox (not Textbox_).
-# ---------------------------------------------------------------
+# app.py — DeepSeek-OCR + Med42 (HF conversational) — Gradio 5
+# ZeroGPU-safe (does not initialize CUDA in the main process)
 
 import os, re, json, tempfile, traceback
 import gradio as gr
@@ -19,27 +14,25 @@ import requests
 # CONFIG (env)
 # =========================
 LLM_MODEL_ID = os.getenv("BIO_MODEL_ID", "m42-health/Llama3-Med42-8B").strip()
-
 HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Generation (deterministic, for instruction-following)
 GEN_TEMPERATURE = float(os.getenv("GEN_TEMPERATURE", "0.0"))
 GEN_TOP_P = float(os.getenv("GEN_TOP_P", "1.0"))
 GEN_MAX_NEW_TOKENS = int(os.getenv("GEN_MAX_NEW_TOKENS", "384"))
-GEN_REP_PENALTY = float(os.getenv("GEN_REP_PENALTY", "1.0"))
 GEN_TIMEOUT = int(os.getenv("GEN_TIMEOUT", "60"))  # s
 STOP_SEQS = ["\n###", "\nUser:", "\nAssistant:"]
 
 # (Optional) pin a stable revision of the DeepSeek-OCR repo to avoid unexpected changes
-DS_OCR_REV = os.getenv("DS_OCR_REV", None)  # e.g.,
+DS_OCR_REV = os.getenv("DS_OCR_REV", None)  # e.g., a commit hash
 
-# Remote client (
+# Remote client (does not touch CUDA here)
 _hf_client = InferenceClient(model=LLM_MODEL_ID, token=HF_TOKEN, timeout=GEN_TIMEOUT)
 
 # =========================
 # Prompt helpers
 # =========================
-def _truncate(s: str, n=3000):
+def _truncate(s: str, n=3000):
     s = (s or "")
     return s if len(s) <= n else s[:n]
 
@@ -55,12 +48,15 @@ def _clean_ocr(s: str) -> str:
         lines.append(par)
     return "\n".join(lines)
 
-
-
-
-
-(3) No inventes nada; (4) Responde en viñetas claras;
+SYSTEM_INSTR = (
+    "Eres un analista clínico educativo. Responde SIEMPRE en español. "
+    "Reglas: (1) Usa ÚNICAMENTE el CONTEXTO_OCR; "
+    "(2) Si falta un dato, escribe literalmente: 'dato no disponible en el OCR'; "
+    "(3) No inventes nada; (4) Responde en viñetas claras; "
+    "(5) Cita fragmentos exactos del OCR entre comillas como evidencia."
+)
 
+FEWSHOT = """
 ### EJEMPLO 1
 CONTEXTO_OCR:
 Paciente: Juan Pérez. Medicamento: Amoxicilina 500 mg cada 8 horas por 7 días.
@@ -81,79 +77,85 @@ SALIDA_ES:
 - Evidencia OCR: "Indicaciones ilegibles"
 """.strip()
 
-def
+def build_chat_messages(chat_msgs, ocr_md, ocr_txt, user_msg):
     raw = ocr_md if (ocr_md and ocr_md.strip()) else ocr_txt
     ctx = _truncate(_clean_ocr(raw), 3000)
-
-    history = []
-    for m in (chat_msgs or []):
-        role, content = m.get("role"), (m.get("content") or "").strip()
-        if not content:
-            continue
-        history.append(f"- { 'Usuario' if role=='user' else 'Asistente' }: {content}")
-    hist_block = "\n".join(history) if history else "—"
-
+    # Build the user content from the OCR context and the few-shot examples
     question = (user_msg or "Analiza el CONTEXTO_OCR y resume lo clínicamente relevante en viñetas.").strip()
 
-
-        FEWSHOT
-        "###
-        "### HISTORIAL (si existe)\n" + hist_block + "\n\n"
-        "### PREGUNTA\n" + question + "\n\n"
+    user_content = (
+        f"{FEWSHOT}\n\n### CONTEXTO_OCR\n{(ctx if ctx else '—')}\n\n"
+        f"### PREGUNTA\n{question}\n\n"
         "### SALIDA_ES\n"
     )
-
+
+    msgs = [{"role": "system", "content": SYSTEM_INSTR}]
+    # Optional: include the chat history as previous messages
+    for m in (chat_msgs or []):
+        r = m.get("role")
+        c = (m.get("content") or "").strip()
+        if not c:
+            continue
+        if r == "user":
+            msgs.append({"role": "user", "content": c})
+        elif r == "assistant":
+            msgs.append({"role": "assistant", "content": c})
+
+    msgs.append({"role": "user", "content": user_content})
+    return msgs
 
 # =========================
-# Remote LLM (Med42
+# Remote LLM (Med42) — conversational
 # =========================
-def med42_remote_generate(
+def med42_remote_generate(messages) -> (str, str):
     """
-
-
+    1) InferenceClient.chat.completions.create (conversational task)
+    2) Fallback to the HTTP router: /v1/chat/completions
     """
     try:
-
-
-
+        resp = _hf_client.chat.completions.create(
+            model=LLM_MODEL_ID,
+            messages=messages,
+            max_tokens=GEN_MAX_NEW_TOKENS,
             temperature=GEN_TEMPERATURE,
             top_p=GEN_TOP_P,
-
-            stop_sequences=STOP_SEQS,
-            details=False,
-            do_sample=False,  # deterministic
-            stream=False,
+            stop=STOP_SEQS,
         )
-
+        answer = (resp.choices[0].message.content or "").strip()
+        return answer, ""
     except Exception as e1:
-        # Fallback
+        # Fall back to the new router
         try:
             headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
             payload = {
                 "model": LLM_MODEL_ID,
-                "
+                "messages": messages,
                 "max_tokens": GEN_MAX_NEW_TOKENS,
                 "temperature": GEN_TEMPERATURE,
                 "top_p": GEN_TOP_P,
                 "stop": STOP_SEQS,
             }
             for url in [
-                "https://router.huggingface.co/v1/completions",
-                "https://router.huggingface.co/hf-inference/v1/completions"
+                "https://router.huggingface.co/v1/chat/completions",
+                "https://router.huggingface.co/hf-inference/v1/chat/completions",
            ]:
                 r = requests.post(url, headers=headers, json=payload, timeout=GEN_TIMEOUT)
                 if r.status_code == 200:
                     data = r.json()
                     if isinstance(data, dict) and "choices" in data and data["choices"]:
-
+                        msg = data["choices"][0].get("message") or {}
+                        text = (msg.get("content") or "").strip()
+                        return text, f"[Fallback router: {url}] {e1}"
             raise RuntimeError(f"HTTP {r.status_code}: {r.text[:800]}")
         except Exception as e2:
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Remote generation failed: {e1.__class__.__name__}: {e1} | HTTP fallback: {e2.__class__.__name__}: {e2}"
+            )
 
 def med42_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
     try:
-
-        answer, dbg = med42_remote_generate(
+        messages = build_chat_messages(chat_msgs, ocr_md, ocr_txt, user_msg)
+        answer, dbg = med42_remote_generate(messages)
         updated = (chat_msgs or []) + [
             {"role": "user", "content": user_msg or "(analizar solo OCR)"},
             {"role": "assistant", "content": answer}
@@ -167,7 +169,7 @@ def med42_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
         ]
         return updated, "", gr.update(value=f"{e}\n{tb}")
 
-def clear_chat():
+def clear_chat():
     return [], "", gr.update(value="")
 
 # =========================
@@ -254,10 +256,10 @@ def process_image(image, model_size, task_type, is_eval_mode):
 # =========================
 # UI (Gradio 5)
 # =========================
-with gr.Blocks(title="DeepSeek-OCR + Med42 Instruct", theme=gr.themes.Soft()) as demo:
+with gr.Blocks(title="DeepSeek-OCR + Med42 (Conversational)", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-# DeepSeek-OCR → Chat Clínico con **Med42
+# DeepSeek-OCR → Chat Clínico con **Med42**
 1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
 2) **Chatea** con **Med42** usando automáticamente el **OCR** como contexto.
 *Uso educativo; no reemplaza consejo médico.*
@@ -284,11 +286,11 @@ with gr.Blocks(title="DeepSeek-OCR + Med42 Instruct", theme=gr.themes.Soft()) as
 
         with gr.Column(scale=2):
             with gr.Tabs():
-                with gr.TabItem("Annotated Image"):
+                with gr.TabItem("Annotated Image"):
                     output_image = gr.Image(interactive=False)
-                with gr.TabItem("Markdown Preview"):
+                with gr.TabItem("Markdown Preview"):
                     output_markdown = gr.Markdown()
-                with gr.TabItem("Markdown Source / Eval"):
+                with gr.TabItem("Markdown Source / Eval"):
                     output_text = gr.Textbox(lines=18, show_copy_button=True, interactive=False)
     with gr.Row():
         md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)
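The conversational path introduced in this commit can be smoke-tested on its own. A minimal sketch, assuming HF_TOKEN is set in the environment and the account can reach m42-health/Llama3-Med42-8B through the HF router; the sampling settings and stop sequences mirror the values in the diff, while the messages below are placeholder content rather than the app's real OCR context:

# Standalone check of the chat-completions call used by med42_remote_generate (sketch).
import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="m42-health/Llama3-Med42-8B",
                         token=os.getenv("HF_TOKEN"), timeout=60)

messages = [
    {"role": "system", "content": "Eres un analista clínico educativo. Responde SIEMPRE en español."},
    {"role": "user", "content": "### CONTEXTO_OCR\nAmoxicilina 500 mg cada 8 horas por 7 días.\n\n### SALIDA_ES\n"},
]

resp = client.chat.completions.create(
    model="m42-health/Llama3-Med42-8B",
    messages=messages,
    max_tokens=384,
    temperature=0.0,
    top_p=1.0,
    stop=["\n###", "\nUser:", "\nAssistant:"],
)
print(resp.choices[0].message.content)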
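The raw HTTP fallback can be exercised in isolation as well. A sketch, again assuming HF_TOKEN is exported; the endpoints and payload shape are the ones the new code posts to, and the second URL is only tried when the first one does not return 200:

# Sketch of the router chat-completions fallback used by med42_remote_generate.
import os
import requests

headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}
payload = {
    "model": "m42-health/Llama3-Med42-8B",
    "messages": [{"role": "user", "content": "Resume en viñetas: Amoxicilina 500 mg cada 8 horas por 7 días."}],
    "max_tokens": 384,
    "temperature": 0.0,
    "top_p": 1.0,
    "stop": ["\n###", "\nUser:", "\nAssistant:"],
}

for url in [
    "https://router.huggingface.co/v1/chat/completions",
    "https://router.huggingface.co/hf-inference/v1/chat/completions",
]:
    r = requests.post(url, headers=headers, json=payload, timeout=60)
    if r.status_code == 200:
        print(r.json()["choices"][0]["message"]["content"])
        break
    print(f"{url} -> HTTP {r.status_code}")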