OpScanIA

Sleeping

App Files Community

jorgeiv500 committed on Nov 12

Commit

30bf3c9

verified ·

1 Parent(s): 6da1fcb

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -60

app.py CHANGED Viewed

@@ -1,10 +1,5 @@
-# app.py — DeepSeek-OCR + Med42 Instruct (remoto, ZeroGPU-safe) — Gradio 5
-# ---------------------------------------------------------------
-# - Chat LLM: Med42 Instruct por HTTP (InferenceClient + router HF).
-# - OCR: DeepSeek-OCR; GPU solo dentro de @spaces.GPU (evita CUDA en main).
-# - Prompt reforzado (few-shot) y generación determinista (do_sample=False).
-# - Arreglado el typo: gr.Textbox (no Textbox_).
-# ---------------------------------------------------------------
 import os, re, json, tempfile, traceback
 import gradio as gr
@@ -19,27 +14,25 @@ import requests
 # CONFIG (env)
 # =========================
 LLM_MODEL_ID = os.getenv("BIO_MODEL_ID", "m42-health/Llama3-Med42-8B").strip()
 HF_TOKEN = os.getenv("HF_TOKEN")
 # Generación (determinista para obediencia)
 GEN_TEMPERATURE = float(os.getenv("GEN_TEMPERATURE", "0.0"))
 GEN_TOP_P = float(os.getenv("GEN_TOP_P", "1.0"))
 GEN_MAX_NEW_TOKENS = int(os.getenv("GEN_MAX_NEW_TOKENS", "384"))
-GEN_REP_PENALTY = float(os.getenv("GEN_REP_PENALTY", "1.0"))
 GEN_TIMEOUT = int(os.getenv("GEN_TIMEOUT", "60"))  # s
 STOP_SEQS = ["\n###", "\nUser:", "\nAssistant:"]
 # (Opcional) fija una revisión estable del repo DeepSeek-OCR para evitar cambios inesperados
-DS_OCR_REV = os.getenv("DS_OCR_REV", None)  # e.g., un hash de commit
-# Cliente remoto (HTTP) — no toca CUDA
 _hf_client = InferenceClient(model=LLM_MODEL_ID, token=HF_TOKEN, timeout=GEN_TIMEOUT)
 # =========================
 # Prompt helpers
 # =========================
-def _truncate(s: str, n=3000):
     s = (s or "")
     return s if len(s) <= n else s[:n]
@@ -55,12 +48,15 @@ def _clean_ocr(s: str) -> str:
             lines.append(par)
     return "\n".join(lines)
-FEWSHOT = """
-### INSTRUCCIÓN
-Eres un **analista clínico educativo**. Responde **SIEMPRE en español**.
-Reglas: (1) Usa ÚNICAMENTE el CONTEXTO_OCR; (2) Si falta un dato, escribe literalmente: "dato no disponible en el OCR";
-(3) No inventes nada; (4) Responde en viñetas claras; (5) Cita fragmentos exactos del OCR entre comillas como evidencia.
 ### EJEMPLO 1
 CONTEXTO_OCR:
 Paciente: Juan Pérez. Medicamento: Amoxicilina 500 mg cada 8 horas por 7 días.
@@ -81,79 +77,85 @@ SALIDA_ES:
 - Evidencia OCR: "Indicaciones ilegibles"
 """.strip()
-def build_prompt(chat_msgs, ocr_md, ocr_txt, user_msg):
     raw = ocr_md if (ocr_md and ocr_md.strip()) else ocr_txt
     ctx = _truncate(_clean_ocr(raw), 3000)
-    history = []
-    for m in (chat_msgs or []):
-        role, content = m.get("role"), (m.get("content") or "").strip()
-        if not content:
-            continue
-        history.append(f"- { 'Usuario' if role=='user' else 'Asistente' }: {content}")
-    hist_block = "\n".join(history) if history else "—"
     question = (user_msg or "Analiza el CONTEXTO_OCR y resume lo clínicamente relevante en viñetas.").strip()
-    prompt = (
-        FEWSHOT + "\n\n"
-        "### CONTEXTO_OCR\n" + (ctx if ctx else "—") + "\n\n"
-        "### HISTORIAL (si existe)\n" + hist_block + "\n\n"
-        "### PREGUNTA\n" + question + "\n\n"
         "### SALIDA_ES\n"
     )
-    return prompt
 # =========================
-# LLM remoto (Med42 Instruct) — text_generation
 # =========================
-def med42_remote_generate(prompt: str) -> (str, str):
     """
-    Intenta InferenceClient.text_generation (serverless/TGI). Si falla,
-    hace fallback al router OpenAI-like /v1/completions.
     """
     try:
-        out = _hf_client.text_generation(
-            prompt=prompt,
-            max_new_tokens=GEN_MAX_NEW_TOKENS,
             temperature=GEN_TEMPERATURE,
             top_p=GEN_TOP_P,
-            repetition_penalty=GEN_REP_PENALTY,
-            stop_sequences=STOP_SEQS,
-            details=False,
-            do_sample=False,          # determinista
-            stream=False,
         )
-        return (out.strip() if isinstance(out, str) else str(out)), ""
     except Exception as e1:
-        # Fallback HTTP al router
         try:
             headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
             payload = {
                 "model": LLM_MODEL_ID,
-                "prompt": prompt,
                 "max_tokens": GEN_MAX_NEW_TOKENS,
                 "temperature": GEN_TEMPERATURE,
                 "top_p": GEN_TOP_P,
                 "stop": STOP_SEQS,
             }
             for url in [
-                "https://router.huggingface.co/v1/completions",
-                "https://router.huggingface.co/hf-inference/v1/completions"
             ]:
                 r = requests.post(url, headers=headers, json=payload, timeout=GEN_TIMEOUT)
                 if r.status_code == 200:
                     data = r.json()
                     if isinstance(data, dict) and "choices" in data and data["choices"]:
-                        return (data["choices"][0].get("text") or "").strip(), f"[Fallback router: {url}] {e1}"
             raise RuntimeError(f"HTTP {r.status_code}: {r.text[:800]}")
         except Exception as e2:
-            raise RuntimeError(f"Remote generation failed: {e1.__class__.__name__}: {e1} | HTTP fallback: {e2.__class__.__name__}: {e2}")
 def med42_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
     try:
-        prompt = build_prompt(chat_msgs, ocr_md, ocr_txt, user_msg)
-        answer, dbg = med42_remote_generate(prompt)
         updated = (chat_msgs or []) + [
             {"role": "user", "content": user_msg or "(analizar solo OCR)"},
             {"role": "assistant", "content": answer}
@@ -167,7 +169,7 @@ def med42_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
         ]
         return updated, "", gr.update(value=f"{e}\n{tb}")
-def clear_chat():
     return [], "", gr.update(value="")
 # =========================
@@ -254,10 +256,10 @@ def process_image(image, model_size, task_type, is_eval_mode):
 # =========================
 # UI (Gradio 5)
 # =========================
-with gr.Blocks(title="DeepSeek-OCR + Med42 Instruct", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # DeepSeek-OCR → Chat Clínico con **Med42 Instruct**
         1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
         2) **Chatea** con **Med42** usando automáticamente el **OCR** como contexto.
         *Uso educativo; no reemplaza consejo médico.*
@@ -284,11 +286,11 @@ with gr.Blocks(title="DeepSeek-OCR + Med42 Instruct", theme=gr.themes.Soft()) as
         with gr.Column(scale=2):
             with gr.Tabs():
-                with gr.TabItem("Annotated Image"):
                     output_image = gr.Image(interactive=False)
-                with gr.TabItem("Markdown Preview"):
                     output_markdown = gr.Markdown()
-                with gr.TabItem("Markdown Source / Eval"):
                     output_text = gr.Textbox(lines=18, show_copy_button=True, interactive=False)
             with gr.Row():
                 md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)

+# app.py — DeepSeek-OCR + Med42 (HF conversational) — Gradio 5
+# ZeroGPU-safe (sin inicializar CUDA en el proceso principal)
 import os, re, json, tempfile, traceback
 import gradio as gr
 # CONFIG (env)
 # =========================
 LLM_MODEL_ID = os.getenv("BIO_MODEL_ID", "m42-health/Llama3-Med42-8B").strip()
 HF_TOKEN = os.getenv("HF_TOKEN")
 # Generación (determinista para obediencia)
 GEN_TEMPERATURE = float(os.getenv("GEN_TEMPERATURE", "0.0"))
 GEN_TOP_P = float(os.getenv("GEN_TOP_P", "1.0"))
 GEN_MAX_NEW_TOKENS = int(os.getenv("GEN_MAX_NEW_TOKENS", "384"))
 GEN_TIMEOUT = int(os.getenv("GEN_TIMEOUT", "60"))  # s
 STOP_SEQS = ["\n###", "\nUser:", "\nAssistant:"]
 # (Opcional) fija una revisión estable del repo DeepSeek-OCR para evitar cambios inesperados
+DS_OCR_REV = os.getenv("DS_OCR_REV", None)  # e.g., hash de commit
+# Cliente remoto (no toca CUDA aquí)
 _hf_client = InferenceClient(model=LLM_MODEL_ID, token=HF_TOKEN, timeout=GEN_TIMEOUT)
 # =========================
 # Prompt helpers
 # =========================
+def _truncate(s: str, n=3000):
     s = (s or "")
     return s if len(s) <= n else s[:n]
             lines.append(par)
     return "\n".join(lines)
+SYSTEM_INSTR = (
+    "Eres un analista clínico educativo. Responde SIEMPRE en español. "
+    "Reglas: (1) Usa ÚNICAMENTE el CONTEXTO_OCR; "
+    "(2) Si falta un dato, escribe literalmente: 'dato no disponible en el OCR'; "
+    "(3) No inventes nada; (4) Responde en viñetas claras; "
+    "(5) Cita fragmentos exactos del OCR entre comillas como evidencia."
+)
+FEWSHOT = """
 ### EJEMPLO 1
 CONTEXTO_OCR:
 Paciente: Juan Pérez. Medicamento: Amoxicilina 500 mg cada 8 horas por 7 días.
 - Evidencia OCR: "Indicaciones ilegibles"
 """.strip()
+def build_chat_messages(chat_msgs, ocr_md, ocr_txt, user_msg):
     raw = ocr_md if (ocr_md and ocr_md.strip()) else ocr_txt
     ctx = _truncate(_clean_ocr(raw), 3000)
+    # Construimos el contenido del usuario con el contexto y few-shot
     question = (user_msg or "Analiza el CONTEXTO_OCR y resume lo clínicamente relevante en viñetas.").strip()
+    user_content = (
+        f"{FEWSHOT}\n\n### CONTEXTO_OCR\n{(ctx if ctx else '—')}\n\n"
+        f"### PREGUNTA\n{question}\n\n"
         "### SALIDA_ES\n"
     )
+    msgs = [{"role": "system", "content": SYSTEM_INSTR}]
+    # opcional: incluir historial como mensajes previos
+    for m in (chat_msgs or []):
+        r = m.get("role")
+        c = (m.get("content") or "").strip()
+        if not c:
+            continue
+        if r == "user":
+            msgs.append({"role": "user", "content": c})
+        elif r == "assistant":
+            msgs.append({"role": "assistant", "content": c})
+    msgs.append({"role": "user", "content": user_content})
+    return msgs
 # =========================
+# LLM remoto (Med42) — conversational
 # =========================
+def med42_remote_generate(messages) -> (str, str):
     """
+    1) InferenceClient.chat.completions.create (task conversacional)
+    2) Fallback HTTP router: /v1/chat/completions
     """
     try:
+        resp = _hf_client.chat.completions.create(
+            model=LLM_MODEL_ID,
+            messages=messages,
+            max_tokens=GEN_MAX_NEW_TOKENS,
             temperature=GEN_TEMPERATURE,
             top_p=GEN_TOP_P,
+            stop=STOP_SEQS,
         )
+        answer = (resp.choices[0].message.content or "").strip()
+        return answer, ""
     except Exception as e1:
+        # Fallback al router nuevo
         try:
             headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
             payload = {
                 "model": LLM_MODEL_ID,
+                "messages": messages,
                 "max_tokens": GEN_MAX_NEW_TOKENS,
                 "temperature": GEN_TEMPERATURE,
                 "top_p": GEN_TOP_P,
                 "stop": STOP_SEQS,
             }
             for url in [
+                "https://router.huggingface.co/v1/chat/completions",
+                "https://router.huggingface.co/hf-inference/v1/chat/completions",
             ]:
                 r = requests.post(url, headers=headers, json=payload, timeout=GEN_TIMEOUT)
                 if r.status_code == 200:
                     data = r.json()
                     if isinstance(data, dict) and "choices" in data and data["choices"]:
+                        msg = data["choices"][0].get("message") or {}
+                        text = (msg.get("content") or "").strip()
+                        return text, f"[Fallback router: {url}] {e1}"
             raise RuntimeError(f"HTTP {r.status_code}: {r.text[:800]}")
         except Exception as e2:
+            raise RuntimeError(
+                f"Remote generation failed: {e1.__class__.__name__}: {e1} | HTTP fallback: {e2.__class__.__name__}: {e2}"
+            )
 def med42_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
     try:
+        messages = build_chat_messages(chat_msgs, ocr_md, ocr_txt, user_msg)
+        answer, dbg = med42_remote_generate(messages)
         updated = (chat_msgs or []) + [
             {"role": "user", "content": user_msg or "(analizar solo OCR)"},
             {"role": "assistant", "content": answer}
         ]
         return updated, "", gr.update(value=f"{e}\n{tb}")
+def clear_chat():
     return [], "", gr.update(value="")
 # =========================
 # =========================
 # UI (Gradio 5)
 # =========================
+with gr.Blocks(title="DeepSeek-OCR + Med42 (Conversational)", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
+        # DeepSeek-OCR → Chat Clínico con **Med42**
         1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
         2) **Chatea** con **Med42** usando automáticamente el **OCR** como contexto.
         *Uso educativo; no reemplaza consejo médico.*
         with gr.Column(scale=2):
             with gr.Tabs():
+                with gr.TabItem("Annotated Image"):
                     output_image = gr.Image(interactive=False)
+                with gr.TabItem("Markdown Preview"):
                     output_markdown = gr.Markdown()
+                with gr.TabItem("Markdown Source / Eval"):
                     output_text = gr.Textbox(lines=18, show_copy_button=True, interactive=False)
             with gr.Row():
                 md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)