Update app.py
app.py CHANGED
@@ -4,10 +4,11 @@
 # • Chat: BioMedLM-7B in GGUF format with llama.cpp (also ONLY in the GPU worker).
 # • No nested GPU calls; everything wrapped in try/except to avoid generic RuntimeErrors.
 # • Prompt reinforced in Spanish and deterministic generation (OCR-aware, no hallucinations).
-# •
+# • Configured via environment variables: GGUF_REPO, GGUF_FILE, GGUF_LOCAL_PATH, N_CTX, N_BATCH, N_GPU_LAYERS, etc.
+# • Defaults: public repo mradermacher/BioMedLM-7B-GGUF + Q4_K_M.
 # -----------------------------------------------------------------------------------------------
 
-import os, re, tempfile, traceback
+import os, re, tempfile, traceback, glob
 import gradio as gr
 import torch
 from PIL import Image
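The new header bullets describe configuration purely through environment variables. As a hedged illustration (not part of the diff), this is how those variables could be overridden for a local or CPU-only run; the values are examples only, and they must be exported before app.py is imported, since the module reads them at import time with os.getenv:

# Illustrative override of the documented environment variables; example values only.
import os

os.environ["GGUF_REPO"] = "mradermacher/BioMedLM-7B-GGUF"      # HF repo holding the GGUF builds
os.environ["GGUF_FILE"] = "BioMedLM-7B.Q4_K_M.gguf"            # exact quantization file to fetch
# os.environ["GGUF_LOCAL_PATH"] = "./models/BioMedLM-7B.Q4_K_M.gguf"  # or skip the Hub entirely
os.environ["N_GPU_LAYERS"] = "0"                                # hypothetical CPU-only fallback
os.environ["N_CTX"] = "2048"                                    # smaller context to save memory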
@@ -20,26 +21,27 @@ from llama_cpp import Llama
 # CONFIG (environment)
 # =========================
 # --- BioMedLM-7B (GGUF / llama.cpp) ---
-GGUF_REPO = os.getenv("GGUF_REPO", "").strip()
-GGUF_FILE = os.getenv("GGUF_FILE", "").strip()
-
+GGUF_REPO = os.getenv("GGUF_REPO", "mradermacher/BioMedLM-7B-GGUF").strip()
+GGUF_FILE = os.getenv("GGUF_FILE", "BioMedLM-7B.Q4_K_M.gguf").strip()
+GGUF_LOCAL_PATH = os.getenv("GGUF_LOCAL_PATH", "").strip()  # e.g. ./models/BioMedLM-7B.Q4_K_M.gguf
+
+# Common candidate names (in case the exact filename does not match)
 _GGUF_CANDIDATES = [
-    "BioMedLM-7B.Q4_K_M.gguf",
-    "BioMedLM-7B.Q5_K_M.gguf",
-    "BioMedLM-7B.Q8_0.gguf",
-    "BioMedLM-7B-f16.gguf",
-    "biomedlm-7b.Q4_K_M.gguf",
-    "biomedlm-7b.Q5_K_M.gguf",
-    "biomedlm-7b.Q8_0.gguf",
-    "biomedlm-7b-f16.gguf",
+    "BioMedLM-7B.Q4_K_M.gguf", "BioMedLM-7B.Q4_K_S.gguf",
+    "BioMedLM-7B.Q5_K_M.gguf", "BioMedLM-7B.Q5_K_S.gguf",
+    "BioMedLM-7B.Q6_K.gguf", "BioMedLM-7B.Q8_0.gguf",
+    "BioMedLM-7B.IQ4_XS.gguf", "BioMedLM-7B.Q2_K.gguf",
+    "BioMedLM-7B.f16.gguf",
+    "biomedlm-7b.Q4_K_M.gguf", "biomedlm-7b.Q5_K_M.gguf",
+    "biomedlm-7b.Q8_0.gguf", "biomedlm-7b-f16.gguf",
 ]
 GGUF_CANDIDATES = [GGUF_FILE] if GGUF_FILE else _GGUF_CANDIDATES
 
 # Performance / memory (tune for the Space's GPU: T4 / A10G)
 N_CTX = int(os.getenv("N_CTX", "4096"))
 N_THREADS = int(os.getenv("N_THREADS", str(os.cpu_count() or 4)))
-N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "35"))  # 7B ~32 layers; 35 ≈ all of them
-N_BATCH = int(os.getenv("N_BATCH", "512"))  #
+N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "35"))  # 7B ~32 layers; 35 ≈ all of them (if there is enough VRAM)
+N_BATCH = int(os.getenv("N_BATCH", "512"))  # 512/768/1024 depending on VRAM
 
 # Decoding (deterministic by default)
 GEN_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.0"))
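For context, a minimal sketch of how these knobs usually map onto llama-cpp-python's Llama constructor. The app's real _ensure_llm() call is outside this hunk, so the argument wiring below is an assumption, not the app's exact code:

from llama_cpp import Llama  # same binding the app imports

# Sketch only: argument names are llama-cpp-python's, values mirror the defaults above.
llm = Llama(
    model_path="BioMedLM-7B.Q4_K_M.gguf",  # path resolved by _download_gguf_path()
    n_ctx=4096,        # N_CTX: prompt + completion window
    n_threads=8,       # N_THREADS: CPU threads for non-offloaded work
    n_gpu_layers=35,   # N_GPU_LAYERS: 35 offloads all ~32 layers of a 7B model
    n_batch=512,       # N_BATCH: prompt-processing batch size; raise with more VRAM
    verbose=False,
)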
@@ -47,7 +49,7 @@ GEN_TOP_P = float(os.getenv("TOP_P", "1.0"))
 GEN_MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "384"))
 STOP_SEQS = ["\n###", "\nUser:", "\nAssistant:", "\nUsuario:", "\nAsistente:"]
 
-# Token for private HF repos (optional)
+# Token for private HF repos (optional; this repo is public)
 HF_TOKEN = os.getenv("HF_TOKEN")
 
 # DeepSeek-OCR: pin an optional revision/commit to avoid unexpected changes
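A short sketch of a deterministic completion call that uses these decoding settings and stop sequences; here `llm` is assumed to be the llama_cpp.Llama instance initialized elsewhere in app.py, and `prompt` any OCR-grounded prompt string:

# Sketch only: llama-cpp-python completion call with the decoding defaults above.
out = llm(
    prompt,
    max_tokens=384,    # GEN_MAX_NEW_TOKENS
    temperature=0.0,   # GEN_TEMPERATURE: greedy decoding, reproducible output
    top_p=1.0,         # GEN_TOP_P: no nucleus truncation
    stop=["\n###", "\nUser:", "\nAssistant:", "\nUsuario:", "\nAsistente:"],  # STOP_SEQS
)
answer = out["choices"][0]["text"].strip()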
@@ -128,21 +130,42 @@ _llm = None
 _llm_name = None
 
 def _download_gguf_path():
-    """
+    """
+    Search priority:
+      0) GGUF_LOCAL_PATH (direct path)
+      1) File uploaded to the Space (cwd): exact GGUF_FILE or any *.gguf
+      2) HF repo: GGUF_REPO + GGUF_FILE (or candidates)
+    """
+    # 0) Explicit local path
+    if GGUF_LOCAL_PATH:
+        p = os.path.abspath(GGUF_LOCAL_PATH)
+        if os.path.exists(p):
+            return p, p
+        raise RuntimeError(f"GGUF_LOCAL_PATH points to a file that does not exist: {p}")
+
+    # 1) File uploaded to the Space
+    if GGUF_FILE:
+        local_path = os.path.join(os.getcwd(), GGUF_FILE)
+        if os.path.exists(local_path):
+            return local_path, f"./{GGUF_FILE}"
+    found = sorted(glob.glob(os.path.join(os.getcwd(), "*.gguf")))
+    if found:
+        return found[0], f"./{os.path.basename(found[0])}"
+
+    # 2) HF repo
     last_err = None
     if GGUF_REPO:
-        for fname in GGUF_CANDIDATES:
+        candidates = [GGUF_FILE] if GGUF_FILE else _GGUF_CANDIDATES
+        for fname in candidates:
             try:
                 path = hf_hub_download(repo_id=GGUF_REPO, filename=fname, token=HF_TOKEN)
                 return path, f"{GGUF_REPO}:{fname}"
             except Exception as e:
                 last_err = e
-        return local_path, f"./{fname}"
-    raise RuntimeError(f"GGUF not found. Set GGUF_REPO/GGUF_FILE or upload the .gguf. Last error: {last_err}")
+
+    raise RuntimeError("GGUF not found. "
+                       "Upload the .gguf (Files) and set GGUF_FILE, or define GGUF_REPO+GGUF_FILE, "
+                       f"or use GGUF_LOCAL_PATH. Last HF error: {last_err}")
 
 def _ensure_llm():
     """Initializes llama.cpp in the SAME worker; never lets an exception propagate upward."""
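A quick, hypothetical sanity check of the new search order (not part of the diff): it simply calls the rewritten _download_gguf_path() and reports which of the three sources, local path, uploaded file, or HF repo, resolved the model:

if __name__ == "__main__":
    try:
        gguf_path, gguf_source = _download_gguf_path()
        print(f"GGUF resolved from {gguf_source}: {gguf_path}")
    except RuntimeError as err:
        print(f"No GGUF available: {err}")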