IFMedTechdemo committed on
Commit
6c2c7e0
·
verified ·
1 Parent(s): 58abc47

Update app.py

Browse files

Added the complete version 4 code and changed the PY_MODULES mapping (module filenames).

Files changed (1) hide show
  1. app.py +724 -97
app.py CHANGED
@@ -1,4 +1,5 @@
1
- ################################### version 3
 
2
 
3
  import os
4
  import time
@@ -9,7 +10,7 @@ import gradio as gr
9
  import spaces
10
  import torch
11
  from PIL import Image
12
- import pandas as pd # for reading Excel and debugging
13
 
14
  from transformers import (
15
  Qwen3VLForConditionalGeneration,
@@ -21,9 +22,9 @@ from transformers import (
21
  from gradio.themes import Soft
22
  from gradio.themes.utils import colors, fonts, sizes
23
 
24
- # -----------------------------
25
- # Character Error Rate (CER)
26
- # -----------------------------
27
 
28
 
29
  def levenshtein(a: str, b: str) -> int:
@@ -54,24 +55,23 @@ def character_error_rate(pred: str, target: str) -> float:
54
  return (distance / len(target)) * 100 if len(target) > 0 else 0.0
55
 
56
 
57
- # -----------------------------
58
- # Private repo: dynamic import
59
- # -----------------------------
60
  import importlib.util
61
  from huggingface_hub import hf_hub_download
62
 
63
  REPO_ID = "IFMedTech/Medibot_OCR_model" # private backend repo
64
 
65
- # Map filenames to exported class names
66
- PY_MODULES = {
67
- "ner.py": "ClinicalNER", # NER is only applied for Dots.OCR output
68
- "tfidf_phonetic.py": "TfidfPhoneticMatcher",
69
  "symspell_matcher.py": "SymSpellMatcher",
70
- "rapidfuzz_matcher.py": "RapidFuzzMatcher",
71
- # 'Medibot_Drugs_Cleaned_Updated.xlsx' is data, not a module
72
  }
73
 
74
- HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
75
 
76
 
77
  def _dynamic_import(module_path: str, class_name: str):
@@ -81,40 +81,61 @@ def _dynamic_import(module_path: str, class_name: str):
81
  return getattr(module, class_name)
82
 
83
 
84
- # Load private classes and Excel dictionary (once at import time)
85
  priv_classes: Dict[str, Any] = {}
86
  drug_xlsx_path: Optional[str] = None
87
- try:
88
- if HF_TOKEN is None:
89
- print("[Private] WARNING: HUGGINGFACE_TOKEN not set; NER/Spell-check will be unavailable.")
90
- else:
91
- for fname, cls in PY_MODULES.items():
92
- path = hf_hub_download(repo_id=REPO_ID, filename=fname, token=HF_TOKEN)
93
- if cls:
94
- priv_classes[cls] = _dynamic_import(path, cls)
95
- print(f"[Private] Loaded class: {cls} from {fname}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  drug_xlsx_path = hf_hub_download(
97
  repo_id=REPO_ID,
98
  filename="Medibot_Drugs_Cleaned_Updated.xlsx",
99
  token=HF_TOKEN,
 
100
  )
101
  print(f"[Private] Downloaded Excel at: {drug_xlsx_path}")
102
 
103
- # DEBUG: read Excel once and print its shape
104
- try:
105
- df_debug = pd.read_excel(drug_xlsx_path)
106
- print(f"[Private] Excel loaded successfully. Shape: {df_debug.shape}")
107
- except Exception as e:
108
- print(f"[Private] ERROR reading Excel for debug: {e}")
109
-
110
- except Exception as e:
111
- print(f"[Private] ERROR loading private backend: {e}")
112
- priv_classes = {}
113
- drug_xlsx_path = None
114
-
115
- # ----------------------------
116
- # THEME
117
- # ----------------------------
118
  colors.steel_blue = colors.Color(
119
  name="steel_blue",
120
  c50="#EBF3F8",
@@ -194,9 +215,9 @@ css = """
194
  #output-title h2 { font-size: 2.1em !important; }
195
  """
196
 
197
- # ----------------------------
198
- # RUNTIME / DEVICE
199
- # ----------------------------
200
  os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
201
  print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
202
  print("torch.__version__ =", torch.__version__)
@@ -214,9 +235,9 @@ if use_cuda:
214
  DTYPE_FP16 = torch.float16 if use_cuda else torch.float32
215
  DTYPE_BF16 = torch.bfloat16 if use_cuda else torch.float32
216
 
217
- # ----------------------------
218
- # OCR MODELS: Chandra-OCR + Dots.OCR
219
- # ----------------------------
220
  # 1) Chandra-OCR (Qwen3VL)
221
  MODEL_ID_V = "datalab-to/chandra"
222
  processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
@@ -246,14 +267,15 @@ model_d = AutoModelForCausalLM.from_pretrained(
246
  if not use_cuda:
247
  model_d.to(device)
248
 
249
- # ----------------------------
250
- # GENERATION (OCR → NER (Dots only) → Spell-check + CER)
251
- # ----------------------------
 
252
  MAX_MAX_NEW_TOKENS = 4096
253
  DEFAULT_MAX_NEW_TOKENS = 2048
254
 
255
 
256
- @spaces.GPU # you can add duration=... if needed, e.g. @spaces.GPU(duration=240)
257
  def generate_image(
258
  model_name: str,
259
  text: str,
@@ -266,19 +288,17 @@ def generate_image(
266
  spell_algo: str,
267
  ):
268
  """
269
- 1) Stream OCR tokens to Raw output (unchanged).
270
- 2) If model_name == 'Dots.OCR', run ClinicalNER → list[str] meds.
271
- For Chandra-OCR, skip NER.
272
- 3) Apply selected spell-check (TF-IDF+Phonetic / SymSpell / RapidFuzz)
273
- using Excel dict, and compute CER for each suggestion.
274
- 4) Markdown shows OCR text, NER list (if any), and spell-check top-5
275
- suggestions with scores and CER.
276
  """
277
  if image is None:
278
- # Two outputs: raw textbox + markdown
279
  yield "Please upload an image.", "Please upload an image."
280
  return
281
 
 
282
  if model_name == "Chandra-OCR":
283
  processor, model = processor_v, model_v
284
  elif model_name == "Dots.OCR":
@@ -287,7 +307,7 @@ def generate_image(
287
  yield "Invalid model selected.", "Invalid model selected."
288
  return
289
 
290
- # Build prompt from text parameter (kept via gr.State)
291
  messages = [
292
  {
293
  "role": "user",
@@ -328,76 +348,104 @@ def generate_image(
328
  thread = Thread(target=model.generate, kwargs=gen_kwargs)
329
  thread.start()
330
 
331
- # 1) Live OCR streaming to Raw (and mirror to Markdown during stream)
332
  buffer = ""
333
  for new_text in streamer:
334
  buffer += new_text.replace("<|im_end|>", "")
335
  time.sleep(0.01)
336
- # During streaming, just show the raw text in both components
337
- yield buffer, buffer
338
 
339
- # Final raw text
340
  final_ocr_text = buffer.strip()
341
 
342
- # -------------------------
343
- # 2) Clinical NER (Dots.OCR only)
344
- # -------------------------
345
  meds: List[str] = []
 
346
  if model_name == "Dots.OCR":
 
347
  try:
348
  if "ClinicalNER" in priv_classes and HF_TOKEN is not None:
349
  ClinicalNER = priv_classes["ClinicalNER"]
350
- ner = ClinicalNER(token=HF_TOKEN) # model_id can be passed if needed
351
  ner_output = ner(final_ocr_text) or []
352
- # Expecting list[str]; be robust:
353
- meds = [m.strip() for m in ner_output if isinstance(m, str) and m.strip()]
354
- print("[NER] Extracted meds (from ClinicalNER):", meds)
 
 
 
355
  else:
356
- print("[NER] ClinicalNER not available or no HF token.")
357
  except Exception as e:
358
  print(f"[NER] Error running ClinicalNER: {e}")
359
 
360
- # Fallback: if no meds found (or Chandra-OCR), derive meds from OCR lines
361
- if not meds:
362
- meds = [line.strip() for line in final_ocr_text.splitlines() if line.strip()]
363
- print("[NER] Using line-based meds fallback, count:", len(meds))
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
  print("[DEBUG] meds count:", len(meds))
366
  print("[DEBUG] drug_xlsx_path in generate_image:", drug_xlsx_path)
367
 
368
- # -------------------------
369
- # Build Markdown: OCR text + NER section
370
- # -------------------------
371
  md = "### Raw OCR Output\n"
372
  md += "```\n" + (final_ocr_text or "(empty)") + "\n```\n"
373
 
374
- md += "\n---\n### Clinical NER (Medications)\n"
375
  if meds:
376
  for m in meds:
377
  md += f"- {m}\n"
378
  else:
379
  md += "- None detected\n"
380
 
381
- # -------------------------
382
- # 3) Spell-check (med list) with CER
383
- # -------------------------
384
  spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
385
  corr: Dict[str, List] = {}
386
 
 
 
 
387
  try:
388
  if meds and drug_xlsx_path:
389
-
390
  try:
391
- df_debug = pd.read_excel(drug_xlsx_path)
392
- print(f"[Private] Excel loaded successfully. Shape: {df_debug.shape}")
 
 
 
 
 
 
 
393
  except Exception as e:
394
- print(f"[Private] ERROR reading Excel for debug: {e}")
 
395
 
396
-
397
  if (
398
  spell_algo == "TF-IDF + Phonetic"
399
  and "TfidfPhoneticMatcher" in priv_classes
400
  ):
 
401
  Cls = priv_classes["TfidfPhoneticMatcher"]
402
  checker = Cls(
403
  xlsx_path=drug_xlsx_path,
@@ -408,6 +456,7 @@ def generate_image(
408
  corr = checker.match_list(meds, top_k=5, tfidf_threshold=0.15)
409
 
410
  elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
 
411
  Cls = priv_classes["SymSpellMatcher"]
412
  checker = Cls(
413
  xlsx_path=drug_xlsx_path,
@@ -418,17 +467,27 @@ def generate_image(
418
  corr = checker.match_list(meds, top_k=5, min_score=0.4)
419
 
420
  elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
 
421
  Cls = priv_classes["RapidFuzzMatcher"]
422
  checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
423
  corr = checker.match_list(meds, top_k=5, threshold=70.0)
 
424
  else:
425
- spell_section += "- Spell-check backend unavailable (no matcher class).\n"
 
 
 
426
  else:
427
  if not meds:
428
  spell_section += "- No medications extracted (empty med list).\n"
429
  if not drug_xlsx_path:
430
- spell_section += "- Drug Excel dictionary path missing (drug_xlsx_path is None).\n"
 
 
 
 
431
  except Exception as e:
 
432
  spell_section += f"- Spell-check error: {e}\n"
433
 
434
  # Format suggestions (top-5 per med, with scores + CER)
@@ -447,14 +506,13 @@ def generate_image(
447
 
448
  final_md = md + spell_section
449
 
450
- # 4) Final yield: raw unchanged; Markdown with NER + spell-check + CER
451
  yield final_ocr_text, final_md
452
 
453
 
454
- # ----------------------------
455
- # UI
456
- # ----------------------------
457
- # IMPORTANT: examples must match the number of inputs (here: only image)
458
  image_examples = [
459
  ["examples/3.jpg"],
460
  ["examples/1.jpg"],
@@ -477,7 +535,6 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
477
  label="Example Images",
478
  )
479
 
480
- # Spell-check selection
481
  spell_choice = gr.Radio(
482
  choices=["TF-IDF + Phonetic", "SymSpell", "RapidFuzz"],
483
  label="Select Spell-check Approach",
@@ -538,7 +595,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
538
  value="Chandra-OCR",
539
  )
540
 
541
- # Hard-coded instruction text, passed as gr.State to match the 'text' parameter
542
  query_state = gr.State(
543
  "Extract medicine or drugs names along with dosage amount or quantity"
544
  )
@@ -566,7 +623,577 @@ if __name__ == "__main__":
566
 
567
 
568
 
569
- ##################################################################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
  # import os
572
 
 
1
+ ###################################### version 4 #########################################
2
+
3
 
4
  import os
5
  import time
 
10
  import spaces
11
  import torch
12
  from PIL import Image
13
+ import pandas as pd # Excel read + debug
14
 
15
  from transformers import (
16
  Qwen3VLForConditionalGeneration,
 
22
  from gradio.themes import Soft
23
  from gradio.themes.utils import colors, fonts, sizes
24
 
25
+ # ============================================================
26
+ # Character Error Rate (CER)
27
+ # ============================================================
28
 
29
 
30
  def levenshtein(a: str, b: str) -> int:
 
55
  return (distance / len(target)) * 100 if len(target) > 0 else 0.0
56
 
57
 
58
+ # ============================================================
59
+ # Private repo: dynamic import + Excel download
60
+ # ============================================================
61
  import importlib.util
62
  from huggingface_hub import hf_hub_download
63
 
64
  REPO_ID = "IFMedTech/Medibot_OCR_model" # private backend repo
65
 
66
+ # Filenames in the repo → class names they define
67
+ PY_MODULES: Dict[str, str] = {
68
+ "clinical_NER.py": "ClinicalNER",
69
+ "tf_idf_phonetic.py": "TfidfPhoneticMatcher",
70
  "symspell_matcher.py": "SymSpellMatcher",
71
+ "rapid_fuzz_matcher.py": "RapidFuzzMatcher",
 
72
  }
73
 
74
+ HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN") # must be set in Space secrets
75
 
76
 
77
  def _dynamic_import(module_path: str, class_name: str):
 
81
  return getattr(module, class_name)
82
 
83
 
 
84
  priv_classes: Dict[str, Any] = {}
85
  drug_xlsx_path: Optional[str] = None
86
+ BACKEND_INIT_ERROR: Optional[str] = None
87
+
88
+ print("[Private] HF_TOKEN present?:", HF_TOKEN is not None)
89
+
90
+ if HF_TOKEN is None:
91
+ BACKEND_INIT_ERROR = "HUGGINGFACE_TOKEN env var is not set in this Space."
92
+ print("[Private] WARNING:", BACKEND_INIT_ERROR)
93
+ else:
94
+ print(f"[Private] Using repo: {REPO_ID}")
95
+
96
+ # 1) Load python modules (best-effort: failure of one file will not block others)
97
+ for fname, cls_name in PY_MODULES.items():
98
+ try:
99
+ print(f"[Private] Downloading module file: {fname}")
100
+ path = hf_hub_download(
101
+ repo_id=REPO_ID,
102
+ filename=fname,
103
+ token=HF_TOKEN,
104
+ repo_type="model",
105
+ )
106
+ priv_classes[cls_name] = _dynamic_import(path, cls_name)
107
+ print(f"[Private] Loaded class {cls_name} from {fname}")
108
+ except Exception as e:
109
+ msg = f"Failed to load {fname}: {e}"
110
+ print("[Private]", msg)
111
+ BACKEND_INIT_ERROR = (BACKEND_INIT_ERROR or "") + f" | {msg}"
112
+
113
+ # 2) Load Excel dictionary
114
+ try:
115
+ print("[Private] Downloading Excel file: Medibot_Drugs_Cleaned_Updated.xlsx")
116
  drug_xlsx_path = hf_hub_download(
117
  repo_id=REPO_ID,
118
  filename="Medibot_Drugs_Cleaned_Updated.xlsx",
119
  token=HF_TOKEN,
120
+ repo_type="model",
121
  )
122
  print(f"[Private] Downloaded Excel at: {drug_xlsx_path}")
123
 
124
+ # Debug: verify read
125
+ df_debug = pd.read_excel(drug_xlsx_path, nrows=3)
126
+ print(
127
+ f"[Private] Excel loaded successfully. "
128
+ f"Shape={df_debug.shape}, cols={list(df_debug.columns)}"
129
+ )
130
+ except Exception as e:
131
+ msg = f"ERROR loading Excel: {e}"
132
+ print("[Private]", msg)
133
+ BACKEND_INIT_ERROR = (BACKEND_INIT_ERROR or "") + f" | {msg}"
134
+ drug_xlsx_path = None
135
+
136
+ # ============================================================
137
+ # THEME
138
+ # ============================================================
139
  colors.steel_blue = colors.Color(
140
  name="steel_blue",
141
  c50="#EBF3F8",
 
215
  #output-title h2 { font-size: 2.1em !important; }
216
  """
217
 
218
+ # ============================================================
219
+ # RUNTIME / DEVICE
220
+ # ============================================================
221
  os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
222
  print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
223
  print("torch.__version__ =", torch.__version__)
 
235
  DTYPE_FP16 = torch.float16 if use_cuda else torch.float32
236
  DTYPE_BF16 = torch.bfloat16 if use_cuda else torch.float32
237
 
238
+ # ============================================================
239
+ # OCR MODELS: Chandra-OCR + Dots.OCR
240
+ # ============================================================
241
  # 1) Chandra-OCR (Qwen3VL)
242
  MODEL_ID_V = "datalab-to/chandra"
243
  processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 
267
  if not use_cuda:
268
  model_d.to(device)
269
 
270
+ # ============================================================
271
+ # GENERATION (OCR → Med extraction → Spell-check + CER)
272
+ # ClinicalNER is used ONLY for Dots.OCR.
273
+ # ============================================================
274
  MAX_MAX_NEW_TOKENS = 4096
275
  DEFAULT_MAX_NEW_TOKENS = 2048
276
 
277
 
278
+ @spaces.GPU # you can add duration=... if you hit timeouts
279
  def generate_image(
280
  model_name: str,
281
  text: str,
 
288
  spell_algo: str,
289
  ):
290
  """
291
+ 1) Stream OCR tokens to Raw output.
292
+ 2) For Dots.OCR: run ClinicalNER → meds list (with fallback to line-based).
293
+ For Chandra-OCR: DO NOT call ClinicalNER; meds from OCR lines only.
294
+ 3) Apply selected spell-check algorithm on meds using Excel dictionary.
295
+ 4) Compute CER for each suggestion and display in markdown.
 
 
296
  """
297
  if image is None:
 
298
  yield "Please upload an image.", "Please upload an image."
299
  return
300
 
301
+ # Choose processor/model
302
  if model_name == "Chandra-OCR":
303
  processor, model = processor_v, model_v
304
  elif model_name == "Dots.OCR":
 
307
  yield "Invalid model selected.", "Invalid model selected."
308
  return
309
 
310
+ # Prompt (text is provided via gr.State)
311
  messages = [
312
  {
313
  "role": "user",
 
348
  thread = Thread(target=model.generate, kwargs=gen_kwargs)
349
  thread.start()
350
 
351
+ # 1) Live OCR streaming: show raw text while generating
352
  buffer = ""
353
  for new_text in streamer:
354
  buffer += new_text.replace("<|im_end|>", "")
355
  time.sleep(0.01)
356
+ yield buffer, buffer # two outputs: raw + md (same during stream)
 
357
 
 
358
  final_ocr_text = buffer.strip()
359
 
360
+ # --------------------------------------------------------
361
+ # 2) Medications extraction
362
+ # --------------------------------------------------------
363
  meds: List[str] = []
364
+
365
  if model_name == "Dots.OCR":
366
+ # ClinicalNER ONLY for Dots.OCR
367
  try:
368
  if "ClinicalNER" in priv_classes and HF_TOKEN is not None:
369
  ClinicalNER = priv_classes["ClinicalNER"]
370
+ ner = ClinicalNER(token=HF_TOKEN)
371
  ner_output = ner(final_ocr_text) or []
372
+ meds = [
373
+ m.strip()
374
+ for m in ner_output
375
+ if isinstance(m, str) and m.strip()
376
+ ]
377
+ print("[NER] (Dots.OCR) ClinicalNER meds:", meds)
378
  else:
379
+ print("[NER] ClinicalNER unavailable or missing HF token; skipping.")
380
  except Exception as e:
381
  print(f"[NER] Error running ClinicalNER: {e}")
382
 
383
+ # Fallback if ClinicalNER returns nothing
384
+ if not meds:
385
+ meds = [
386
+ line.strip()
387
+ for line in final_ocr_text.splitlines()
388
+ if line.strip()
389
+ ]
390
+ print("[NER] (Dots.OCR) Fallback to lines, count:", len(meds))
391
+
392
+ elif model_name == "Chandra-OCR":
393
+ # NO ClinicalNER for Chandra; just use text lines
394
+ meds = [
395
+ line.strip()
396
+ for line in final_ocr_text.splitlines()
397
+ if line.strip()
398
+ ]
399
+ print("[NER] (Chandra-OCR) Line-based meds only, count:", len(meds))
400
 
401
  print("[DEBUG] meds count:", len(meds))
402
  print("[DEBUG] drug_xlsx_path in generate_image:", drug_xlsx_path)
403
 
404
+ # --------------------------------------------------------
405
+ # 3) Build Markdown base: OCR text + med list
406
+ # --------------------------------------------------------
407
  md = "### Raw OCR Output\n"
408
  md += "```\n" + (final_ocr_text or "(empty)") + "\n```\n"
409
 
410
+ md += "\n---\n### Medications (extracted)\n"
411
  if meds:
412
  for m in meds:
413
  md += f"- {m}\n"
414
  else:
415
  md += "- None detected\n"
416
 
417
+ # --------------------------------------------------------
418
+ # 4) Spell-check (med list) with CER
419
+ # --------------------------------------------------------
420
  spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
421
  corr: Dict[str, List] = {}
422
 
423
+ if BACKEND_INIT_ERROR:
424
+ spell_section += f"- [DEBUG] Backend init error: {BACKEND_INIT_ERROR}\n"
425
+
426
  try:
427
  if meds and drug_xlsx_path:
428
+ # Optional Excel debug read
429
  try:
430
+ df_dbg = pd.read_excel(drug_xlsx_path, nrows=5)
431
+ print(
432
+ f"[Spell DEBUG] Excel read OK: path={drug_xlsx_path}, "
433
+ f"shape={df_dbg.shape}, cols={list(df_dbg.columns)}"
434
+ )
435
+ spell_section += (
436
+ f"- [DEBUG] Excel read OK; shape={df_dbg.shape}, "
437
+ f"cols={list(df_dbg.columns)}\n"
438
+ )
439
  except Exception as e:
440
+ print(f"[Spell DEBUG] ERROR reading Excel in generate_image: {e}")
441
+ spell_section += f"- [DEBUG] Excel read error: {e}\n"
442
 
443
+ # Pick matcher based on spell_algo
444
  if (
445
  spell_algo == "TF-IDF + Phonetic"
446
  and "TfidfPhoneticMatcher" in priv_classes
447
  ):
448
+ print("[Spell DEBUG] Using TfidfPhoneticMatcher")
449
  Cls = priv_classes["TfidfPhoneticMatcher"]
450
  checker = Cls(
451
  xlsx_path=drug_xlsx_path,
 
456
  corr = checker.match_list(meds, top_k=5, tfidf_threshold=0.15)
457
 
458
  elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
459
+ print("[Spell DEBUG] Using SymSpellMatcher")
460
  Cls = priv_classes["SymSpellMatcher"]
461
  checker = Cls(
462
  xlsx_path=drug_xlsx_path,
 
467
  corr = checker.match_list(meds, top_k=5, min_score=0.4)
468
 
469
  elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
470
+ print("[Spell DEBUG] Using RapidFuzzMatcher")
471
  Cls = priv_classes["RapidFuzzMatcher"]
472
  checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
473
  corr = checker.match_list(meds, top_k=5, threshold=70.0)
474
+
475
  else:
476
+ spell_section += (
477
+ "- Spell-check backend unavailable "
478
+ "(no matcher class for selected algorithm).\n"
479
+ )
480
  else:
481
  if not meds:
482
  spell_section += "- No medications extracted (empty med list).\n"
483
  if not drug_xlsx_path:
484
+ spell_section += (
485
+ "- Drug Excel dictionary path missing "
486
+ "(drug_xlsx_path is None).\n"
487
+ )
488
+
489
  except Exception as e:
490
+ print(f"[Spell DEBUG] Spell-check error: {e}")
491
  spell_section += f"- Spell-check error: {e}\n"
492
 
493
  # Format suggestions (top-5 per med, with scores + CER)
 
506
 
507
  final_md = md + spell_section
508
 
509
+ # Final yield: raw OCR text + full markdown
510
  yield final_ocr_text, final_md
511
 
512
 
513
+ # ============================================================
514
+ # UI
515
+ # ============================================================
 
516
  image_examples = [
517
  ["examples/3.jpg"],
518
  ["examples/1.jpg"],
 
535
  label="Example Images",
536
  )
537
 
 
538
  spell_choice = gr.Radio(
539
  choices=["TF-IDF + Phonetic", "SymSpell", "RapidFuzz"],
540
  label="Select Spell-check Approach",
 
595
  value="Chandra-OCR",
596
  )
597
 
598
+ # Hard-coded query text (passed into the 'text' parameter)
599
  query_state = gr.State(
600
  "Extract medicine or drugs names along with dosage amount or quantity"
601
  )
 
623
 
624
 
625
 
626
+
627
+
628
+ ################################### version 3 ########################################
629
+
630
+ # import os
631
+ # import time
632
+ # from threading import Thread
633
+ # from typing import Iterable, Dict, Any, Optional, List
634
+
635
+ # import gradio as gr
636
+ # import spaces
637
+ # import torch
638
+ # from PIL import Image
639
+ # import pandas as pd # for reading Excel and debugging
640
+
641
+ # from transformers import (
642
+ # Qwen3VLForConditionalGeneration,
643
+ # AutoModelForCausalLM,
644
+ # AutoProcessor,
645
+ # TextIteratorStreamer,
646
+ # )
647
+
648
+ # from gradio.themes import Soft
649
+ # from gradio.themes.utils import colors, fonts, sizes
650
+
651
+ # # -----------------------------
652
+ # # Character Error Rate (CER)
653
+ # # -----------------------------
654
+
655
+
656
+ # def levenshtein(a: str, b: str) -> int:
657
+ # """Levenshtein distance to calculate CER."""
658
+ # a, b = a.lower(), b.lower()
659
+ # if a == b:
660
+ # return 0
661
+ # if not a:
662
+ # return len(b)
663
+ # if not b:
664
+ # return len(a)
665
+ # dp = list(range(len(b) + 1))
666
+ # for i, ca in enumerate(a, 1):
667
+ # prev = dp[0]
668
+ # dp[0] = i
669
+ # for j, cb in enumerate(b, 1):
670
+ # cur = dp[j]
671
+ # cost = 0 if ca == cb else 1
672
+ # dp[j] = min(dp[j] + 1, dp[j - 1] + 1, prev + cost)
673
+ # prev = cur
674
+ # return dp[-1]
675
+
676
+
677
+ # def character_error_rate(pred: str, target: str) -> float:
678
+ # """Calculate the Character Error Rate (CER) in percent."""
679
+ # target = target or ""
680
+ # distance = levenshtein(pred, target)
681
+ # return (distance / len(target)) * 100 if len(target) > 0 else 0.0
682
+
683
+
684
+ # # -----------------------------
685
+ # # Private repo: dynamic import
686
+ # # -----------------------------
687
+ # import importlib.util
688
+ # from huggingface_hub import hf_hub_download
689
+
690
+ # REPO_ID = "IFMedTech/Medibot_OCR_model" # private backend repo
691
+
692
+ # # Map filenames to exported class names
693
+ # PY_MODULES = {
694
+ # "ner.py": "ClinicalNER", # NER is only applied for Dots.OCR output
695
+ # "tfidf_phonetic.py": "TfidfPhoneticMatcher",
696
+ # "symspell_matcher.py": "SymSpellMatcher",
697
+ # "rapidfuzz_matcher.py": "RapidFuzzMatcher",
698
+ # # 'Medibot_Drugs_Cleaned_Updated.xlsx' is data, not a module
699
+ # }
700
+
701
+ # HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
702
+
703
+
704
+ # def _dynamic_import(module_path: str, class_name: str):
705
+ # spec = importlib.util.spec_from_file_location(class_name, module_path)
706
+ # module = importlib.util.module_from_spec(spec)
707
+ # spec.loader.exec_module(module) # type: ignore
708
+ # return getattr(module, class_name)
709
+
710
+
711
+ # # Load private classes and Excel dictionary (once at import time)
712
+ # priv_classes: Dict[str, Any] = {}
713
+ # drug_xlsx_path: Optional[str] = None
714
+ # try:
715
+ # if HF_TOKEN is None:
716
+ # print("[Private] WARNING: HUGGINGFACE_TOKEN not set; NER/Spell-check will be unavailable.")
717
+ # else:
718
+ # for fname, cls in PY_MODULES.items():
719
+ # path = hf_hub_download(repo_id=REPO_ID, filename=fname, token=HF_TOKEN)
720
+ # if cls:
721
+ # priv_classes[cls] = _dynamic_import(path, cls)
722
+ # print(f"[Private] Loaded class: {cls} from {fname}")
723
+ # drug_xlsx_path = hf_hub_download(
724
+ # repo_id=REPO_ID,
725
+ # filename="Medibot_Drugs_Cleaned_Updated.xlsx",
726
+ # token=HF_TOKEN,
727
+ # )
728
+ # print(f"[Private] Downloaded Excel at: {drug_xlsx_path}")
729
+
730
+ # # DEBUG: read Excel once and print its shape
731
+ # try:
732
+ # df_debug = pd.read_excel(drug_xlsx_path)
733
+ # print(f"[Private] Excel loaded successfully. Shape: {df_debug.shape}")
734
+ # except Exception as e:
735
+ # print(f"[Private] ERROR reading Excel for debug: {e}")
736
+
737
+ # except Exception as e:
738
+ # print(f"[Private] ERROR loading private backend: {e}")
739
+ # priv_classes = {}
740
+ # drug_xlsx_path = None
741
+
742
+ # # ----------------------------
743
+ # # THEME
744
+ # # ----------------------------
745
+ # colors.steel_blue = colors.Color(
746
+ # name="steel_blue",
747
+ # c50="#EBF3F8",
748
+ # c100="#D3E5F0",
749
+ # c200="#A8CCE1",
750
+ # c300="#7DB3D2",
751
+ # c400="#529AC3",
752
+ # c500="#4682B4",
753
+ # c600="#3E72A0",
754
+ # c700="#36638C",
755
+ # c800="#2E5378",
756
+ # c900="#264364",
757
+ # c950="#1E3450",
758
+ # )
759
+
760
+
761
+ # class SteelBlueTheme(Soft):
762
+ # def __init__(
763
+ # self,
764
+ # *,
765
+ # primary_hue: colors.Color | str = colors.gray,
766
+ # secondary_hue: colors.Color | str = colors.steel_blue,
767
+ # neutral_hue: colors.Color | str = colors.slate,
768
+ # text_size: sizes.Size | str = sizes.text_lg,
769
+ # font: fonts.Font | str | Iterable[fonts.Font | str] = (
770
+ # fonts.GoogleFont("Outfit"),
771
+ # "Arial",
772
+ # "sans-serif",
773
+ # ),
774
+ # font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
775
+ # fonts.GoogleFont("IBM Plex Mono"),
776
+ # "ui-monospace",
777
+ # "monospace",
778
+ # ),
779
+ # ):
780
+ # super().__init__(
781
+ # primary_hue=primary_hue,
782
+ # secondary_hue=secondary_hue,
783
+ # neutral_hue=neutral_hue,
784
+ # text_size=text_size,
785
+ # font=font,
786
+ # font_mono=font_mono,
787
+ # )
788
+ # super().set(
789
+ # background_fill_primary="*primary_50",
790
+ # background_fill_primary_dark="*primary_900",
791
+ # body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
792
+ # body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
793
+ # button_primary_text_color="white",
794
+ # button_primary_text_color_hover="white",
795
+ # button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
796
+ # button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
797
+ # button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
798
+ # button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
799
+ # button_secondary_text_color="black",
800
+ # button_secondary_text_color_hover="white",
801
+ # button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
802
+ # button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
803
+ # button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
804
+ # button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
805
+ # slider_color="*secondary_500",
806
+ # slider_color_dark="*secondary_600",
807
+ # block_title_text_weight="600",
808
+ # block_border_width="3px",
809
+ # block_shadow="*shadow_drop_lg",
810
+ # button_primary_shadow="*shadow_drop_lg",
811
+ # button_large_padding="11px",
812
+ # color_accent_soft="*primary_100",
813
+ # block_label_background_fill="*primary_200",
814
+ # )
815
+
816
+
817
+ # steel_blue_theme = SteelBlueTheme()
818
+
819
+ # css = """
820
+ # #main-title h1 { font-size: 2.3em !important; }
821
+ # #output-title h2 { font-size: 2.1em !important; }
822
+ # """
823
+
824
+ # # ----------------------------
825
+ # # RUNTIME / DEVICE
826
+ # # ----------------------------
827
+ # os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
828
+ # print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
829
+ # print("torch.__version__ =", torch.__version__)
830
+ # print("torch.version.cuda =", torch.version.cuda)
831
+ # print("cuda available =", torch.cuda.is_available())
832
+ # print("cuda device count =", torch.cuda.device_count())
833
+ # if torch.cuda.is_available():
834
+ # print("using device =", torch.cuda.get_device_name(0))
835
+
836
+ # use_cuda = torch.cuda.is_available()
837
+ # device = torch.device("cuda:0" if use_cuda else "cpu")
838
+ # if use_cuda:
839
+ # torch.backends.cudnn.benchmark = True
840
+
841
+ # DTYPE_FP16 = torch.float16 if use_cuda else torch.float32
842
+ # DTYPE_BF16 = torch.bfloat16 if use_cuda else torch.float32
843
+
844
+ # # ----------------------------
845
+ # # OCR MODELS: Chandra-OCR + Dots.OCR
846
+ # # ----------------------------
847
+ # # 1) Chandra-OCR (Qwen3VL)
848
+ # MODEL_ID_V = "datalab-to/chandra"
849
+ # processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
850
+ # model_v = Qwen3VLForConditionalGeneration.from_pretrained(
851
+ # MODEL_ID_V, trust_remote_code=True, torch_dtype=DTYPE_FP16
852
+ # ).to(device).eval()
853
+
854
+ # # 2) Dots.OCR (flash_attn2 if available, else SDPA)
855
+ # MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"
856
+ # processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
857
+ # attn_impl = "sdpa"
858
+ # try:
859
+ # import flash_attn # noqa: F401
860
+
861
+ # if use_cuda:
862
+ # attn_impl = "flash_attention_2"
863
+ # except Exception:
864
+ # attn_impl = "sdpa"
865
+
866
+ # model_d = AutoModelForCausalLM.from_pretrained(
867
+ # MODEL_PATH_D,
868
+ # attn_implementation=attn_impl,
869
+ # torch_dtype=DTYPE_BF16,
870
+ # device_map="auto" if use_cuda else None,
871
+ # trust_remote_code=True,
872
+ # ).eval()
873
+ # if not use_cuda:
874
+ # model_d.to(device)
875
+
876
+ # # ----------------------------
877
+ # # GENERATION (OCR → NER (Dots only) → Spell-check + CER)
878
+ # # ----------------------------
879
+ # MAX_MAX_NEW_TOKENS = 4096
880
+ # DEFAULT_MAX_NEW_TOKENS = 2048
881
+
882
+
883
+ # @spaces.GPU # you can add duration=... if needed, e.g. @spaces.GPU(duration=240)
884
+ # def generate_image(
885
+ # model_name: str,
886
+ # text: str,
887
+ # image: Image.Image,
888
+ # max_new_tokens: int,
889
+ # temperature: float,
890
+ # top_p: float,
891
+ # top_k: int,
892
+ # repetition_penalty: float,
893
+ # spell_algo: str,
894
+ # ):
895
+ # """
896
+ # 1) Stream OCR tokens to Raw output (unchanged).
897
+ # 2) If model_name == 'Dots.OCR', run ClinicalNER → list[str] meds.
898
+ # For Chandra-OCR, skip NER.
899
+ # 3) Apply selected spell-check (TF-IDF+Phonetic / SymSpell / RapidFuzz)
900
+ # using Excel dict, and compute CER for each suggestion.
901
+ # 4) Markdown shows OCR text, NER list (if any), and spell-check top-5
902
+ # suggestions with scores and CER.
903
+ # """
904
+ # if image is None:
905
+ # # Two outputs: raw textbox + markdown
906
+ # yield "Please upload an image.", "Please upload an image."
907
+ # return
908
+
909
+ # if model_name == "Chandra-OCR":
910
+ # processor, model = processor_v, model_v
911
+ # elif model_name == "Dots.OCR":
912
+ # processor, model = processor_d, model_d
913
+ # else:
914
+ # yield "Invalid model selected.", "Invalid model selected."
915
+ # return
916
+
917
+ # # Build prompt from text parameter (kept via gr.State)
918
+ # messages = [
919
+ # {
920
+ # "role": "user",
921
+ # "content": [
922
+ # {"type": "image"},
923
+ # {"type": "text", "text": text},
924
+ # ],
925
+ # }
926
+ # ]
927
+ # prompt_full = processor.apply_chat_template(
928
+ # messages, tokenize=False, add_generation_prompt=True
929
+ # )
930
+
931
+ # # Preprocess
932
+ # inputs = processor(
933
+ # text=[prompt_full], images=[image], return_tensors="pt", padding=True
934
+ # )
935
+ # inputs = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}
936
+
937
+ # # Streamer
938
+ # tokenizer = getattr(processor, "tokenizer", None) or processor
939
+ # streamer = TextIteratorStreamer(
940
+ # tokenizer, skip_prompt=True, skip_special_tokens=True
941
+ # )
942
+
943
+ # gen_kwargs = dict(
944
+ # **inputs,
945
+ # streamer=streamer,
946
+ # max_new_tokens=max_new_tokens,
947
+ # do_sample=True,
948
+ # temperature=temperature,
949
+ # top_p=top_p,
950
+ # top_k=top_k,
951
+ # repetition_penalty=repetition_penalty,
952
+ # )
953
+
954
+ # # Start generation in background thread
955
+ # thread = Thread(target=model.generate, kwargs=gen_kwargs)
956
+ # thread.start()
957
+
958
+ # # 1) Live OCR streaming to Raw (and mirror to Markdown during stream)
959
+ # buffer = ""
960
+ # for new_text in streamer:
961
+ # buffer += new_text.replace("<|im_end|>", "")
962
+ # time.sleep(0.01)
963
+ # # During streaming, just show the raw text in both components
964
+ # yield buffer, buffer
965
+
966
+ # # Final raw text
967
+ # final_ocr_text = buffer.strip()
968
+
969
+ # # -------------------------
970
+ # # 2) Clinical NER (Dots.OCR only)
971
+ # # -------------------------
972
+ # meds: List[str] = []
973
+ # if model_name == "Dots.OCR":
974
+ # try:
975
+ # if "ClinicalNER" in priv_classes and HF_TOKEN is not None:
976
+ # ClinicalNER = priv_classes["ClinicalNER"]
977
+ # ner = ClinicalNER(token=HF_TOKEN) # model_id can be passed if needed
978
+ # ner_output = ner(final_ocr_text) or []
979
+ # # Expecting list[str]; defensively drop non-string and blank entries:
980
+ # meds = [m.strip() for m in ner_output if isinstance(m, str) and m.strip()]
981
+ # print("[NER] Extracted meds (from ClinicalNER):", meds)
982
+ # else:
983
+ # print("[NER] ClinicalNER not available or no HF token.")
984
+ # except Exception as e:
985
+ # print(f"[NER] Error running ClinicalNER: {e}")
986
+
987
+ # # Fallback: if NER returned no meds (or was skipped, as for Chandra-OCR), treat each non-empty OCR line as a medication candidate
988
+ # if not meds:
989
+ # meds = [line.strip() for line in final_ocr_text.splitlines() if line.strip()]
990
+ # print("[NER] Using line-based meds fallback, count:", len(meds))
991
+
992
+ # print("[DEBUG] meds count:", len(meds))
993
+ # print("[DEBUG] drug_xlsx_path in generate_image:", drug_xlsx_path)
994
+
995
+ # # -------------------------
996
+ # # Build Markdown: OCR text + NER section
997
+ # # -------------------------
998
+ # md = "### Raw OCR Output\n"
999
+ # md += "```\n" + (final_ocr_text or "(empty)") + "\n```\n"
1000
+
1001
+ # md += "\n---\n### Clinical NER (Medications)\n"
1002
+ # if meds:
1003
+ # for m in meds:
1004
+ # md += f"- {m}\n"
1005
+ # else:
1006
+ # md += "- None detected\n"
1007
+
1008
+ # # -------------------------
1009
+ # # 3) Spell-check (med list) with CER
1010
+ # # -------------------------
1011
+ # spell_section = "\n---\n### Spell-check suggestions (" + spell_algo + ")\n"
1012
+ # corr: Dict[str, List] = {}
1013
+
1014
+ # try:
1015
+ # if meds and drug_xlsx_path:
1016
+
1017
+ # try:
1018
+ # df_debug = pd.read_excel(drug_xlsx_path)
1019
+ # print(f"[Private] Excel loaded successfully. Shape: {df_debug.shape}")
1020
+ # except Exception as e:
1021
+ # print(f"[Private] ERROR reading Excel for debug: {e}")
1022
+
1023
+
1024
+ # if (
1025
+ # spell_algo == "TF-IDF + Phonetic"
1026
+ # and "TfidfPhoneticMatcher" in priv_classes
1027
+ # ):
1028
+ # Cls = priv_classes["TfidfPhoneticMatcher"]
1029
+ # checker = Cls(
1030
+ # xlsx_path=drug_xlsx_path,
1031
+ # column="Combined_Drugs",
1032
+ # ngram_size=3,
1033
+ # phonetic_weight=0.4,
1034
+ # )
1035
+ # corr = checker.match_list(meds, top_k=5, tfidf_threshold=0.15)
1036
+
1037
+ # elif spell_algo == "SymSpell" and "SymSpellMatcher" in priv_classes:
1038
+ # Cls = priv_classes["SymSpellMatcher"]
1039
+ # checker = Cls(
1040
+ # xlsx_path=drug_xlsx_path,
1041
+ # column="Combined_Drugs",
1042
+ # max_edit=2,
1043
+ # prefix_len=7,
1044
+ # )
1045
+ # corr = checker.match_list(meds, top_k=5, min_score=0.4)
1046
+
1047
+ # elif spell_algo == "RapidFuzz" and "RapidFuzzMatcher" in priv_classes:
1048
+ # Cls = priv_classes["RapidFuzzMatcher"]
1049
+ # checker = Cls(xlsx_path=drug_xlsx_path, column="Combined_Drugs")
1050
+ # corr = checker.match_list(meds, top_k=5, threshold=70.0)
1051
+ # else:
1052
+ # spell_section += "- Spell-check backend unavailable (no matcher class).\n"
1053
+ # else:
1054
+ # if not meds:
1055
+ # spell_section += "- No medications extracted (empty med list).\n"
1056
+ # if not drug_xlsx_path:
1057
+ # spell_section += "- Drug Excel dictionary path missing (drug_xlsx_path is None).\n"
1058
+ # except Exception as e:
1059
+ # spell_section += f"- Spell-check error: {e}\n"
1060
+
1061
+ # # Format suggestions (top-5 per med, with scores + CER)
1062
+ # if corr:
1063
+ # for raw in meds:
1064
+ # suggestions = corr.get(raw, [])
1065
+ # if suggestions:
1066
+ # spell_section += f"- **{raw}**\n"
1067
+ # for cand, score in suggestions:
1068
+ # cer = character_error_rate(cand, raw)
1069
+ # spell_section += (
1070
+ # f" - {cand} (score={score:.3f}, CER={cer:.3f}%)\n"
1071
+ # )
1072
+ # else:
1073
+ # spell_section += f"- **{raw}**\n - (no suggestions)\n"
1074
+
1075
+ # final_md = md + spell_section
1076
+
1077
+ # # 4) Final yield: raw unchanged; Markdown with NER + spell-check + CER
1078
+ # yield final_ocr_text, final_md
1079
+
1080
+
1081
+ # # ----------------------------
1082
+ # # UI
1083
+ # # ----------------------------
1084
+ # # IMPORTANT: each example row must supply one value per component in `inputs` (here: only the image)
1085
+ # image_examples = [
1086
+ # ["examples/3.jpg"],
1087
+ # ["examples/1.jpg"],
1088
+ # ["examples/2.jpg"],
1089
+ # ]
1090
+
1091
+ # with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
1092
+ # gr.Markdown(
1093
+ # "# **Handwritten Doctor's Prescription Reading**", elem_id="main-title"
1094
+ # )
1095
+ # with gr.Row():
1096
+ # with gr.Column(scale=2):
1097
+ # image_upload = gr.Image(
1098
+ # type="pil", label="Upload Image", height=290
1099
+ # )
1100
+ # image_submit = gr.Button("Submit", variant="primary")
1101
+ # gr.Examples(
1102
+ # examples=image_examples,
1103
+ # inputs=[image_upload],
1104
+ # label="Example Images",
1105
+ # )
1106
+
1107
+ # # Spell-check selection
1108
+ # spell_choice = gr.Radio(
1109
+ # choices=["TF-IDF + Phonetic", "SymSpell", "RapidFuzz"],
1110
+ # label="Select Spell-check Approach",
1111
+ # value="TF-IDF + Phonetic",
1112
+ # )
1113
+
1114
+ # with gr.Accordion("Advanced options", open=False):
1115
+ # max_new_tokens = gr.Slider(
1116
+ # label="Max new tokens",
1117
+ # minimum=1,
1118
+ # maximum=MAX_MAX_NEW_TOKENS,
1119
+ # step=1,
1120
+ # value=DEFAULT_MAX_NEW_TOKENS,
1121
+ # )
1122
+ # temperature = gr.Slider(
1123
+ # label="Temperature",
1124
+ # minimum=0.1,
1125
+ # maximum=4.0,
1126
+ # step=0.1,
1127
+ # value=0.7,
1128
+ # )
1129
+ # top_p = gr.Slider(
1130
+ # label="Top-p (nucleus sampling)",
1131
+ # minimum=0.05,
1132
+ # maximum=1.0,
1133
+ # step=0.05,
1134
+ # value=0.9,
1135
+ # )
1136
+ # top_k = gr.Slider(
1137
+ # label="Top-k",
1138
+ # minimum=1,
1139
+ # maximum=1000,
1140
+ # step=1,
1141
+ # value=50,
1142
+ # )
1143
+ # repetition_penalty = gr.Slider(
1144
+ # label="Repetition penalty",
1145
+ # minimum=1.0,
1146
+ # maximum=2.0,
1147
+ # step=0.05,
1148
+ # value=1.1,
1149
+ # )
1150
+
1151
+ # with gr.Column(scale=3):
1152
+ # gr.Markdown("## Output", elem_id="output-title")
1153
+ # output = gr.Textbox(
1154
+ # label="Raw Output Stream",
1155
+ # interactive=False,
1156
+ # lines=11,
1157
+ # show_copy_button=True,
1158
+ # )
1159
+ # with gr.Accordion("(Result.md)", open=False):
1160
+ # markdown_output = gr.Markdown(label="(Result.Md)")
1161
+
1162
+ # model_choice = gr.Radio(
1163
+ # choices=["Chandra-OCR", "Dots.OCR"],
1164
+ # label="Select OCR Model",
1165
+ # value="Chandra-OCR",
1166
+ # )
1167
+
1168
+ # # Hard-coded instruction text, passed as gr.State to match the 'text' parameter
1169
+ # query_state = gr.State(
1170
+ # "Extract medicine or drugs names along with dosage amount or quantity"
1171
+ # )
1172
+
1173
+ # image_submit.click(
1174
+ # fn=generate_image,
1175
+ # inputs=[
1176
+ # model_choice,
1177
+ # query_state,
1178
+ # image_upload,
1179
+ # max_new_tokens,
1180
+ # temperature,
1181
+ # top_p,
1182
+ # top_k,
1183
+ # repetition_penalty,
1184
+ # spell_choice,
1185
+ # ],
1186
+ # outputs=[output, markdown_output],
1187
+ # )
1188
+
1189
+ # if __name__ == "__main__":
1190
+ # demo.queue(max_size=50).launch(
1191
+ # mcp_server=True, ssr_mode=False, show_error=True
1192
+ # )
1193
+
1194
+
1195
+
1196
+ ######################################### version 2 #########################################################################
1197
 
1198
  # import os
1199