Spaces:
Sleeping
Sleeping
feat (start): initial setup
Browse files- .gitignore +2 -0
- Dockerfile +13 -0
- app/clinical_ner.py +106 -0
- app/server_clinical_ner.py +114 -0
- app/test_clinical_ner.py +20 -0
- requirements.txt +13 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
| 2 |
+
.venv
|
Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image pinned to Python 3.9, matching the pinned requirements.txt versions.
FROM python:3.9

# Run as an unprivileged user (uid 1000) — the Hugging Face Spaces convention.
RUN useradd -m -u 1000 user
USER user
# Make user-level pip installs (~/.local/bin) resolvable, e.g. the uvicorn binary.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install dependencies before the app code so this expensive layer
# is cached across source-only changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# App code lands directly in /app (server_clinical_ner.py at the import root).
COPY --chown=user ./app /app
# Port 7860 is the default port Hugging Face Spaces exposes.
CMD ["uvicorn", "server_clinical_ner:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/clinical_ner.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
|
| 3 |
+
class ClinicalNER:
    """
    Named Entity Recognition over clinical text using the
    samrawal/bert-base-uncased_clinical-ner model.
    """

    def __init__(self):
        """
        Initialize the HF token-classification pipeline.

        aggregation_strategy="simple" already groups most subword tokens;
        _merge_subwords() remains as a safety net for any '##' tokens the
        strategy leaves unmerged.
        """
        self.ner_pipeline = pipeline(
            "ner",
            model="samrawal/bert-base-uncased_clinical-ner",
            aggregation_strategy="simple"
        )

    def _merge_subwords(self, entities):
        """
        Merge subword tokens (those starting with ##) into complete words.

        Args:
            entities (list): Entity dicts from the pipeline, each with keys
                entity_group, score, word, start, end.

        Returns:
            list: Merged entities with complete words. Input dicts are
            copied, not mutated.
        """
        if not entities:
            return []

        merged = []
        i = 0

        while i < len(entities):
            current = entities[i].copy()
            word = current['word']
            end = current['end']

            # Absorb consecutive '##' continuations of the same entity type.
            j = i + 1
            while j < len(entities):
                next_entity = entities[j]
                if (next_entity['word'].startswith('##') and
                        next_entity['entity_group'] == current['entity_group']):
                    word += next_entity['word'][2:]  # drop the '##' prefix
                    end = next_entity['end']
                    j += 1
                else:
                    break

            current['word'] = word
            current['end'] = end
            merged.append(current)

            # Resume scanning after the last token we absorbed.
            i = j

        return merged

    def basic_ner(self, text):
        """
        Perform NER on the input text, merging leftover subword tokens.

        Args:
            text (str): Input text to analyze.

        Returns:
            list: Dicts with keys entity_group, score, word, start, end.
        """
        return self._merge_subwords(self.ner_pipeline(text))

    def prolog_ner(self, text):
        """
        Perform NER and return results as Tau Prolog compatible facts.

        Args:
            text (str): Input text to analyze.

        Returns:
            str: One fact per line, in the form
                 entity(Id, Type, Word, Start, End, Score).
        """
        # Reuse basic_ner instead of duplicating the pipeline + merge steps.
        merged_entities = self.basic_ner(text)

        prolog_facts = []
        for i, entity in enumerate(merged_entities):
            # Escape backslashes before single quotes so the word is always a
            # valid quoted Prolog atom.
            word = entity['word'].replace("\\", "\\\\").replace("'", "\\'")

            # Format: entity(Id, Type, Word, Start, End, Score)
            fact = (
                f"entity({i}, '{entity['entity_group']}', "
                f"'{word}', {entity['start']}, "
                f"{entity['end']}, {entity['score']:.4f})."
            )
            prolog_facts.append(fact)

        return "\n".join(prolog_facts)
|
app/server_clinical_ner.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from clinical_ner import ClinicalNER
|
| 4 |
+
import uvicorn
|
| 5 |
+
|
| 6 |
+
# Initialize FastAPI app
app = FastAPI(
    title="Clinical NER API",
    # Fixed: the served model is bert-base-uncased_clinical-ner, not Bio_ClinicalBERT.
    description="Named Entity Recognition API using bert-base-uncased_clinical-ner",
    version="1.0.0"
)

# Model handle; populated exactly once at startup (singleton pattern).
ner_model = None

@app.on_event("startup")
async def startup_event():
    """Load the NER model once when the server starts.

    NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
    lifespan handlers; acceptable with the pinned fastapi==0.109.0.
    """
    global ner_model
    ner_model = ClinicalNER()
    print("NER model loaded successfully!")
|
| 22 |
+
|
| 23 |
+
# Request model
class TextRequest(BaseModel):
    # Clinical free text to run NER over.
    text: str

    class Config:
        # Example shown in the interactive /docs UI.
        json_schema_extra = {
            "example": {
                "text": "Patient presents with hypertension and diabetes. Prescribed metformin 500mg."
            }
        }
|
| 33 |
+
|
| 34 |
+
# Response models
class Entity(BaseModel):
    # Entity label assigned by the model.
    entity_group: str
    # Model confidence score.
    score: float
    # Surface form (subwords merged by ClinicalNER).
    word: str
    # Character offsets into the input text.
    start: int
    end: int

class BasicNERResponse(BaseModel):
    entities: list[Entity]
    # Number of entities found.
    count: int

class PrologNERResponse(BaseModel):
    # Facts, one per line: entity(Id, Type, Word, Start, End, Score).
    prolog_facts: str
    # Number of facts (lines).
    count: int
|
| 49 |
+
|
| 50 |
+
@app.get("/")
async def root():
    """Describe the API and list its available endpoints."""
    endpoint_index = {
        "/ner/basic": "POST - Get basic NER annotations",
        "/ner/prolog": "POST - Get Prolog facts",
        "/docs": "GET - Interactive API documentation",
    }
    return {"message": "Clinical NER API", "endpoints": endpoint_index}
|
| 61 |
+
|
| 62 |
+
@app.post("/ner/basic", response_model=BasicNERResponse)
async def ner_basic(request: TextRequest):
    """
    Perform basic NER on the input text.

    Returns a list of detected entities with their types, positions, and
    confidence scores.

    Raises:
        HTTPException: 400 if the text is empty/whitespace,
                       500 if the model fails.
    """
    # Validate OUTSIDE the try: previously the 400 raised inside it was
    # caught by `except Exception` and re-raised as a 500.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    try:
        entities = ner_model.basic_ner(request.text)
    except Exception as e:
        # Surface model/pipeline failures as 500, preserving the cause chain.
        raise HTTPException(status_code=500, detail=f"Error processing text: {str(e)}") from e

    return {
        "entities": entities,
        "count": len(entities)
    }
|
| 81 |
+
|
| 82 |
+
@app.post("/ner/prolog", response_model=PrologNERResponse)
async def ner_prolog(request: TextRequest):
    """
    Perform NER and return results as Prolog facts.

    Returns Prolog facts in the format: entity(Id, Type, Word, Start, End, Score).

    Raises:
        HTTPException: 400 if the text is empty/whitespace,
                       500 if the model fails.
    """
    # Validate OUTSIDE the try: previously the 400 raised inside it was
    # caught by `except Exception` and re-raised as a 500.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    try:
        prolog_facts = ner_model.prolog_ner(request.text)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing text: {str(e)}") from e

    # Count the number of facts (lines); empty output means zero facts.
    count = len(prolog_facts.split('\n')) if prolog_facts else 0

    return {
        "prolog_facts": prolog_facts,
        "count": count
    }
|
| 104 |
+
|
| 105 |
+
@app.get("/health")
async def health_check():
    """Liveness probe; also reports whether the NER model has loaded."""
    model_ready = ner_model is not None
    return {"status": "healthy", "model_loaded": model_ready}
|
| 112 |
+
|
| 113 |
+
# Local dev entry point; the Docker image runs uvicorn on port 7860 instead.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
app/test_clinical_ner.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke test: run ClinicalNER on one sample sentence and print both formats."""
from clinical_ner import ClinicalNER

# Build the recognizer (loads the HF model).
recognizer = ClinicalNER()

sample = "Patient presents with hypertension and diabetes. Prescribed metformin 500mg."

# Plain annotation output.
print("Basic NER:")
for ent in recognizer.basic_ner(sample):
    print(f"  {ent['word']} -> {ent['entity_group']} (score: {ent['score']:.4f})")

print("\n" + "="*60 + "\n")

# Tau-Prolog-compatible fact output.
print("Prolog NER:")
print(recognizer.prolog_ner(sample))
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pinned dependencies for the Clinical NER service (Python 3.9 base image).
# FastAPI and server
fastapi==0.109.0
uvicorn[standard]==0.27.0
pydantic==2.5.3

# Transformers and ML
transformers==4.36.2
torch==2.1.2
tokenizers==0.15.0

# Additional dependencies
numpy==1.26.3
huggingface-hub==0.20.2
|