Upload PII detection model OpenMed-PII-BiomedBERT-Base-110M-v1

Browse files

Files changed (8)

README.md +21 -21
all_results.json +21 -21
eval_results.json +8 -8
model.safetensors +1 -1
test_results.json +8 -8
tokenizer.json +0 -0
train_results.json +5 -5
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 language:
   - en
 license: apache-2.0
-base_model: microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract
 tags:
   - token-classification
   - ner
@@ -38,13 +38,13 @@ model-index:
           split: test
         metrics:
           - type: f1
-            value: 0.9520
             name: F1 (micro)
           - type: precision
-            value: 0.9563
             name: Precision
           - type: recall
-            value: 0.9477
             name: Recall
 widget:
   - text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
@@ -55,7 +55,7 @@ widget:
 **PII Detection Model** | 110M Parameters | Open Source
-[![F1 Score](https://img.shields.io/badge/F1-95.20%25-brightgreen)]() [![Precision](https://img.shields.io/badge/Precision-95.63%25-blue)]() [![Recall](https://img.shields.io/badge/Recall-94.77%25-orange)]()
 ## Model Description
@@ -74,12 +74,12 @@ Evaluated on a stratified 2,000-sample test set from NVIDIA Nemotron-PII:
 | Metric | Score |
 |:---|:---:|
-| **Micro F1** | **0.9520** |
-| Precision | 0.9563 |
-| Recall | 0.9477 |
-| Macro F1 | 0.9557 |
-| Weighted F1 | 0.9510 |
-| Accuracy | 0.9936 |
 ### Top 10 PII Models
@@ -100,11 +100,11 @@ Evaluated on a stratified 2,000-sample test set from NVIDIA Nemotron-PII:
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
-| `blood_type` | 1.000 | 1.000 | 1.000 | 136 |
-| `ssn` | 1.000 | 1.000 | 1.000 | 141 |
-| `tax_id` | 1.000 | 1.000 | 1.000 | 43 |
-| `biometric_identifier` | 0.998 | 0.996 | 1.000 | 233 |
-| `credit_debit_card` | 0.998 | 0.995 | 1.000 | 214 |
 ### Challenging Entities
@@ -112,11 +112,11 @@ These entity types have lower performance and may benefit from additional post-p
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
-| `education_level` | 0.896 | 0.930 | 0.865 | 200 |
-| `time` | 0.866 | 0.878 | 0.855 | 470 |
-| `pin` | 0.861 | 0.855 | 0.868 | 136 |
-| `sexuality` | 0.849 | 0.800 | 0.905 | 84 |
-| `occupation` | 0.673 | 0.733 | 0.623 | 718 |
 ## Supported Entity Types

 language:
   - en
 license: apache-2.0
+base_model: microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext
 tags:
   - token-classification
   - ner
           split: test
         metrics:
           - type: f1
+            value: 0.9497
             name: F1 (micro)
           - type: precision
+            value: 0.9537
             name: Precision
           - type: recall
+            value: 0.9457
             name: Recall
 widget:
   - text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
 **PII Detection Model** | 110M Parameters | Open Source
+[![F1 Score](https://img.shields.io/badge/F1-94.97%25-brightgreen)]() [![Precision](https://img.shields.io/badge/Precision-95.37%25-blue)]() [![Recall](https://img.shields.io/badge/Recall-94.57%25-orange)]()
 ## Model Description
 | Metric | Score |
 |:---|:---:|
+| **Micro F1** | **0.9497** |
+| Precision | 0.9537 |
+| Recall | 0.9457 |
+| Macro F1 | 0.9489 |
+| Weighted F1 | 0.9488 |
+| Accuracy | 0.9932 |
 ### Top 10 PII Models
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
+| `credit_debit_card` | 1.000 | 1.000 | 1.000 | 214 |
+| `biometric_identifier` | 0.998 | 0.996 | 1.000 | 234 |
+| `health_plan_beneficiary_number` | 0.998 | 0.995 | 1.000 | 216 |
+| `email` | 0.995 | 0.997 | 0.992 | 761 |
+| `ssn` | 0.993 | 1.000 | 0.986 | 141 |
 ### Challenging Entities
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
+| `time` | 0.868 | 0.928 | 0.815 | 471 |
+| `pin` | 0.851 | 0.842 | 0.860 | 136 |
+| `sexuality` | 0.809 | 0.731 | 0.905 | 84 |
+| `gender` | 0.776 | 0.712 | 0.852 | 189 |
+| `occupation` | 0.676 | 0.741 | 0.622 | 719 |
 ## Supported Entity Types

all_results.json CHANGED Viewed

@@ -1,24 +1,24 @@
 {
     "epoch": 3.0,
-    "eval_accuracy": 0.9946379251588227,
-    "eval_f1": 0.9578908824606371,
-    "eval_loss": 0.021427959203720093,
-    "eval_precision": 0.9607286259915777,
-    "eval_recall": 0.9550698534780704,
-    "eval_runtime": 14.9793,
-    "eval_samples_per_second": 333.793,
-    "eval_steps_per_second": 5.274,
-    "test_accuracy": 0.994592134929861,
-    "test_f1": 0.9575683536318046,
-    "test_loss": 0.020539097487926483,
-    "test_precision": 0.960039772620678,
-    "test_recall": 0.9551096262601754,
-    "test_runtime": 187.2157,
-    "test_samples_per_second": 240.364,
-    "test_steps_per_second": 3.76,
-    "total_flos": 1.923768900984269e+16,
-    "train_loss": 0.07172592910629114,
-    "train_runtime": 850.4697,
-    "train_samples_per_second": 176.373,
-    "train_steps_per_second": 5.513
 }

 {
     "epoch": 3.0,
+    "eval_accuracy": 0.994305698056874,
+    "eval_f1": 0.9567553048616707,
+    "eval_loss": 0.023389853537082672,
+    "eval_precision": 0.961497877352703,
+    "eval_recall": 0.9520592880573442,
+    "eval_runtime": 14.9319,
+    "eval_samples_per_second": 334.853,
+    "eval_steps_per_second": 5.291,
+    "test_accuracy": 0.9944906320722552,
+    "test_f1": 0.9580974134172793,
+    "test_loss": 0.02194284088909626,
+    "test_precision": 0.9625433948474329,
+    "test_recall": 0.9536923150647733,
+    "test_runtime": 189.6478,
+    "test_samples_per_second": 237.282,
+    "test_steps_per_second": 3.712,
+    "total_flos": 1.8922726522850304e+16,
+    "train_loss": 0.1081566352736444,
+    "train_runtime": 846.98,
+    "train_samples_per_second": 177.1,
+    "train_steps_per_second": 5.536
 }

eval_results.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
     "epoch": 3.0,
-    "eval_accuracy": 0.9946379251588227,
-    "eval_f1": 0.9578908824606371,
-    "eval_loss": 0.021427959203720093,
-    "eval_precision": 0.9607286259915777,
-    "eval_recall": 0.9550698534780704,
-    "eval_runtime": 14.9793,
-    "eval_samples_per_second": 333.793,
-    "eval_steps_per_second": 5.274
 }

 {
     "epoch": 3.0,
+    "eval_accuracy": 0.994305698056874,
+    "eval_f1": 0.9567553048616707,
+    "eval_loss": 0.023389853537082672,
+    "eval_precision": 0.961497877352703,
+    "eval_recall": 0.9520592880573442,
+    "eval_runtime": 14.9319,
+    "eval_samples_per_second": 334.853,
+    "eval_steps_per_second": 5.291
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a41b8e69ec9ddb7b922ea4ee7626a3310c8dbaf87a117593689d4634845da62d
 size 435915992

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c85e6ea27c17123ab4059e9f93861e27b260435705ac6459744b89c59dda74d
 size 435915992

test_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "test_accuracy": 0.994592134929861,
-    "test_f1": 0.9575683536318046,
-    "test_loss": 0.020539097487926483,
-    "test_precision": 0.960039772620678,
-    "test_recall": 0.9551096262601754,
-    "test_runtime": 187.2157,
-    "test_samples_per_second": 240.364,
-    "test_steps_per_second": 3.76
 }

 {
+    "test_accuracy": 0.9944906320722552,
+    "test_f1": 0.9580974134172793,
+    "test_loss": 0.02194284088909626,
+    "test_precision": 0.9625433948474329,
+    "test_recall": 0.9536923150647733,
+    "test_runtime": 189.6478,
+    "test_samples_per_second": 237.282,
+    "test_steps_per_second": 3.712
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 3.0,
-    "total_flos": 1.923768900984269e+16,
-    "train_loss": 0.07172592910629114,
-    "train_runtime": 850.4697,
-    "train_samples_per_second": 176.373,
-    "train_steps_per_second": 5.513
 }

 {
     "epoch": 3.0,
+    "total_flos": 1.8922726522850304e+16,
+    "train_loss": 0.1081566352736444,
+    "train_runtime": 846.98,
+    "train_samples_per_second": 177.1,
+    "train_steps_per_second": 5.536
 }

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff