Upload PII detection model OpenMed-PII-LiteClinical-Small-66M-v1

Browse files

Files changed (10)

README.md +21 -21
all_results.json +21 -21
config.json +1 -2
eval_results.json +8 -8
model.safetensors +2 -2
test_results.json +8 -8
tokenizer.json +0 -0
tokenizer_config.json +1 -1
train_results.json +5 -5
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 language:
   - en
 license: apache-2.0
-base_model: distilbert/distilbert-base-cased
 tags:
   - token-classification
   - ner
@@ -38,13 +38,13 @@ model-index:
           split: test
         metrics:
           - type: f1
-            value: 0.9483
             name: F1 (micro)
           - type: precision
-            value: 0.9530
             name: Precision
           - type: recall
-            value: 0.9436
             name: Recall
 widget:
   - text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
@@ -55,7 +55,7 @@ widget:
 **PII Detection Model** | 66M Parameters | Open Source
-[![F1 Score](https://img.shields.io/badge/F1-94.83%25-brightgreen)]() [![Precision](https://img.shields.io/badge/Precision-95.30%25-blue)]() [![Recall](https://img.shields.io/badge/Recall-94.36%25-orange)]()
 ## Model Description
@@ -74,12 +74,12 @@ Evaluated on a stratified 2,000-sample test set from NVIDIA Nemotron-PII:
 | Metric | Score |
 |:---|:---:|
-| **Micro F1** | **0.9483** |
-| Precision | 0.9530 |
-| Recall | 0.9436 |
-| Macro F1 | 0.9503 |
-| Weighted F1 | 0.9473 |
-| Accuracy | 0.9931 |
 ### Top 10 PII Models
@@ -100,11 +100,11 @@ Evaluated on a stratified 2,000-sample test set from NVIDIA Nemotron-PII:
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
-| `employee_id` | 0.997 | 0.994 | 1.000 | 163 |
-| `biometric_identifier` | 0.996 | 0.991 | 1.000 | 231 |
-| `email` | 0.995 | 0.995 | 0.995 | 750 |
-| `date_of_birth` | 0.995 | 0.989 | 1.000 | 273 |
-| `medical_record_number` | 0.994 | 0.989 | 1.000 | 262 |
 ### Challenging Entities
@@ -112,11 +112,11 @@ These entity types have lower performance and may benefit from additional post-p
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
-| `language` | 0.896 | 0.972 | 0.831 | 207 |
-| `pin` | 0.891 | 0.927 | 0.858 | 134 |
-| `time` | 0.852 | 0.859 | 0.844 | 463 |
-| `sexuality` | 0.833 | 0.773 | 0.904 | 83 |
-| `occupation` | 0.644 | 0.697 | 0.599 | 708 |
 ## Supported Entity Types

 language:
   - en
 license: apache-2.0
+base_model: distilbert/distilbert-base-uncased
 tags:
   - token-classification
   - ner
           split: test
         metrics:
           - type: f1
+            value: 0.9485
             name: F1 (micro)
           - type: precision
+            value: 0.9554
             name: Precision
           - type: recall
+            value: 0.9418
             name: Recall
 widget:
   - text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
 **PII Detection Model** | 66M Parameters | Open Source
+[![F1 Score](https://img.shields.io/badge/F1-94.85%25-brightgreen)]() [![Precision](https://img.shields.io/badge/Precision-95.54%25-blue)]() [![Recall](https://img.shields.io/badge/Recall-94.18%25-orange)]()
 ## Model Description
 | Metric | Score |
 |:---|:---:|
+| **Micro F1** | **0.9485** |
+| Precision | 0.9554 |
+| Recall | 0.9418 |
+| Macro F1 | 0.9484 |
+| Weighted F1 | 0.9468 |
+| Accuracy | 0.9930 |
 ### Top 10 PII Models
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
+| `biometric_identifier` | 1.000 | 1.000 | 1.000 | 234 |
+| `credit_debit_card` | 0.995 | 1.000 | 0.991 | 215 |
+| `email` | 0.994 | 0.995 | 0.993 | 763 |
+| `date_of_birth` | 0.993 | 0.986 | 1.000 | 273 |
+| `health_plan_beneficiary_number` | 0.991 | 0.982 | 1.000 | 216 |
 ### Challenging Entities
 | Entity | F1 | Precision | Recall | Support |
 |:---|:---:|:---:|:---:|:---:|
+| `pin` | 0.862 | 0.872 | 0.853 | 136 |
+| `time` | 0.859 | 0.894 | 0.826 | 472 |
+| `gender` | 0.829 | 0.806 | 0.853 | 190 |
+| `sexuality` | 0.824 | 0.758 | 0.904 | 83 |
+| `occupation` | 0.647 | 0.755 | 0.566 | 724 |
 ## Supported Entity Types

all_results.json CHANGED Viewed

@@ -1,24 +1,24 @@
 {
     "epoch": 3.0,
-    "eval_accuracy": 0.993924587244714,
-    "eval_f1": 0.948089578626854,
-    "eval_loss": 0.02283928357064724,
-    "eval_precision": 0.9507276353697274,
-    "eval_recall": 0.945466121409506,
-    "eval_runtime": 11.3615,
-    "eval_samples_per_second": 440.082,
-    "eval_steps_per_second": 6.953,
-    "test_accuracy": 0.9939600221131221,
-    "test_f1": 0.949191080722605,
-    "test_loss": 0.022294577211141586,
-    "test_precision": 0.9512299574706483,
-    "test_recall": 0.9471609255822436,
-    "test_runtime": 162.5659,
-    "test_samples_per_second": 276.811,
-    "test_steps_per_second": 4.331,
-    "total_flos": 9918920981578752.0,
-    "train_loss": 0.09467728419150338,
-    "train_runtime": 496.6001,
-    "train_samples_per_second": 302.054,
-    "train_steps_per_second": 9.442
 }

 {
     "epoch": 3.0,
+    "eval_accuracy": 0.9943964513221458,
+    "eval_f1": 0.9546176592780861,
+    "eval_loss": 0.023015499114990234,
+    "eval_precision": 0.9615498227648681,
+    "eval_recall": 0.9477847333430388,
+    "eval_runtime": 11.052,
+    "eval_samples_per_second": 452.409,
+    "eval_steps_per_second": 7.148,
+    "test_accuracy": 0.9943050269458373,
+    "test_f1": 0.9548047025061205,
+    "test_loss": 0.02243383601307869,
+    "test_precision": 0.961166670708998,
+    "test_recall": 0.9485264003318991,
+    "test_runtime": 164.7983,
+    "test_samples_per_second": 273.061,
+    "test_steps_per_second": 4.272,
+    "total_flos": 9393035107602432.0,
+    "train_loss": 0.10323514050322825,
+    "train_runtime": 488.206,
+    "train_samples_per_second": 307.247,
+    "train_steps_per_second": 9.605
 }

config.json CHANGED Viewed

@@ -229,12 +229,11 @@
   "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
-  "output_past": true,
   "pad_token_id": 0,
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
   "sinusoidal_pos_embds": false,
   "tie_weights_": true,
   "transformers_version": "4.57.1",
-  "vocab_size": 28996
 }

   "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
   "pad_token_id": 0,
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
   "sinusoidal_pos_embds": false,
   "tie_weights_": true,
   "transformers_version": "4.57.1",
+  "vocab_size": 30522
 }

eval_results.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
     "epoch": 3.0,
-    "eval_accuracy": 0.993924587244714,
-    "eval_f1": 0.948089578626854,
-    "eval_loss": 0.02283928357064724,
-    "eval_precision": 0.9507276353697274,
-    "eval_recall": 0.945466121409506,
-    "eval_runtime": 11.3615,
-    "eval_samples_per_second": 440.082,
-    "eval_steps_per_second": 6.953
 }

 {
     "epoch": 3.0,
+    "eval_accuracy": 0.9943964513221458,
+    "eval_f1": 0.9546176592780861,
+    "eval_loss": 0.023015499114990234,
+    "eval_precision": 0.9615498227648681,
+    "eval_recall": 0.9477847333430388,
+    "eval_runtime": 11.052,
+    "eval_samples_per_second": 452.409,
+    "eval_steps_per_second": 7.148
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:377c6337610ab7721d205a502d9aafd0b27fe4ea759a1d4b514be38c45bfe751
-size 261102048

 version https://git-lfs.github.com/spec/v1
+oid sha256:50f1b835752fec6d82a261035ff3a13f4a4aba8033a19904ac3524358bff1fa3
+size 265789928

test_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "test_accuracy": 0.9939600221131221,
-    "test_f1": 0.949191080722605,
-    "test_loss": 0.022294577211141586,
-    "test_precision": 0.9512299574706483,
-    "test_recall": 0.9471609255822436,
-    "test_runtime": 162.5659,
-    "test_samples_per_second": 276.811,
-    "test_steps_per_second": 4.331
 }

 {
+    "test_accuracy": 0.9943050269458373,
+    "test_f1": 0.9548047025061205,
+    "test_loss": 0.02243383601307869,
+    "test_precision": 0.961166670708998,
+    "test_recall": 0.9485264003318991,
+    "test_runtime": 164.7983,
+    "test_samples_per_second": 273.061,
+    "test_steps_per_second": 4.272
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -43,7 +43,7 @@
   },
   "clean_up_tokenization_spaces": false,
   "cls_token": "[CLS]",
-  "do_lower_case": false,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,

   },
   "clean_up_tokenization_spaces": false,
   "cls_token": "[CLS]",
+  "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 3.0,
-    "total_flos": 9918920981578752.0,
-    "train_loss": 0.09467728419150338,
-    "train_runtime": 496.6001,
-    "train_samples_per_second": 302.054,
-    "train_steps_per_second": 9.442
 }

 {
     "epoch": 3.0,
+    "total_flos": 9393035107602432.0,
+    "train_loss": 0.10323514050322825,
+    "train_runtime": 488.206,
+    "train_samples_per_second": 307.247,
+    "train_steps_per_second": 9.605
 }

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff