Upload PII detection model OpenMed-PII-LiteClinical-Small-66M-v1
Browse files
- README.md +21 -21
- all_results.json +21 -21
- config.json +1 -2
- eval_results.json +8 -8
- model.safetensors +2 -2
- test_results.json +8 -8
- tokenizer.json +0 -0
- tokenizer_config.json +1 -1
- train_results.json +5 -5
- vocab.txt +0 -0
README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
language:
|
| 3 |
- en
|
| 4 |
license: apache-2.0
|
| 5 |
-
base_model: distilbert/distilbert-base-
|
| 6 |
tags:
|
| 7 |
- token-classification
|
| 8 |
- ner
|
|
@@ -38,13 +38,13 @@ model-index:
|
|
| 38 |
split: test
|
| 39 |
metrics:
|
| 40 |
- type: f1
|
| 41 |
-
value: 0.
|
| 42 |
name: F1 (micro)
|
| 43 |
- type: precision
|
| 44 |
-
value: 0.
|
| 45 |
name: Precision
|
| 46 |
- type: recall
|
| 47 |
-
value: 0.
|
| 48 |
name: Recall
|
| 49 |
widget:
|
| 50 |
- text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
|
|
@@ -55,7 +55,7 @@ widget:
|
|
| 55 |
|
| 56 |
**PII Detection Model** | 66M Parameters | Open Source
|
| 57 |
|
| 58 |
-
[
|
| 43 |
- type: precision
|
| 44 |
+
value: 0.9554
|
| 45 |
name: Precision
|
| 46 |
- type: recall
|
| 47 |
+
value: 0.9418
|
| 48 |
name: Recall
|
| 49 |
widget:
|
| 50 |
- text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
|
|
|
|
| 55 |
|
| 56 |
**PII Detection Model** | 66M Parameters | Open Source
|
| 57 |
|
| 58 |
+
[]() []() []()
|
| 59 |
|
| 60 |
## Model Description
|
| 61 |
|
|
|
|
| 74 |
|
| 75 |
| Metric | Score |
|
| 76 |
|:---|:---:|
|
| 77 |
+
| **Micro F1** | **0.9485** |
|
| 78 |
+
| Precision | 0.9554 |
|
| 79 |
+
| Recall | 0.9418 |
|
| 80 |
+
| Macro F1 | 0.9484 |
|
| 81 |
+
| Weighted F1 | 0.9468 |
|
| 82 |
+
| Accuracy | 0.9930 |
|
| 83 |
|
| 84 |
### Top 10 PII Models
|
| 85 |
|
|
|
|
| 100 |
|
| 101 |
| Entity | F1 | Precision | Recall | Support |
|
| 102 |
|:---|:---:|:---:|:---:|:---:|
|
| 103 |
+
| `biometric_identifier` | 1.000 | 1.000 | 1.000 | 234 |
|
| 104 |
+
| `credit_debit_card` | 0.995 | 1.000 | 0.991 | 215 |
|
| 105 |
+
| `email` | 0.994 | 0.995 | 0.993 | 763 |
|
| 106 |
+
| `date_of_birth` | 0.993 | 0.986 | 1.000 | 273 |
|
| 107 |
+
| `health_plan_beneficiary_number` | 0.991 | 0.982 | 1.000 | 216 |
|
| 108 |
|
| 109 |
### Challenging Entities
|
| 110 |
|
|
|
|
| 112 |
|
| 113 |
| Entity | F1 | Precision | Recall | Support |
|
| 114 |
|:---|:---:|:---:|:---:|:---:|
|
| 115 |
+
| `pin` | 0.862 | 0.872 | 0.853 | 136 |
|
| 116 |
+
| `time` | 0.859 | 0.894 | 0.826 | 472 |
|
| 117 |
+
| `gender` | 0.829 | 0.806 | 0.853 | 190 |
|
| 118 |
+
| `sexuality` | 0.824 | 0.758 | 0.904 | 83 |
|
| 119 |
+
| `occupation` | 0.647 | 0.755 | 0.566 | 724 |
|
| 120 |
|
| 121 |
## Supported Entity Types
|
| 122 |
|
all_results.json
CHANGED
|
@@ -1,24 +1,24 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_f1": 0.
|
| 5 |
-
"eval_loss": 0.
|
| 6 |
-
"eval_precision": 0.
|
| 7 |
-
"eval_recall": 0.
|
| 8 |
-
"eval_runtime": 11.
|
| 9 |
-
"eval_samples_per_second":
|
| 10 |
-
"eval_steps_per_second":
|
| 11 |
-
"test_accuracy": 0.
|
| 12 |
-
"test_f1": 0.
|
| 13 |
-
"test_loss": 0.
|
| 14 |
-
"test_precision": 0.
|
| 15 |
-
"test_recall": 0.
|
| 16 |
-
"test_runtime":
|
| 17 |
-
"test_samples_per_second":
|
| 18 |
-
"test_steps_per_second": 4.
|
| 19 |
-
"total_flos":
|
| 20 |
-
"train_loss": 0.
|
| 21 |
-
"train_runtime":
|
| 22 |
-
"train_samples_per_second":
|
| 23 |
-
"train_steps_per_second": 9.
|
| 24 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
+
"eval_accuracy": 0.9943964513221458,
|
| 4 |
+
"eval_f1": 0.9546176592780861,
|
| 5 |
+
"eval_loss": 0.023015499114990234,
|
| 6 |
+
"eval_precision": 0.9615498227648681,
|
| 7 |
+
"eval_recall": 0.9477847333430388,
|
| 8 |
+
"eval_runtime": 11.052,
|
| 9 |
+
"eval_samples_per_second": 452.409,
|
| 10 |
+
"eval_steps_per_second": 7.148,
|
| 11 |
+
"test_accuracy": 0.9943050269458373,
|
| 12 |
+
"test_f1": 0.9548047025061205,
|
| 13 |
+
"test_loss": 0.02243383601307869,
|
| 14 |
+
"test_precision": 0.961166670708998,
|
| 15 |
+
"test_recall": 0.9485264003318991,
|
| 16 |
+
"test_runtime": 164.7983,
|
| 17 |
+
"test_samples_per_second": 273.061,
|
| 18 |
+
"test_steps_per_second": 4.272,
|
| 19 |
+
"total_flos": 9393035107602432.0,
|
| 20 |
+
"train_loss": 0.10323514050322825,
|
| 21 |
+
"train_runtime": 488.206,
|
| 22 |
+
"train_samples_per_second": 307.247,
|
| 23 |
+
"train_steps_per_second": 9.605
|
| 24 |
}
|
config.json
CHANGED
|
@@ -229,12 +229,11 @@
|
|
| 229 |
"model_type": "distilbert",
|
| 230 |
"n_heads": 12,
|
| 231 |
"n_layers": 6,
|
| 232 |
-
"output_past": true,
|
| 233 |
"pad_token_id": 0,
|
| 234 |
"qa_dropout": 0.1,
|
| 235 |
"seq_classif_dropout": 0.2,
|
| 236 |
"sinusoidal_pos_embds": false,
|
| 237 |
"tie_weights_": true,
|
| 238 |
"transformers_version": "4.57.1",
|
| 239 |
-
"vocab_size":
|
| 240 |
}
|
|
|
|
| 229 |
"model_type": "distilbert",
|
| 230 |
"n_heads": 12,
|
| 231 |
"n_layers": 6,
|
|
|
|
| 232 |
"pad_token_id": 0,
|
| 233 |
"qa_dropout": 0.1,
|
| 234 |
"seq_classif_dropout": 0.2,
|
| 235 |
"sinusoidal_pos_embds": false,
|
| 236 |
"tie_weights_": true,
|
| 237 |
"transformers_version": "4.57.1",
|
| 238 |
+
"vocab_size": 30522
|
| 239 |
}
|
eval_results.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_f1": 0.
|
| 5 |
-
"eval_loss": 0.
|
| 6 |
-
"eval_precision": 0.
|
| 7 |
-
"eval_recall": 0.
|
| 8 |
-
"eval_runtime": 11.
|
| 9 |
-
"eval_samples_per_second":
|
| 10 |
-
"eval_steps_per_second":
|
| 11 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
+
"eval_accuracy": 0.9943964513221458,
|
| 4 |
+
"eval_f1": 0.9546176592780861,
|
| 5 |
+
"eval_loss": 0.023015499114990234,
|
| 6 |
+
"eval_precision": 0.9615498227648681,
|
| 7 |
+
"eval_recall": 0.9477847333430388,
|
| 8 |
+
"eval_runtime": 11.052,
|
| 9 |
+
"eval_samples_per_second": 452.409,
|
| 10 |
+
"eval_steps_per_second": 7.148
|
| 11 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50f1b835752fec6d82a261035ff3a13f4a4aba8033a19904ac3524358bff1fa3
|
| 3 |
+
size 265789928
|
test_results.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"test_accuracy": 0.
|
| 3 |
-
"test_f1": 0.
|
| 4 |
-
"test_loss": 0.
|
| 5 |
-
"test_precision": 0.
|
| 6 |
-
"test_recall": 0.
|
| 7 |
-
"test_runtime":
|
| 8 |
-
"test_samples_per_second":
|
| 9 |
-
"test_steps_per_second": 4.
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"test_accuracy": 0.9943050269458373,
|
| 3 |
+
"test_f1": 0.9548047025061205,
|
| 4 |
+
"test_loss": 0.02243383601307869,
|
| 5 |
+
"test_precision": 0.961166670708998,
|
| 6 |
+
"test_recall": 0.9485264003318991,
|
| 7 |
+
"test_runtime": 164.7983,
|
| 8 |
+
"test_samples_per_second": 273.061,
|
| 9 |
+
"test_steps_per_second": 4.272
|
| 10 |
}
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
CHANGED
|
@@ -43,7 +43,7 @@
|
|
| 43 |
},
|
| 44 |
"clean_up_tokenization_spaces": false,
|
| 45 |
"cls_token": "[CLS]",
|
| 46 |
-
"do_lower_case":
|
| 47 |
"extra_special_tokens": {},
|
| 48 |
"mask_token": "[MASK]",
|
| 49 |
"model_max_length": 512,
|
|
|
|
| 43 |
},
|
| 44 |
"clean_up_tokenization_spaces": false,
|
| 45 |
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
"extra_special_tokens": {},
|
| 48 |
"mask_token": "[MASK]",
|
| 49 |
"model_max_length": 512,
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
-
"total_flos":
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second": 9.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
+
"total_flos": 9393035107602432.0,
|
| 4 |
+
"train_loss": 0.10323514050322825,
|
| 5 |
+
"train_runtime": 488.206,
|
| 6 |
+
"train_samples_per_second": 307.247,
|
| 7 |
+
"train_steps_per_second": 9.605
|
| 8 |
}
|
vocab.txt
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|