Upload PII detection model OpenMed-PII-BiomedBERT-Base-110M-v1
Browse files
- README.md +21 -21
- all_results.json +21 -21
- eval_results.json +8 -8
- model.safetensors +1 -1
- test_results.json +8 -8
- tokenizer.json +0 -0
- train_results.json +5 -5
- vocab.txt +0 -0
README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
language:
|
| 3 |
- en
|
| 4 |
license: apache-2.0
|
| 5 |
-
base_model: microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract
|
| 6 |
tags:
|
| 7 |
- token-classification
|
| 8 |
- ner
|
|
@@ -38,13 +38,13 @@ model-index:
|
|
| 38 |
split: test
|
| 39 |
metrics:
|
| 40 |
- type: f1
|
| 41 |
-
value: 0.
|
| 42 |
name: F1 (micro)
|
| 43 |
- type: precision
|
| 44 |
-
value: 0.
|
| 45 |
name: Precision
|
| 46 |
- type: recall
|
| 47 |
-
value: 0.
|
| 48 |
name: Recall
|
| 49 |
widget:
|
| 50 |
- text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
|
|
@@ -55,7 +55,7 @@ widget:
|
|
| 55 |
|
| 56 |
**PII Detection Model** | 110M Parameters | Open Source
|
| 57 |
|
| 58 |
-
[
|
| 43 |
- type: precision
|
| 44 |
+
value: 0.9537
|
| 45 |
name: Precision
|
| 46 |
- type: recall
|
| 47 |
+
value: 0.9457
|
| 48 |
name: Recall
|
| 49 |
widget:
|
| 50 |
- text: "Dr. Sarah Johnson (SSN: 123-45-6789) can be reached at sarah.johnson@hospital.org or 555-123-4567. She lives at 123 Oak Street, Boston, MA 02108."
|
|
|
|
| 55 |
|
| 56 |
**PII Detection Model** | 110M Parameters | Open Source
|
| 57 |
|
| 58 |
+
[]() []() []()
|
| 59 |
|
| 60 |
## Model Description
|
| 61 |
|
|
|
|
| 74 |
|
| 75 |
| Metric | Score |
|
| 76 |
|:---|:---:|
|
| 77 |
+
| **Micro F1** | **0.9497** |
|
| 78 |
+
| Precision | 0.9537 |
|
| 79 |
+
| Recall | 0.9457 |
|
| 80 |
+
| Macro F1 | 0.9489 |
|
| 81 |
+
| Weighted F1 | 0.9488 |
|
| 82 |
+
| Accuracy | 0.9932 |
|
| 83 |
|
| 84 |
### Top 10 PII Models
|
| 85 |
|
|
|
|
| 100 |
|
| 101 |
| Entity | F1 | Precision | Recall | Support |
|
| 102 |
|:---|:---:|:---:|:---:|:---:|
|
| 103 |
+
| `credit_debit_card` | 1.000 | 1.000 | 1.000 | 214 |
|
| 104 |
+
| `biometric_identifier` | 0.998 | 0.996 | 1.000 | 234 |
|
| 105 |
+
| `health_plan_beneficiary_number` | 0.998 | 0.995 | 1.000 | 216 |
|
| 106 |
+
| `email` | 0.995 | 0.997 | 0.992 | 761 |
|
| 107 |
+
| `ssn` | 0.993 | 1.000 | 0.986 | 141 |
|
| 108 |
|
| 109 |
### Challenging Entities
|
| 110 |
|
|
|
|
| 112 |
|
| 113 |
| Entity | F1 | Precision | Recall | Support |
|
| 114 |
|:---|:---:|:---:|:---:|:---:|
|
| 115 |
+
| `time` | 0.868 | 0.928 | 0.815 | 471 |
|
| 116 |
+
| `pin` | 0.851 | 0.842 | 0.860 | 136 |
|
| 117 |
+
| `sexuality` | 0.809 | 0.731 | 0.905 | 84 |
|
| 118 |
+
| `gender` | 0.776 | 0.712 | 0.852 | 189 |
|
| 119 |
+
| `occupation` | 0.676 | 0.741 | 0.622 | 719 |
|
| 120 |
|
| 121 |
## Supported Entity Types
|
| 122 |
|
all_results.json
CHANGED
|
@@ -1,24 +1,24 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_f1": 0.
|
| 5 |
-
"eval_loss": 0.
|
| 6 |
-
"eval_precision": 0.
|
| 7 |
-
"eval_recall": 0.
|
| 8 |
-
"eval_runtime": 14.
|
| 9 |
-
"eval_samples_per_second":
|
| 10 |
-
"eval_steps_per_second": 5.
|
| 11 |
-
"test_accuracy": 0.
|
| 12 |
-
"test_f1": 0.
|
| 13 |
-
"test_loss": 0.
|
| 14 |
-
"test_precision": 0.
|
| 15 |
-
"test_recall": 0.
|
| 16 |
-
"test_runtime":
|
| 17 |
-
"test_samples_per_second":
|
| 18 |
-
"test_steps_per_second": 3.
|
| 19 |
-
"total_flos": 1.
|
| 20 |
-
"train_loss": 0.
|
| 21 |
-
"train_runtime":
|
| 22 |
-
"train_samples_per_second":
|
| 23 |
-
"train_steps_per_second": 5.
|
| 24 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
+
"eval_accuracy": 0.994305698056874,
|
| 4 |
+
"eval_f1": 0.9567553048616707,
|
| 5 |
+
"eval_loss": 0.023389853537082672,
|
| 6 |
+
"eval_precision": 0.961497877352703,
|
| 7 |
+
"eval_recall": 0.9520592880573442,
|
| 8 |
+
"eval_runtime": 14.9319,
|
| 9 |
+
"eval_samples_per_second": 334.853,
|
| 10 |
+
"eval_steps_per_second": 5.291,
|
| 11 |
+
"test_accuracy": 0.9944906320722552,
|
| 12 |
+
"test_f1": 0.9580974134172793,
|
| 13 |
+
"test_loss": 0.02194284088909626,
|
| 14 |
+
"test_precision": 0.9625433948474329,
|
| 15 |
+
"test_recall": 0.9536923150647733,
|
| 16 |
+
"test_runtime": 189.6478,
|
| 17 |
+
"test_samples_per_second": 237.282,
|
| 18 |
+
"test_steps_per_second": 3.712,
|
| 19 |
+
"total_flos": 1.8922726522850304e+16,
|
| 20 |
+
"train_loss": 0.1081566352736444,
|
| 21 |
+
"train_runtime": 846.98,
|
| 22 |
+
"train_samples_per_second": 177.1,
|
| 23 |
+
"train_steps_per_second": 5.536
|
| 24 |
}
|
eval_results.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_f1": 0.
|
| 5 |
-
"eval_loss": 0.
|
| 6 |
-
"eval_precision": 0.
|
| 7 |
-
"eval_recall": 0.
|
| 8 |
-
"eval_runtime": 14.
|
| 9 |
-
"eval_samples_per_second":
|
| 10 |
-
"eval_steps_per_second": 5.
|
| 11 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
+
"eval_accuracy": 0.994305698056874,
|
| 4 |
+
"eval_f1": 0.9567553048616707,
|
| 5 |
+
"eval_loss": 0.023389853537082672,
|
| 6 |
+
"eval_precision": 0.961497877352703,
|
| 7 |
+
"eval_recall": 0.9520592880573442,
|
| 8 |
+
"eval_runtime": 14.9319,
|
| 9 |
+
"eval_samples_per_second": 334.853,
|
| 10 |
+
"eval_steps_per_second": 5.291
|
| 11 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 435915992
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c85e6ea27c17123ab4059e9f93861e27b260435705ac6459744b89c59dda74d
|
| 3 |
size 435915992
|
test_results.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"test_accuracy": 0.
|
| 3 |
-
"test_f1": 0.
|
| 4 |
-
"test_loss": 0.
|
| 5 |
-
"test_precision": 0.
|
| 6 |
-
"test_recall": 0.
|
| 7 |
-
"test_runtime":
|
| 8 |
-
"test_samples_per_second":
|
| 9 |
-
"test_steps_per_second": 3.
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"test_accuracy": 0.9944906320722552,
|
| 3 |
+
"test_f1": 0.9580974134172793,
|
| 4 |
+
"test_loss": 0.02194284088909626,
|
| 5 |
+
"test_precision": 0.9625433948474329,
|
| 6 |
+
"test_recall": 0.9536923150647733,
|
| 7 |
+
"test_runtime": 189.6478,
|
| 8 |
+
"test_samples_per_second": 237.282,
|
| 9 |
+
"test_steps_per_second": 3.712
|
| 10 |
}
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
-
"total_flos": 1.
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second": 5.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 3.0,
|
| 3 |
+
"total_flos": 1.8922726522850304e+16,
|
| 4 |
+
"train_loss": 0.1081566352736444,
|
| 5 |
+
"train_runtime": 846.98,
|
| 6 |
+
"train_samples_per_second": 177.1,
|
| 7 |
+
"train_steps_per_second": 5.536
|
| 8 |
}
|
vocab.txt
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|