import json

from datasets import load_dataset, load_metric
from transformers import Trainer, AutoModelForSequenceClassification, AutoTokenizer


# Load the evaluation settings. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open('../config/config.json', encoding='utf-8') as f:
    config = json.load(f)

# Restore the sequence classifier from the local '../model' directory.
# NOTE(review): the tokenizer is loaded from config['model_name'] rather than
# from '../model' -- confirm both refer to the same vocabulary.
model = AutoModelForSequenceClassification.from_pretrained('../model')
tokenizer = AutoTokenizer.from_pretrained(config['model_name'])

# Read the CSV as the 'test' split and tokenize its 'text' column in batches,
# with padding="max_length" and truncation enabled.
dataset = load_dataset('csv', data_files={'test': '../data/test.csv'})
tokenized_datasets = dataset.map(
    lambda x: tokenizer(x['text'], padding="max_length", truncation=True),
    batched=True,
)

# Accuracy metric used by compute_metrics.
# NOTE(review): load_metric is deprecated in newer `datasets` releases in
# favour of the separate `evaluate` package -- confirm the pinned version
# still provides it.
metric = load_metric("accuracy")


def compute_metrics(eval_pred):
    """Score evaluation predictions with the module-level ``metric``.

    Args:
        eval_pred: A ``(predictions, labels)`` pair, as handed to the
            ``compute_metrics`` callback by ``Trainer``. ``predictions`` is
            either the logits array itself or a tuple whose first element is
            expected to be the logits.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row
        of logits, compared against ``labels``.
    """
    logits, labels = eval_pred
    # Models that return extra outputs next to the logits make the Trainer
    # pass a tuple here; a tuple has no ``argmax``, so keep only the logits.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = logits.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=labels)


# Wrap the model in a Trainer purely to drive evaluation; no training
# arguments are passed, so the Trainer uses its defaults.
trainer = Trainer(model=model, tokenizer=tokenizer, compute_metrics=compute_metrics)

# Evaluate on the tokenized 'test' split and print the resulting metrics.
results = trainer.evaluate(eval_dataset=tokenized_datasets['test'])
print(results)