NLP2425
/

transformers

electra

Model card Files Files and versions

xet

Community

josipabebic committed on Jun 7

Commit

9e6d1e9

verified ·

1 Parent(s): 573e720

Delete CroSlo code.ipynb

Browse files

Files changed (1) hide show

CroSlo code.ipynb +0 -825

CroSlo code.ipynb DELETED Viewed

@@ -1,825 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "36ee7edb",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\n",
-      "=== Treniranje i evaluacija za trening skup: train_combined ===\n",
-      "\n",
-      "--- Fine-tuning model: EMBEDDIA/crosloengual-bert ---\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at EMBEDDIA/crosloengual-bert and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
-      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8c4ee5202c46457ab2c37d2f8e6a67ae",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/7577 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "    <div>\n",
-       "      \n",
-       "      <progress value='1422' max='1422' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-       "      [1422/1422 1:27:47, Epoch 3/3]\n",
-       "    </div>\n",
-       "    <table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       " <tr style=\"text-align: left;\">\n",
-       "      <th>Step</th>\n",
-       "      <th>Training Loss</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>50</td>\n",
-       "      <td>0.855500</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>100</td>\n",
-       "      <td>0.748700</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>150</td>\n",
-       "      <td>0.619600</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>200</td>\n",
-       "      <td>0.618300</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>250</td>\n",
-       "      <td>0.630800</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>300</td>\n",
-       "      <td>0.639400</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>350</td>\n",
-       "      <td>0.636500</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>400</td>\n",
-       "      <td>0.595900</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>450</td>\n",
-       "      <td>0.598500</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>500</td>\n",
-       "      <td>0.464200</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>550</td>\n",
-       "      <td>0.430400</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>600</td>\n",
-       "      <td>0.456200</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>650</td>\n",
-       "      <td>0.461900</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>700</td>\n",
-       "      <td>0.459500</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>750</td>\n",
-       "      <td>0.419300</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>800</td>\n",
-       "      <td>0.469700</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>850</td>\n",
-       "      <td>0.463700</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>900</td>\n",
-       "      <td>0.411900</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>950</td>\n",
-       "      <td>0.461800</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1000</td>\n",
-       "      <td>0.364100</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1050</td>\n",
-       "      <td>0.329400</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1100</td>\n",
-       "      <td>0.346800</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1150</td>\n",
-       "      <td>0.262100</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1200</td>\n",
-       "      <td>0.290200</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1250</td>\n",
-       "      <td>0.223900</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1300</td>\n",
-       "      <td>0.330000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1350</td>\n",
-       "      <td>0.307000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>1400</td>\n",
-       "      <td>0.236200</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table><p>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Evaluacija na test skupu test-1\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "23e05a0258f045b4901a9fa9bfc7c151",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/653 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      "[[111  47   7]\n",
-      " [ 77 328  25]\n",
-      " [  3  28  27]]\n",
-      "\n",
-      "Classification Report:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "    negative       0.58      0.67      0.62       165\n",
-      "     neutral       0.81      0.76      0.79       430\n",
-      "    positive       0.46      0.47      0.46        58\n",
-      "\n",
-      "    accuracy                           0.71       653\n",
-      "   macro avg       0.62      0.63      0.62       653\n",
-      "weighted avg       0.72      0.71      0.72       653\n",
-      "\n",
-      "Predikcije spremljene u results_train_combined_croslo/predictions_test_1.csv\n",
-      "\n",
-      "Evaluacija na test skupu test-2\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e3c39ebf0f60449880c3d03a8c00e518",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/741 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      "[[198  15   3]\n",
-      " [ 16 411   4]\n",
-      " [  5  11  78]]\n",
-      "\n",
-      "Classification Report:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "    negative       0.90      0.92      0.91       216\n",
-      "     neutral       0.94      0.95      0.95       431\n",
-      "    positive       0.92      0.83      0.87        94\n",
-      "\n",
-      "    accuracy                           0.93       741\n",
-      "   macro avg       0.92      0.90      0.91       741\n",
-      "weighted avg       0.93      0.93      0.93       741\n",
-      "\n",
-      "Predikcije spremljene u results_train_combined_croslo/predictions_test_2.csv\n",
-      "\n",
-      "Evaluacija na test skupu test-3\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0bbd241f299b482b991116e930c1355a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/793 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      "[[204  56   7]\n",
-      " [  7 254   2]\n",
-      " [  9 116 138]]\n",
-      "\n",
-      "Classification Report:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "    negative       0.93      0.76      0.84       267\n",
-      "     neutral       0.60      0.97      0.74       263\n",
-      "    positive       0.94      0.52      0.67       263\n",
-      "\n",
-      "    accuracy                           0.75       793\n",
-      "   macro avg       0.82      0.75      0.75       793\n",
-      "weighted avg       0.82      0.75      0.75       793\n",
-      "\n",
-      "Predikcije spremljene u results_train_combined_croslo/predictions_test_3.csv\n",
-      "\n",
-      "Sažetak metrika po test skupovima s prosjekom:\n",
-      "        Test Set  Accuracy  F1 Macro  Precision Macro  Recall Macro\n",
-      "0         test-1  0.713629  0.624216         0.617558      0.633678\n",
-      "1         test-2  0.927126  0.909619         0.920753      0.900017\n",
-      "2         test-3  0.751576  0.749418         0.820764      0.751513\n",
-      "Average      NaN  0.797444  0.761084         0.786359      0.761736\n",
-      "Sažetak metrika spremljen u results_train_combined_croslo/summary_metrics_with_average.csv\n",
-      "\n",
-      "\n",
-      "=== Treniranje i evaluacija za trening skup: train_2 ===\n",
-      "\n",
-      "--- Fine-tuning model: EMBEDDIA/crosloengual-bert ---\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at EMBEDDIA/crosloengual-bert and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
-      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d55bf912626244ecaf1dcf7ba9334726",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/2221 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "    <div>\n",
-       "      \n",
-       "      <progress value='417' max='417' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-       "      [417/417 22:04, Epoch 3/3]\n",
-       "    </div>\n",
-       "    <table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       " <tr style=\"text-align: left;\">\n",
-       "      <th>Step</th>\n",
-       "      <th>Training Loss</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>50</td>\n",
-       "      <td>0.848800</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>100</td>\n",
-       "      <td>0.610900</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>150</td>\n",
-       "      <td>0.549600</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>200</td>\n",
-       "      <td>0.381800</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>250</td>\n",
-       "      <td>0.401700</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>300</td>\n",
-       "      <td>0.326100</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>350</td>\n",
-       "      <td>0.233100</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>400</td>\n",
-       "      <td>0.218200</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table><p>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Evaluacija na test skupu test-1\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b0d528152cdd4bfcb2c1892d4a79faca",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/653 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      "[[114  36  15]\n",
-      " [ 85 302  43]\n",
-      " [  7  22  29]]\n",
-      "\n",
-      "Classification Report:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "    negative       0.55      0.69      0.61       165\n",
-      "     neutral       0.84      0.70      0.76       430\n",
-      "    positive       0.33      0.50      0.40        58\n",
-      "\n",
-      "    accuracy                           0.68       653\n",
-      "   macro avg       0.58      0.63      0.59       653\n",
-      "weighted avg       0.72      0.68      0.69       653\n",
-      "\n",
-      "Predikcije spremljene u results_train_2_croslo/predictions_test_1.csv\n",
-      "\n",
-      "Evaluacija na test skupu test-2\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "06d0e128fb81415da0396d033248ac89",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/741 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      "[[170  36  10]\n",
-      " [ 45 366  20]\n",
-      " [ 15  24  55]]\n",
-      "\n",
-      "Classification Report:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "    negative       0.74      0.79      0.76       216\n",
-      "     neutral       0.86      0.85      0.85       431\n",
-      "    positive       0.65      0.59      0.61        94\n",
-      "\n",
-      "    accuracy                           0.80       741\n",
-      "   macro avg       0.75      0.74      0.74       741\n",
-      "weighted avg       0.80      0.80      0.80       741\n",
-      "\n",
-      "Predikcije spremljene u results_train_2_croslo/predictions_test_2.csv\n",
-      "\n",
-      "Evaluacija na test skupu test-3\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8d10a2ad3b5c4cad9e15f9a863c14653",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map:   0%|          | 0/793 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/torch/utils/data/dataloader.py:683: UserWarning: 'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.\n",
-      "  warnings.warn(warn_msg)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      "[[193  59  15]\n",
-      " [ 20 234   9]\n",
-      " [ 19 116 128]]\n",
-      "\n",
-      "Classification Report:\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "    negative       0.83      0.72      0.77       267\n",
-      "     neutral       0.57      0.89      0.70       263\n",
-      "    positive       0.84      0.49      0.62       263\n",
-      "\n",
-      "    accuracy                           0.70       793\n",
-      "   macro avg       0.75      0.70      0.70       793\n",
-      "weighted avg       0.75      0.70      0.70       793\n",
-      "\n",
-      "Predikcije spremljene u results_train_2_croslo/predictions_test_3.csv\n",
-      "\n",
-      "Sažetak metrika po test skupovima s prosjekom:\n",
-      "        Test Set  Accuracy  F1 Macro  Precision Macro  Recall Macro\n",
-      "0         test-1  0.681470  0.593037         0.575207      0.631078\n",
-      "1         test-2  0.797571  0.743666         0.748448      0.740444\n",
-      "2         test-3  0.699874  0.695614         0.748710      0.699757\n",
-      "Average      NaN  0.726305  0.677439         0.690788      0.690426\n",
-      "Sažetak metrika spremljen u results_train_2_croslo/summary_metrics_with_average.csv\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "import torch\n",
-    "from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments\n",
-    "from datasets import Dataset\n",
-    "from sklearn.metrics import classification_report, confusion_matrix\n",
-    "\n",
-    "def load_and_prepare_data(train_path):\n",
-    "    \"\"\"Read the training CSV at `train_path` and return it as a `Dataset`.\n",
-    "\n",
-    "    A column named `Label` is renamed to `label`, which is the column name\n",
-    "    `tokenize_dataset` later selects; every other column is kept as read.\n",
-    "    \"\"\"\n",
-    "    frame = pd.read_csv(train_path).rename(columns={\"Label\": \"label\"})\n",
-    "    return Dataset.from_pandas(frame)\n",
-    "\n",
-    "def load_and_prepare_test_data(test_path):\n",
-    "    \"\"\"Read the test CSV at `test_path`.\n",
-    "\n",
-    "    Returns a `(dataset, frame)` pair: the `Dataset` built from the CSV and the\n",
-    "    pandas DataFrame it was built from. In both, a `Label` column is renamed\n",
-    "    to `label`.\n",
-    "    \"\"\"\n",
-    "    frame = pd.read_csv(test_path).rename(columns={\"Label\": \"label\"})\n",
-    "    dataset = Dataset.from_pandas(frame)\n",
-    "    return dataset, frame\n",
-    "\n",
-    "def tokenize_dataset(dataset, tokenizer):\n",
-    "    \"\"\"Tokenize the `Sentence` column of `dataset` and format it for torch.\n",
-    "\n",
-    "    Every sentence is padded or truncated to exactly 128 tokens. The returned\n",
-    "    dataset is formatted as torch tensors restricted to the `input_ids`,\n",
-    "    `attention_mask` and `label` columns.\n",
-    "    \"\"\"\n",
-    "    def _encode_batch(batch):\n",
-    "        # Called by `map` with a batch of rows (batched=True).\n",
-    "        return tokenizer(\n",
-    "            batch['Sentence'],\n",
-    "            padding='max_length',\n",
-    "            truncation=True,\n",
-    "            max_length=128,\n",
-    "        )\n",
-    "\n",
-    "    encoded = dataset.map(_encode_batch, batched=True)\n",
-    "    encoded.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])\n",
-    "    return encoded\n",
-    "\n",
-    "def compute_metrics(eval_pred):\n",
-    "    \"\"\"Return accuracy and macro-averaged F1, precision and recall.\n",
-    "\n",
-    "    `eval_pred` unpacks into `(logits, labels)`; the predicted class of each\n",
-    "    row is the argmax of its logits along axis 1.\n",
-    "    \"\"\"\n",
-    "    logits, labels = eval_pred\n",
-    "    logit_tensor = torch.tensor(logits)\n",
-    "    preds = torch.argmax(logit_tensor, axis=1).numpy()\n",
-    "\n",
-    "    report = classification_report(labels, preds, output_dict=True)\n",
-    "    macro_avg = report['macro avg']\n",
-    "    return {\n",
-    "        'accuracy': report['accuracy'],\n",
-    "        'f1_macro': macro_avg['f1-score'],\n",
-    "        'precision_macro': macro_avg['precision'],\n",
-    "        'recall_macro': macro_avg['recall'],\n",
-    "    }\n",
-    "\n",
-    "def train_and_evaluate(model_name, train_dataset, test_datasets, raw_test_dfs, output_base_dir):\n",
-    "    \"\"\"Fine-tune a 3-class sequence classifier and evaluate it on each test set.\n",
-    "\n",
-    "    For every test set this prints a confusion matrix and a classification\n",
-    "    report and writes `predictions_test_<i>.csv` under `output_base_dir`.\n",
-    "    A per-test-set metric table with an extra `Average` row is then printed\n",
-    "    and written to `summary_metrics_with_average.csv`. Nothing is returned.\n",
-    "\n",
-    "    Args:\n",
-    "        model_name: Checkpoint name passed to the `from_pretrained` loaders.\n",
-    "        train_dataset: Training dataset; tokenized with `tokenize_dataset`.\n",
-    "        test_datasets: Test datasets, reported as test-1, test-2, ... in order.\n",
-    "        raw_test_dfs: DataFrames paired positionally with `test_datasets`;\n",
-    "            each needs a `label` column and is copied, not modified.\n",
-    "        output_base_dir: Directory for the model, logs and CSV outputs.\n",
-    "    \"\"\"\n",
-    "    import os  # function-scope import: the notebook's import lines do not include it\n",
-    "\n",
-    "    # Class ids and their display names. The order follows the original\n",
-    "    # target_names list; assumes the CSV labels are encoded 0/1/2 in this\n",
-    "    # order -- TODO confirm against the data.\n",
-    "    class_ids = [0, 1, 2]\n",
-    "    class_names = ['negative', 'neutral', 'positive']\n",
-    "\n",
-    "    print(f\"\\n--- Fine-tuning model: {model_name} ---\")\n",
-    "\n",
-    "    # Create the output directory explicitly instead of relying on the Trainer\n",
-    "    # having created `<output_base_dir>/model` before the CSVs are written.\n",
-    "    os.makedirs(output_base_dir, exist_ok=True)\n",
-    "\n",
-    "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
-    "    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(class_ids))\n",
-    "\n",
-    "    tokenized_train = tokenize_dataset(train_dataset, tokenizer)\n",
-    "\n",
-    "    training_args = TrainingArguments(\n",
-    "        output_dir=f\"{output_base_dir}/model\",\n",
-    "        learning_rate=2e-5,\n",
-    "        per_device_train_batch_size=16,\n",
-    "        per_device_eval_batch_size=32,\n",
-    "        num_train_epochs=3,\n",
-    "        weight_decay=0.01,\n",
-    "        load_best_model_at_end=False,\n",
-    "        logging_dir=f\"{output_base_dir}/logs\",\n",
-    "        logging_steps=50,\n",
-    "        save_total_limit=2,\n",
-    "        seed=42,\n",
-    "    )\n",
-    "\n",
-    "    trainer = Trainer(\n",
-    "        model=model,\n",
-    "        args=training_args,\n",
-    "        train_dataset=tokenized_train,\n",
-    "        compute_metrics=compute_metrics,\n",
-    "    )\n",
-    "\n",
-    "    trainer.train()\n",
-    "    trainer.save_model()\n",
-    "    # The tokenizer is not handed to the Trainer, so save it next to the model\n",
-    "    # to keep the saved directory loadable on its own.\n",
-    "    tokenizer.save_pretrained(training_args.output_dir)\n",
-    "\n",
-    "    # Pinning `labels` keeps the report and the confusion matrix at three\n",
-    "    # classes even when a test set (and its predictions) lacks one of them;\n",
-    "    # without it, `target_names` would no longer match and sklearn raises.\n",
-    "    report_kwargs = dict(labels=class_ids, target_names=class_names, zero_division=0)\n",
-    "\n",
-    "    results_list = []\n",
-    "\n",
-    "    for i, (test_dataset, raw_test_df) in enumerate(zip(test_datasets, raw_test_dfs), start=1):\n",
-    "        print(f\"\\nEvaluacija na test skupu test-{i}\")\n",
-    "        tokenized_test = tokenize_dataset(test_dataset, tokenizer)\n",
-    "        predictions_output = trainer.predict(tokenized_test)\n",
-    "\n",
-    "        preds = torch.argmax(torch.tensor(predictions_output.predictions), axis=1).numpy()\n",
-    "        labels = predictions_output.label_ids\n",
-    "\n",
-    "        report = classification_report(labels, preds, output_dict=True, **report_kwargs)\n",
-    "        macro_avg = report['macro avg']\n",
-    "\n",
-    "        results_list.append({\n",
-    "            'Test Set': f'test-{i}',\n",
-    "            # Computed directly so it does not depend on whether the report\n",
-    "            # exposes an 'accuracy' entry when `labels` is passed explicitly.\n",
-    "            'Accuracy': float((preds == labels).mean()),\n",
-    "            'F1 Macro': macro_avg['f1-score'],\n",
-    "            'Precision Macro': macro_avg['precision'],\n",
-    "            'Recall Macro': macro_avg['recall']\n",
-    "        })\n",
-    "\n",
-    "        print(\"Confusion Matrix:\")\n",
-    "        print(confusion_matrix(labels, preds, labels=class_ids))\n",
-    "        print(\"\\nClassification Report:\")\n",
-    "        print(classification_report(labels, preds, **report_kwargs))\n",
-    "\n",
-    "        # Per-example predictions next to the original rows, for error analysis.\n",
-    "        output_df = raw_test_df.copy()\n",
-    "        output_df['predicted_label'] = preds\n",
-    "        output_df['correct'] = output_df['label'] == output_df['predicted_label']\n",
-    "        output_csv = f\"{output_base_dir}/predictions_test_{i}.csv\"\n",
-    "        output_df.to_csv(output_csv, index=False)\n",
-    "        print(f\"Predikcije spremljene u {output_csv}\")\n",
-    "\n",
-    "    # Average of every metric across the test sets.\n",
-    "    df_results = pd.DataFrame(results_list)\n",
-    "    df_results.loc['Average'] = df_results.mean(numeric_only=True)\n",
-    "    # mean(numeric_only=True) skips the text column, which would leave NaN in\n",
-    "    # the new row's 'Test Set' cell; label it explicitly.\n",
-    "    df_results.loc['Average', 'Test Set'] = 'Average'\n",
-    "\n",
-    "    print(\"\\nSažetak metrika po test skupovima s prosjekom:\")\n",
-    "    print(df_results)\n",
-    "\n",
-    "    df_results.to_csv(f\"{output_base_dir}/summary_metrics_with_average.csv\", index=True)\n",
-    "    print(f\"Sažetak metrika spremljen u {output_base_dir}/summary_metrics_with_average.csv\")\n",
-    "\n",
-    "if __name__ == \"__main__\":\n",
-    "    # Tag used in console output and in the results directory name -> training CSV.\n",
-    "    train_files = {\n",
-    "        \"train_combined\": \"TRAIN.csv\",\n",
-    "        \"train_2\": \"train-2.csv\"\n",
-    "    }\n",
-    "    test_files = [\"test-1.csv\", \"test-2.csv\", \"test-3.csv\"]\n",
-    "\n",
-    "    # Load every test set once up front; the same objects are reused for each\n",
-    "    # training run below.\n",
-    "    loaded_tests = [load_and_prepare_test_data(path) for path in test_files]\n",
-    "    test_datasets = [dataset for dataset, _ in loaded_tests]\n",
-    "    raw_test_dfs = [frame for _, frame in loaded_tests]\n",
-    "\n",
-    "    model_name = \"EMBEDDIA/crosloengual-bert\"\n",
-    "\n",
-    "    # One fine-tuning run per training file, each with its own results directory.\n",
-    "    for train_name, train_path in train_files.items():\n",
-    "        print(f\"\\n\\n=== Treniranje i evaluacija za trening skup: {train_name} ===\")\n",
-    "        output_dir = f\"results_{train_name}_croslo\"\n",
-    "        train_dataset = load_and_prepare_data(train_path)\n",
-    "        train_and_evaluate(model_name, train_dataset, test_datasets, raw_test_dfs, output_dir)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.13.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}