{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import kagglehub\n", "\n", "from abc import ABC\n", "\n", "from sklearn.metrics import mean_absolute_percentage_error as mape\n", "\n", "from statsforecast.models import AutoTheta, AutoARIMA, AutoETS\n", "from statsforecast import StatsForecast\n", "from utilsforecast.plotting import plot_series\n", "\n", "from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data Processing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. Download information from Kaggle" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "path = kagglehub.dataset_download(\n", " \"rickandjoe/electricity-transformer-dataset-etdataset\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "2. Load the dataframes" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df1 = pd.read_csv(path + \"/ETT-small/ETTh1.csv\", index_col=0)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df1[\"unique_id\"] = \"transformer_1\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df2 = pd.read_csv(path + \"/ETT-small/ETTh2.csv\", index_col=0)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "df2[\"unique_id\"] = \"transformer_2\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate both dataframes" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df_final = pd.concat([df1, df2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "3. 
Keep only the series identifier (`unique_id`) and the target column (`OT`)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df_final = df_final[[\"unique_id\", \"OT\"]]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "4. Reset the index so the `date` index becomes a regular column" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df_final = df_final.reset_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "5. Rename the columns to have a common format" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df_final = df_final.rename(columns={\"date\": \"ds\", \"OT\": \"y\"})" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "df_final[\"ds\"] = pd.to_datetime(df_final[\"ds\"])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "df_final = df_final.sort_values([\"unique_id\", \"ds\"])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | ds | \n", "unique_id | \n", "y | \n", "
|---|---|---|---|
| 0 | \n", "2016-07-01 00:00:00 | \n", "transformer_1 | \n", "30.531000 | \n", "
| 1 | \n", "2016-07-01 01:00:00 | \n", "transformer_1 | \n", "27.787001 | \n", "
| 2 | \n", "2016-07-01 02:00:00 | \n", "transformer_1 | \n", "27.787001 | \n", "
| 3 | \n", "2016-07-01 03:00:00 | \n", "transformer_1 | \n", "25.044001 | \n", "
| 4 | \n", "2016-07-01 04:00:00 | \n", "transformer_1 | \n", "21.948000 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 34835 | \n", "2018-06-26 15:00:00 | \n", "transformer_2 | \n", "47.084999 | \n", "
| 34836 | \n", "2018-06-26 16:00:00 | \n", "transformer_2 | \n", "48.183498 | \n", "
| 34837 | \n", "2018-06-26 17:00:00 | \n", "transformer_2 | \n", "48.183498 | \n", "
| 34838 | \n", "2018-06-26 18:00:00 | \n", "transformer_2 | \n", "46.865501 | \n", "
| 34839 | \n", "2018-06-26 19:00:00 | \n", "transformer_2 | \n", "45.986500 | \n", "
34840 rows × 3 columns
\n", "| \n", " | AWSChronosForecast | \n", "AutoARIMA | \n", "AutoETS | \n", "AutoTheta | \n", "
|---|---|---|---|---|
| 0 | \n", "9.148477 | \n", "18.487053 | \n", "27.713882 | \n", "27.880458 | \n", "