Hyperparameter tuning and using Raytune and visulization using Tensorboard¶
This notebook uses preprocessed dataset by following notebook.
notes
- CPU monitoring in terminal:
top
- GPU monitoring in terminal:
pip install gpustat watch -c gpustat -cp --color
Initial imports¶
In [1]:
Copied!
import sys
import pandas as pd
# to save results to data directory
module_path = '..'
if module_path not in sys.path:
sys.path.insert(0, module_path)
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
import sys
import pandas as pd
# to save results to data directory
module_path = '..'
if module_path not in sys.path:
sys.path.insert(0, module_path)
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
In [2]:
Copied!
import pandas as pd
import os
from copy import copy, deepcopy
import torch
import multiprocessing
import json
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
from sklearn.metrics import classification_report, mean_squared_error
import matplotlib.pyplot as plt
from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import Wide, TabMlp, WideDeep
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory, RayTuneReporter
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal
from pytorch_widedeep.optim import RAdam
from sklearn.preprocessing import PowerTransformer
from torch.optim import Adam, SGD, lr_scheduler
from torchmetrics import F1 as F1_torchmetrics
from torchmetrics import Accuracy as Accuracy_torchmetrics
from torchmetrics import Precision as Precision_torchmetrics
from torchmetrics import Recall as Recall_torchmetrics
from torchmetrics import MeanSquaredError as MSE_torchmetrics
from pytorch_widedeep import Tab2Vec
import dill
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
# temporarily remove deprecation warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import src
from src import common
from time import time
import re
import tracemalloc
tracemalloc.start()
import ray
from ray import tune
from ray.tune import JupyterNotebookReporter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.integration.wandb import WandbLogger
from ray.tune.logger import DEFAULT_LOGGERS
import tracemalloc
from sklearn.linear_model import LogisticRegression
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import os
from copy import copy, deepcopy
import torch
import multiprocessing
import json
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
from sklearn.metrics import classification_report, mean_squared_error
import matplotlib.pyplot as plt
from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import Wide, TabMlp, WideDeep
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory, RayTuneReporter
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal
from pytorch_widedeep.optim import RAdam
from sklearn.preprocessing import PowerTransformer
from torch.optim import Adam, SGD, lr_scheduler
from torchmetrics import F1 as F1_torchmetrics
from torchmetrics import Accuracy as Accuracy_torchmetrics
from torchmetrics import Precision as Precision_torchmetrics
from torchmetrics import Recall as Recall_torchmetrics
from torchmetrics import MeanSquaredError as MSE_torchmetrics
from pytorch_widedeep import Tab2Vec
import dill
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
# temporarily remove deprecation warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import src
from src import common
from time import time
import re
import tracemalloc
tracemalloc.start()
import ray
from ray import tune
from ray.tune import JupyterNotebookReporter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.integration.wandb import WandbLogger
from ray.tune.logger import DEFAULT_LOGGERS
import tracemalloc
from sklearn.linear_model import LogisticRegression
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
import warnings
warnings.filterwarnings('ignore')
Dataset¶
identifiers
In [3]:
Copied!
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
measurement_label = column_types["measurement_label"]
RANDOM_STATE = 1
TEST_SIZE_TRAIN = 0.2
TEST_SIZE_VALID = 0.5
EMBEDDING = False
TASK = "multiclass" #(or "binary")
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
measurement_label = column_types["measurement_label"]
RANDOM_STATE = 1
TEST_SIZE_TRAIN = 0.2
TEST_SIZE_VALID = 0.5
EMBEDDING = False
TASK = "multiclass" #(or "binary")
In [4]:
Copied!
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")
In [5]:
Copied!
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)
In [6]:
Copied!
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
fault_dict[label] = int(i+1)
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
fault_dict[label] = int(i+1)
In [7]:
Copied!
# imbalance of the classes
df[target].value_counts()
# imbalance of the classes
df[target].value_counts()
Out[7]:
0 597599 5 40014 3 40001 6 40001 7 40001 8 40001 9 40001 10 40001 11 40001 13 40001 1 38971 2 38971 4 3166 12 1335 Name: fault, dtype: int64
In [8]:
Copied!
fault_dict
fault_dict
Out[8]:
{'Damping-320': 1,
'Damping-32000': 2,
'Inertia-1.2': 3,
'LL_Fault': 4,
'Three-Phase_Sensor_Fault': 5,
'Weak_Grid-4_5_mH': 6,
'Weak_Grid-1_5_mH': 7,
'Damping-3200': 8,
'Inertia-0.2': 9,
'Inertia-2': 10,
'Single_Phase_Sag': 11,
'Three_Phase_Grid_Fault': 12,
'Weak_Grid-7_5_mH': 13}
Preprocessing¶
In [9]:
Copied!
df.drop(columns=[measurement_label], inplace=True)
df.drop(columns=[measurement_label], inplace=True)
In [10]:
Copied!
df_train, df_valid = train_test_split(df, test_size=TEST_SIZE_TRAIN, stratify=df[target], random_state=RANDOM_STATE)
df_valid, df_test = train_test_split(df_valid, test_size=TEST_SIZE_VALID, stratify=df_valid[target], random_state=RANDOM_STATE)
df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
df_train, df_valid = train_test_split(df, test_size=TEST_SIZE_TRAIN, stratify=df[target], random_state=RANDOM_STATE)
df_valid, df_test = train_test_split(df_valid, test_size=TEST_SIZE_VALID, stratify=df_valid[target], random_state=RANDOM_STATE)
df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
In [11]:
Copied!
df_train_scaled, Scaler = common.scale(df_train, [target], scaler_sk='Standard')
df_valid_scaled, Scaler = common.scale(df_valid, [target], scaler_sk=Scaler)
df_test_scaled, Scaler = common.scale(df_test, [target], scaler_sk=Scaler)
df_train_scaled, Scaler = common.scale(df_train, [target], scaler_sk='Standard')
df_valid_scaled, Scaler = common.scale(df_valid, [target], scaler_sk=Scaler)
df_test_scaled, Scaler = common.scale(df_test, [target], scaler_sk=Scaler)
Deep Net¶
In [15]:
Copied!
NUM_CLASSES = df[target].nunique()
NUM_CLASSES
NUM_CLASSES = df[target].nunique()
NUM_CLASSES
Out[15]:
14
Metrics¶
In [16]:
Copied!
accuracy = Accuracy_torchmetrics(average=None, num_classes=NUM_CLASSES)
precision = Precision_torchmetrics(average='micro', num_classes=NUM_CLASSES)
f1 = F1_torchmetrics(average=None, num_classes=NUM_CLASSES)
recall = Recall_torchmetrics(average=None, num_classes=NUM_CLASSES)
metrics = [accuracy, precision, f1, recall]
accuracy = Accuracy_torchmetrics(average=None, num_classes=NUM_CLASSES)
precision = Precision_torchmetrics(average='micro', num_classes=NUM_CLASSES)
f1 = F1_torchmetrics(average=None, num_classes=NUM_CLASSES)
recall = Recall_torchmetrics(average=None, num_classes=NUM_CLASSES)
metrics = [accuracy, precision, f1, recall]
Embedding layer size rules¶
In [17]:
Copied!
if EMBEDDING:
embedding_rule = 'jeremy_old'
embed_cols = []
if embedding_rule == 'google':
embed_input = [(u, round(df_train[u].nunique()**0.25)) for u in embed_cols]
elif embedding_rule == 'jeremy_old':
embed_input = [(u, min(50, df_train[u].nunique()//2)) for u in embed_cols]
elif embedding_rule == 'jeremy_new':
embed_input = [(u, min(600, round(1.6 * df_train[u].nunique()**0.56))) for u in embed_cols]
else:
embed_input = None
if EMBEDDING:
embedding_rule = 'jeremy_old'
embed_cols = []
if embedding_rule == 'google':
embed_input = [(u, round(df_train[u].nunique()**0.25)) for u in embed_cols]
elif embedding_rule == 'jeremy_old':
embed_input = [(u, min(50, df_train[u].nunique()//2)) for u in embed_cols]
elif embedding_rule == 'jeremy_new':
embed_input = [(u, min(600, round(1.6 * df_train[u].nunique()**0.56))) for u in embed_cols]
else:
embed_input = None
In [18]:
Copied!
cont_cols = df.drop(columns=[target]).columns.values
cont_cols = df.drop(columns=[target]).columns.values
Prepare data for model¶
In [19]:
Copied!
# deeptabular
tab_preprocessor = TabPreprocessor(embed_cols=embed_input,
continuous_cols=cont_cols,
shared_embed=False,
scale=False)
X_tab_train = tab_preprocessor.fit_transform(df_train_scaled)
X_tab_valid = tab_preprocessor.transform(df_valid_scaled)
X_tab_test = tab_preprocessor.transform(df_test_scaled)
# target
y_train = df_train_scaled[target].values
y_valid = df_valid_scaled[target].values
y_test = df_test_scaled[target].values
# deeptabular
tab_preprocessor = TabPreprocessor(embed_cols=embed_input,
continuous_cols=cont_cols,
shared_embed=False,
scale=False)
X_tab_train = tab_preprocessor.fit_transform(df_train_scaled)
X_tab_valid = tab_preprocessor.transform(df_valid_scaled)
X_tab_test = tab_preprocessor.transform(df_test_scaled)
# target
y_train = df_train_scaled[target].values
y_valid = df_valid_scaled[target].values
y_test = df_test_scaled[target].values
Define the model¶
In [20]:
Copied!
input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
output_layer = NUM_CLASSES
if TASK == "binary":
output_layer = 1
hidden_layers = np.linspace(input_layer*2, output_layer, 5, endpoint=False, dtype=int).tolist()
input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
output_layer = NUM_CLASSES
if TASK == "binary":
output_layer = 1
hidden_layers = np.linspace(input_layer*2, output_layer, 5, endpoint=False, dtype=int).tolist()
In [21]:
Copied!
if EMBEDDING:
deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
embed_input=tab_preprocessor.embeddings_input,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True)
else:
deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True)
model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)
model
if EMBEDDING:
deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
embed_input=tab_preprocessor.embeddings_input,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True)
else:
deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True)
model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)
model
Out[21]:
WideDeep(
(deeptabular): Sequential(
(0): TabMlp(
(cat_embed_and_cont): CatEmbeddingsAndCont(
(cont_norm): BatchNorm1d(13, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(tab_mlp): MLP(
(mlp): Sequential(
(dense_layer_0): Sequential(
(0): Linear(in_features=13, out_features=26, bias=False)
(1): ReLU(inplace=True)
(2): BatchNorm1d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.1, inplace=False)
)
(dense_layer_1): Sequential(
(0): Linear(in_features=26, out_features=23, bias=False)
(1): ReLU(inplace=True)
(2): BatchNorm1d(23, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.1, inplace=False)
)
(dense_layer_2): Sequential(
(0): Linear(in_features=23, out_features=21, bias=False)
(1): ReLU(inplace=True)
(2): BatchNorm1d(21, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.1, inplace=False)
)
(dense_layer_3): Sequential(
(0): Linear(in_features=21, out_features=18, bias=False)
(1): ReLU(inplace=True)
(2): BatchNorm1d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.1, inplace=False)
)
(dense_layer_4): Sequential(
(0): Linear(in_features=18, out_features=16, bias=False)
(1): ReLU(inplace=True)
(2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.1, inplace=False)
)
)
)
)
(1): Linear(in_features=16, out_features=14, bias=True)
)
)
Optimizers and Schedulers¶
In [22]:
Copied!
# Optimizers
deep_opt = SGD(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch = lr_scheduler.StepLR(deep_opt, step_size=2)
# Optimizers
deep_opt = SGD(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch = lr_scheduler.StepLR(deep_opt, step_size=2)
In [114]:
Copied!
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(
filepath="#temp_models/",
save_best_only=True,
verbose=1,
max_save=1,
)
if TASK == "binary":
objective = "binary_focal_loss"
dataloader = DataLoaderImbalanced
if TASK == "multiclass":
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(model,
objective=objective,
callbacks=[LRHistory(n_epochs=10)],
lr_schedulers={'deeptabular': deep_sch},
initializers={'deeptabular': XavierNormal},
optimizers={'deeptabular': deep_opt},
metrics=metrics)
trainer.fit(X_train={"X_tab": X_tab_train, "target": y_train},
X_val={"X_tab": X_tab_valid, "target": y_valid},
n_epochs=10,
batch_size=100,
custom_dataloader=dataloader,
oversample_mul=5)
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(
filepath="#temp_models/",
save_best_only=True,
verbose=1,
max_save=1,
)
if TASK == "binary":
objective = "binary_focal_loss"
dataloader = DataLoaderImbalanced
if TASK == "multiclass":
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(model,
objective=objective,
callbacks=[LRHistory(n_epochs=10)],
lr_schedulers={'deeptabular': deep_sch},
initializers={'deeptabular': XavierNormal},
optimizers={'deeptabular': deep_opt},
metrics=metrics)
trainer.fit(X_train={"X_tab": X_tab_train, "target": y_train},
X_val={"X_tab": X_tab_valid, "target": y_valid},
n_epochs=10,
batch_size=100,
custom_dataloader=dataloader,
oversample_mul=5)
epoch 1: 100%|█| 748/748 [00:09<00:00, 81.72it/s, loss=0.105, metrics={'Accuracy': [0.075, 0.4447, 0.0113
valid: 100%|█| 1041/1041 [00:10<00:00, 96.47it/s, loss=0.0466, metrics={'Accuracy': [0.0034, 0.8101, 0.0,
epoch 2: 100%|█| 748/748 [00:10<00:00, 74.68it/s, loss=0.0376, metrics={'Accuracy': [0.0548, 0.6595, 0.00
valid: 100%|█| 1041/1041 [00:11<00:00, 93.48it/s, loss=0.0286, metrics={'Accuracy': [0.0008, 0.9956, 0.0,
epoch 3: 100%|█| 748/748 [00:13<00:00, 55.55it/s, loss=0.03, metrics={'Accuracy': [0.0773, 0.6895, 0.0086
valid: 100%|█| 1041/1041 [00:11<00:00, 87.92it/s, loss=0.0285, metrics={'Accuracy': [0.0019, 0.9941, 0.0,
epoch 4: 100%|█| 748/748 [00:10<00:00, 70.84it/s, loss=0.0294, metrics={'Accuracy': [0.0822, 0.6876, 0.01
valid: 100%|█| 1041/1041 [00:11<00:00, 89.74it/s, loss=0.0274, metrics={'Accuracy': [0.0143, 1.0, 0.0, 0.
epoch 5: 100%|█| 748/748 [00:10<00:00, 68.36it/s, loss=0.029, metrics={'Accuracy': [0.0832, 0.7022, 0.009
valid: 100%|█| 1041/1041 [00:12<00:00, 84.57it/s, loss=0.0274, metrics={'Accuracy': [0.0208, 1.0, 0.0, 0.
epoch 6: 100%|█| 748/748 [00:12<00:00, 61.49it/s, loss=0.029, metrics={'Accuracy': [0.0832, 0.6918, 0.011
valid: 100%|█| 1041/1041 [00:13<00:00, 75.65it/s, loss=0.027, metrics={'Accuracy': [0.0136, 0.9489, 0.0,
epoch 7: 100%|█| 748/748 [00:11<00:00, 67.39it/s, loss=0.0289, metrics={'Accuracy': [0.079, 0.6971, 0.008
valid: 100%|█| 1041/1041 [00:11<00:00, 87.01it/s, loss=0.0275, metrics={'Accuracy': [0.0095, 0.9995, 0.0,
epoch 8: 100%|█| 748/748 [00:15<00:00, 47.47it/s, loss=0.029, metrics={'Accuracy': [0.0735, 0.6869, 0.013
valid: 100%|█| 1041/1041 [00:14<00:00, 73.14it/s, loss=0.0274, metrics={'Accuracy': [0.009, 1.0, 0.0, 0.0
epoch 9: 100%|█| 748/748 [00:13<00:00, 53.98it/s, loss=0.0289, metrics={'Accuracy': [0.0777, 0.6961, 0.01
valid: 100%|█| 1041/1041 [00:12<00:00, 81.94it/s, loss=0.0271, metrics={'Accuracy': [0.0111, 0.9895, 0.0,
epoch 10: 100%|█| 748/748 [00:11<00:00, 66.13it/s, loss=0.0289, metrics={'Accuracy': [0.0786, 0.6905, 0.0
valid: 100%|█| 1041/1041 [00:13<00:00, 77.42it/s, loss=0.0275, metrics={'Accuracy': [0.0114, 1.0, 0.0, 0.
Prediction & evaluation¶
Normal prediction¶
In [117]:
Copied!
result = pd.DataFrame({"predicted": trainer.predict(X_tab=X_tab_test),
"ground_truth": df_test[target].values,})
result = pd.DataFrame({"predicted": trainer.predict(X_tab=X_tab_test),
"ground_truth": df_test[target].values,})
predict: 100%|██████████████████████████████████████████████████████| 1041/1041 [00:04<00:00, 253.97it/s]
In [118]:
Copied!
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
Classification report:
precision recall f1-score support
0 0.01 0.95 0.02 747
1 1.00 0.28 0.43 14139
2 0.00 0.00 0.00 0
3 0.07 0.02 0.03 16889
4 0.16 0.01 0.02 6015
5 1.00 0.86 0.92 4652
6 0.00 0.00 0.00 0
7 0.09 0.08 0.08 4382
8 0.05 0.03 0.04 8198
9 0.15 0.10 0.12 6105
10 0.02 0.12 0.04 739
11 0.23 0.05 0.08 18332
12 0.92 0.42 0.58 291
13 0.41 0.07 0.12 23518
accuracy 0.12 104007
macro avg 0.29 0.21 0.18 104007
weighted avg 0.35 0.12 0.16 104007
Monte Carlo prediction (uncertainty)¶
- requires install of pytorch-widedeep branch that was not yet merged to master - https://github.com/jrzaurin/pytorch-widedeep/tree/pmulinka/uncertainty
In [123]:
Copied!
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=5)
result = pd.DataFrame({"predicted": df_pred_unc[:,-1],
"ground_truth": df_test[target].values,})
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=5)
result = pd.DataFrame({"predicted": df_pred_unc[:,-1],
"ground_truth": df_test[target].values,})
predict_UncertaintyIter: 100%|█████████████████████████████████████████████| 5/5 [00:15<00:00, 3.12s/it]
In [124]:
Copied!
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
Classification report:
precision recall f1-score support
0.0 0.04 0.89 0.08 2847
1.0 0.98 0.22 0.36 17144
2.0 0.00 0.02 0.00 413
3.0 0.06 0.02 0.03 12746
4.0 0.09 0.01 0.01 4305
5.0 0.94 0.77 0.85 4877
6.0 0.01 0.08 0.01 309
7.0 0.04 0.06 0.05 2803
8.0 0.10 0.04 0.06 9666
9.0 0.13 0.07 0.09 7258
10.0 0.06 0.09 0.07 2415
11.0 0.31 0.06 0.09 22332
12.0 0.95 0.13 0.22 993
13.0 0.25 0.06 0.10 15899
accuracy 0.14 104007
macro avg 0.28 0.18 0.15 104007
weighted avg 0.35 0.14 0.16 104007
Extract the dictionary from a learned model¶
- only in case we are using embeddings that we want to use in other models
- Use built-in Tab2Vec with model and tab_preprocessor
In [115]:
Copied!
# t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor, return_dataframe=True)
# df_test_scaled_enc, df_test_y = t2v.transform(df_test_scaled, target_col=target_col)
# df_test_scaled_eanc.head()
# t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor, return_dataframe=True)
# df_test_scaled_enc, df_test_y = t2v.transform(df_test_scaled, target_col=target_col)
# df_test_scaled_eanc.head()
dill "lesson learned" :¶
- issue
you fit scalers, transformers, preprocessors, label encoders, models, etc. and you need to save them so you could use them when necessary for predictions, ie. in prediction script that is used everyday - wrong solution
use pickle to store the objects in files, or even better - create named dictionary with all models that includes the objects - why is it wrong?
As it is nicely described in this [post](https://stackoverflow.com/questions/4529815/saving-an-object-data-persistence/25119089#25119089) , pickle serializes the objects but uses references to the objects, ie. if you change anything in your code, or if you had "ad-hoc" class in notebook that you then move to repo/library then the object will be empty/non-initialized better approach. As mentioned in the post use "dill" that serializes also the class definition, you can also save the whole session, e.g. when you finished working in a jupyter notebook for the day and yo udo not want run all cells after you respawn the machine next day - wanna get fancy?
- you can use also klepto which extends dill with "nifty" archive types, e.g.:
- file_archive - a dictionary-style interface to a file
- dir_archive - a dictionary-style interface to a folder of files
- https://klepto.readthedocs.io/en/latest/index.html
- https://pypi.org/project/dill/
- you can use also klepto which extends dill with "nifty" archive types, e.g.:
In [21]:
Copied!
# with open("#temp_models/dl_entity_emb_model.dill", "wb") as f:
# dill.dump(model, f)
# with open("#temp_models/dl_entity_emb_model_tab_preprocessor.dill", "wb") as f:
# dill.dump(tab_preprocessor, f)
# with open("#temp_models/dl_entity_emb_model.dill", "wb") as f:
# dill.dump(model, f)
# with open("#temp_models/dl_entity_emb_model_tab_preprocessor.dill", "wb") as f:
# dill.dump(tab_preprocessor, f)
w Raytune¶
In [25]:
Copied!
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)
input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
output_layer = NUM_CLASSES
if TASK == "binary":
output_layer = 1
hidden_layers3 = np.linspace(
input_layer * 2, output_layer, 3, endpoint=False, dtype=int
).tolist()
hidden_layers5 = np.linspace(
input_layer * 2, output_layer, 5, endpoint=False, dtype=int
).tolist()
config = {
"batch_size": tune.grid_search([1000, 10000]),
"deep_opt": tune.grid_search([deep_opt_sgd_01, deep_opt_adam_01]),
"deep_sch": tune.grid_search([deep_sch_StepLR5]),
"hidden_layers": tune.grid_search([hidden_layers3, hidden_layers5]),
"wandb": {
"project": "test",
"api_key_file": "src/wandb_api.key",
"log_config": True,
},
}
if TASK == "binary":
objective = "binary_focal_loss"
dataloader = DataLoaderImbalanced
if TASK == "multiclass":
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
def training_function(config, X_train, X_val):
deeptabular = TabMlp(
mlp_hidden_dims=config["hidden_layers"],
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True,
)
model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)
trainer = Trainer(
model,
objective=objective,
callbacks=[RayTuneReporter, LRHistory(n_epochs=10)],
lr_schedulers={"deeptabular": config["deep_sch"]},
initializers={"deeptabular": XavierNormal},
optimizers={"deeptabular": config["deep_opt"]},
metrics=metrics,
verbose=0,
)
trainer.fit(
X_train=X_train,
X_val=X_val,
n_epochs=5,
batch_size=config["batch_size"],
custom_dataloader=dataloader,
oversample_mul=5,
)
X_train = {"X_tab": X_tab_train, "target": y_train}
X_val = {"X_tab": X_tab_valid, "target": y_valid}
# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="_metric/val_loss",
mode="min",
max_t=100,
grace_period=10,
reduction_factor=3,
brackets=1,
)
analysis = tune.run(
tune.with_parameters(training_function, X_train=X_train, X_val=X_val),
# resources_per_trial={"cpu": 4, "gpu": 0},
num_samples=1,
progress_reporter=JupyterNotebookReporter(overwrite=True),
scheduler=asha_scheduler,
config=config,
loggers=DEFAULT_LOGGERS + (WandbLogger,),
)
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)
input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
output_layer = NUM_CLASSES
if TASK == "binary":
output_layer = 1
hidden_layers3 = np.linspace(
input_layer * 2, output_layer, 3, endpoint=False, dtype=int
).tolist()
hidden_layers5 = np.linspace(
input_layer * 2, output_layer, 5, endpoint=False, dtype=int
).tolist()
config = {
"batch_size": tune.grid_search([1000, 10000]),
"deep_opt": tune.grid_search([deep_opt_sgd_01, deep_opt_adam_01]),
"deep_sch": tune.grid_search([deep_sch_StepLR5]),
"hidden_layers": tune.grid_search([hidden_layers3, hidden_layers5]),
"wandb": {
"project": "test",
"api_key_file": "src/wandb_api.key",
"log_config": True,
},
}
if TASK == "binary":
objective = "binary_focal_loss"
dataloader = DataLoaderImbalanced
if TASK == "multiclass":
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
def training_function(config, X_train, X_val):
deeptabular = TabMlp(
mlp_hidden_dims=config["hidden_layers"],
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True,
)
model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)
trainer = Trainer(
model,
objective=objective,
callbacks=[RayTuneReporter, LRHistory(n_epochs=10)],
lr_schedulers={"deeptabular": config["deep_sch"]},
initializers={"deeptabular": XavierNormal},
optimizers={"deeptabular": config["deep_opt"]},
metrics=metrics,
verbose=0,
)
trainer.fit(
X_train=X_train,
X_val=X_val,
n_epochs=5,
batch_size=config["batch_size"],
custom_dataloader=dataloader,
oversample_mul=5,
)
X_train = {"X_tab": X_tab_train, "target": y_train}
X_val = {"X_tab": X_tab_valid, "target": y_valid}
# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="_metric/val_loss",
mode="min",
max_t=100,
grace_period=10,
reduction_factor=3,
brackets=1,
)
analysis = tune.run(
tune.with_parameters(training_function, X_train=X_train, X_val=X_val),
# resources_per_trial={"cpu": 4, "gpu": 0},
num_samples=1,
progress_reporter=JupyterNotebookReporter(overwrite=True),
scheduler=asha_scheduler,
config=config,
loggers=DEFAULT_LOGGERS + (WandbLogger,),
)
== Status ==
Current time: 2021-11-05 00:27:15 (running for 00:01:54.21)
Memory usage on this node: 2.4/12.2 GiB
Using AsyncHyperBand: num_stopped=0 Bracket: Iter 90.000: None | Iter 30.000: None | Iter 10.000: None
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.7 GiB heap, 0.0/3.35 GiB objects
Result logdir: /home/palo/ray_results/training_function_2021-11-05_00-25-21
Number of trials: 8/8 (8 TERMINATED)
Current time: 2021-11-05 00:27:15 (running for 00:01:54.21)
Memory usage on this node: 2.4/12.2 GiB
Using AsyncHyperBand: num_stopped=0 Bracket: Iter 90.000: None | Iter 30.000: None | Iter 10.000: None
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.7 GiB heap, 0.0/3.35 GiB objects
Result logdir: /home/palo/ray_results/training_function_2021-11-05_00-25-21
Number of trials: 8/8 (8 TERMINATED)
| Trial name | status | loc | batch_size | deep_opt | deep_sch | hidden_layers | iter | total time (s) |
|---|---|---|---|---|---|---|---|---|
| training_function_7afc0_00000 | TERMINATED | 172.27.173.201:8451 | 1000 | SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f904d3c70> | [26, 22, 18] | 5 | 73.5314 |
| training_function_7afc0_00001 | TERMINATED | 172.27.173.201:8454 | 10000 | SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f9047ea90> | [26, 22, 18] | 5 | 52.1587 |
| training_function_7afc0_00002 | TERMINATED | 172.27.173.201:8448 | 1000 | Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f91157fa0> | [26, 22, 18] | 5 | 65.7284 |
| training_function_7afc0_00003 | TERMINATED | 172.27.173.201:8449 | 10000 | Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f9049d5b0> | [26, 22, 18] | 5 | 52.5479 |
| training_function_7afc0_00004 | TERMINATED | 172.27.173.201:8452 | 1000 | SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f9050a8b0> | [26, 23, 21, 18, 16] | 5 | 70.8522 |
| training_function_7afc0_00005 | TERMINATED | 172.27.173.201:8450 | 10000 | SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f9050aeb0> | [26, 23, 21, 18, 16] | 5 | 57.7705 |
| training_function_7afc0_00006 | TERMINATED | 172.27.173.201:8447 | 1000 | Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f9050af70> | [26, 23, 21, 18, 16] | 5 | 69.5162 |
| training_function_7afc0_00007 | TERMINATED | 172.27.173.201:8965 | 10000 | Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 ) | <torch.optim.lr_scheduler.StepLR object at 0x7f7f90495580> | [26, 23, 21, 18, 16] | 5 | 49.4401 |
2021-11-05 00:27:16,111 INFO tune.py:630 -- Total run time: 114.35 seconds (113.77 seconds for the tuning loop).
best params
In [31]:
Copied!
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")
params
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")
params
Out[31]:
{'batch_size': 1000,
'deep_opt': Adam (
Parameter Group 0
amsgrad: False
betas: (0.9, 0.999)
eps: 1e-08
lr: 0.1
weight_decay: 0
),
'deep_sch': <torch.optim.lr_scheduler.StepLR at 0x7f7f9050af70>,
'hidden_layers': [26, 23, 21, 18, 16]}
Tensorboard visualization¶
In [32]:
Copied!
%load_ext tensorboard
%load_ext tensorboard
In [36]:
Copied!
from tensorboard import notebook
notebook.list()
from tensorboard import notebook
notebook.list()
No known TensorBoard instances running.
In [37]:
Copied!
%tensorboard --logdir ~/ray_results
%tensorboard --logdir ~/ray_results
APPENDIX¶
Logistic regression for comparisson¶
In [12]:
Copied!
LogReg_model = LogisticRegression(random_state=RANDOM_STATE).fit(df_train_scaled.drop(columns=[target]), df_train_scaled[target])
result = pd.DataFrame({"predicted": LogReg_model.predict(df_test_scaled.drop(columns=[target])),
"ground_truth": df_test[target].values,})
LogReg_model = LogisticRegression(random_state=RANDOM_STATE).fit(df_train_scaled.drop(columns=[target]), df_train_scaled[target])
result = pd.DataFrame({"predicted": LogReg_model.predict(df_test_scaled.drop(columns=[target])),
"ground_truth": df_test[target].values,})
In [13]:
Copied!
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
Classification report:
precision recall f1-score support
0 0.97 0.84 0.90 68348
1 0.42 0.59 0.49 2744
2 0.30 0.10 0.15 11340
3 0.14 0.11 0.12 5134
4 0.00 0.00 0.00 0
5 0.22 0.32 0.26 2764
6 0.00 0.00 0.00 163
7 0.16 0.14 0.15 4564
8 0.01 0.08 0.02 659
9 0.13 0.12 0.12 4458
10 0.03 0.09 0.05 1491
11 0.12 0.28 0.16 1643
12 0.92 0.91 0.92 135
13 0.00 0.00 0.00 564
accuracy 0.61 104007
macro avg 0.24 0.26 0.24 104007
weighted avg 0.71 0.61 0.65 104007
Different hidden layer designs¶
In [ ]:
Copied!
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = 1
lengths = [3,5,7,9,11]
pipes = []
anti_autoencoders = []
trapezoids = []
anti_trapezoids = []
funnels = []
adj_funnels = []
apollos = []
for length in lengths:
pipe = [input_layer]*length
pipes.append(pipe)
anti_autoencoder = np.linspace(input_layer, input_layer*2, ceil(length/2), dtype=int).tolist()
anti_autoencoder.extend(anti_autoencoder[-2::-1])
anti_autoencoders.append(anti_autoencoder)
trapezoid = np.array([round(input_layer*1.25)]*length)
trapezoid[[0, -1]] = input_layer
trapezoids.append(trapezoid.tolist())
anti_trapezoid = np.array([round(input_layer*0.75)]*length)
anti_trapezoid[[0, -1]] = input_layer
anti_trapezoids.append(anti_trapezoid.tolist())
funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
funnels.append(funnel)
adj_funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
adj_funnel.insert(0, input_layer)
adj_funnels.append(adj_funnel)
apollo = np.linspace(input_layer, input_layer*2, length, dtype=int).tolist()
apollos.append(apollo)
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = 1
lengths = [3,5,7,9,11]
pipes = []
anti_autoencoders = []
trapezoids = []
anti_trapezoids = []
funnels = []
adj_funnels = []
apollos = []
for length in lengths:
pipe = [input_layer]*length
pipes.append(pipe)
anti_autoencoder = np.linspace(input_layer, input_layer*2, ceil(length/2), dtype=int).tolist()
anti_autoencoder.extend(anti_autoencoder[-2::-1])
anti_autoencoders.append(anti_autoencoder)
trapezoid = np.array([round(input_layer*1.25)]*length)
trapezoid[[0, -1]] = input_layer
trapezoids.append(trapezoid.tolist())
anti_trapezoid = np.array([round(input_layer*0.75)]*length)
anti_trapezoid[[0, -1]] = input_layer
anti_trapezoids.append(anti_trapezoid.tolist())
funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
funnels.append(funnel)
adj_funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
adj_funnel.insert(0, input_layer)
adj_funnels.append(adj_funnel)
apollo = np.linspace(input_layer, input_layer*2, length, dtype=int).tolist()
apollos.append(apollo)