Hyperparameter tuning and using Raytune and visulization using Tensorboard¶

This notebook uses preprocessed dataset by following notebook.

notes

CPU monitoring in terminal:
```
top
```

GPU monitoring in terminal:

pip install gpustat
watch -c gpustat -cp --color

Initial imports¶

In [1]:

            
                Copied!
                
                    
                    
                
                

        
import sys
import pandas as pd
# to save results to data directory
module_path = '..'
if module_path not in sys.path:
    sys.path.insert(0, module_path)
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)
import sys
import pandas as pd
# to save results to data directory
module_path = '..'
if module_path not in sys.path:
    sys.path.insert(0, module_path)
# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)

In [2]:

            
                Copied!
                
                    
                    
                
                

        
import pandas as pd
import os
from copy import copy, deepcopy
import torch
import multiprocessing
import json
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
from sklearn.metrics import classification_report, mean_squared_error
import matplotlib.pyplot as plt

from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import Wide, TabMlp, WideDeep
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory, RayTuneReporter
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal
from pytorch_widedeep.optim import RAdam
from sklearn.preprocessing import PowerTransformer

from torch.optim import Adam, SGD, lr_scheduler
from torchmetrics import F1 as F1_torchmetrics
from torchmetrics import Accuracy as Accuracy_torchmetrics
from torchmetrics import Precision as Precision_torchmetrics
from torchmetrics import Recall as Recall_torchmetrics
from torchmetrics import MeanSquaredError as MSE_torchmetrics

from pytorch_widedeep import Tab2Vec
import dill

# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)

# temporarily remove deprecation warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import src
from src import common

from time import time

import re

import tracemalloc
tracemalloc.start()
import ray
from ray import tune
from ray.tune import JupyterNotebookReporter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.integration.wandb import WandbLogger
from ray.tune.logger import DEFAULT_LOGGERS
import tracemalloc
from sklearn.linear_model import LogisticRegression

# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)

import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import os
from copy import copy, deepcopy
import torch
import multiprocessing
import json
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
from sklearn.metrics import classification_report, mean_squared_error
import matplotlib.pyplot as plt

from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import Wide, TabMlp, WideDeep
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory, RayTuneReporter
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal
from pytorch_widedeep.optim import RAdam
from sklearn.preprocessing import PowerTransformer

from torch.optim import Adam, SGD, lr_scheduler
from torchmetrics import F1 as F1_torchmetrics
from torchmetrics import Accuracy as Accuracy_torchmetrics
from torchmetrics import Precision as Precision_torchmetrics
from torchmetrics import Recall as Recall_torchmetrics
from torchmetrics import MeanSquaredError as MSE_torchmetrics

from pytorch_widedeep import Tab2Vec
import dill

# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)

# temporarily remove deprecation warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import src
from src import common

from time import time

import re

import tracemalloc
tracemalloc.start()
import ray
from ray import tune
from ray.tune import JupyterNotebookReporter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.integration.wandb import WandbLogger
from ray.tune.logger import DEFAULT_LOGGERS
import tracemalloc
from sklearn.linear_model import LogisticRegression

# increase displayed columns in jupyter notebook
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)

import warnings
warnings.filterwarnings('ignore')

Dataset¶

identifiers

In [3]:

            
                Copied!
                
                    
                    
                
                

        
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
measurement_label = column_types["measurement_label"]
RANDOM_STATE = 1
TEST_SIZE_TRAIN = 0.2
TEST_SIZE_VALID = 0.5
EMBEDDING = False
TASK = "multiclass" #(or "binary")
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
measurement_label = column_types["measurement_label"]
RANDOM_STATE = 1
TEST_SIZE_TRAIN = 0.2
TEST_SIZE_VALID = 0.5
EMBEDDING = False
TASK = "multiclass" #(or "binary")

In [4]:

            
                Copied!
                
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")

In [5]:

            
                Copied!
                
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)

In [6]:

            
                Copied!
                
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
    df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
    fault_dict[label] = int(i+1)
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
    df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
    fault_dict[label] = int(i+1)

In [7]:

            
                Copied!
                
# imbalance of the classes
df[target].value_counts()
# imbalance of the classes
df[target].value_counts()

Out[7]:

0     597599
5      40014
3      40001
6      40001
7      40001
8      40001
9      40001
10     40001
11     40001
13     40001
1      38971
2      38971
4       3166
12      1335
Name: fault, dtype: int64

In [8]:

            
                Copied!
                
fault_dict
fault_dict

Out[8]:

{'Damping-320': 1,
 'Damping-32000': 2,
 'Inertia-1.2': 3,
 'LL_Fault': 4,
 'Three-Phase_Sensor_Fault': 5,
 'Weak_Grid-4_5_mH': 6,
 'Weak_Grid-1_5_mH': 7,
 'Damping-3200': 8,
 'Inertia-0.2': 9,
 'Inertia-2': 10,
 'Single_Phase_Sag': 11,
 'Three_Phase_Grid_Fault': 12,
 'Weak_Grid-7_5_mH': 13}

Preprocessing¶

In [9]:

            
                Copied!
                
df.drop(columns=[measurement_label], inplace=True)
df.drop(columns=[measurement_label], inplace=True)

In [10]:

            
                Copied!
                
df_train, df_valid = train_test_split(df, test_size=TEST_SIZE_TRAIN, stratify=df[target], random_state=RANDOM_STATE)
df_valid, df_test = train_test_split(df_valid, test_size=TEST_SIZE_VALID, stratify=df_valid[target], random_state=RANDOM_STATE)

df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
df_train, df_valid = train_test_split(df, test_size=TEST_SIZE_TRAIN, stratify=df[target], random_state=RANDOM_STATE)
df_valid, df_test = train_test_split(df_valid, test_size=TEST_SIZE_VALID, stratify=df_valid[target], random_state=RANDOM_STATE)

df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)

In [11]:

            
                Copied!
                
df_train_scaled, Scaler = common.scale(df_train, [target], scaler_sk='Standard')
df_valid_scaled, Scaler = common.scale(df_valid, [target], scaler_sk=Scaler)
df_test_scaled, Scaler = common.scale(df_test, [target], scaler_sk=Scaler)
df_train_scaled, Scaler = common.scale(df_train, [target], scaler_sk='Standard')
df_valid_scaled, Scaler = common.scale(df_valid, [target], scaler_sk=Scaler)
df_test_scaled, Scaler = common.scale(df_test, [target], scaler_sk=Scaler)

Deep Net¶

In [15]:

            
                Copied!
                
NUM_CLASSES = df[target].nunique()
NUM_CLASSES
NUM_CLASSES = df[target].nunique()
NUM_CLASSES

Out[15]:

Metrics¶

In [16]:

            
                Copied!
                
accuracy = Accuracy_torchmetrics(average=None, num_classes=NUM_CLASSES)
precision = Precision_torchmetrics(average='micro', num_classes=NUM_CLASSES)
f1 = F1_torchmetrics(average=None, num_classes=NUM_CLASSES)
recall = Recall_torchmetrics(average=None, num_classes=NUM_CLASSES)
metrics = [accuracy, precision, f1, recall]
accuracy = Accuracy_torchmetrics(average=None, num_classes=NUM_CLASSES)
precision = Precision_torchmetrics(average='micro', num_classes=NUM_CLASSES)
f1 = F1_torchmetrics(average=None, num_classes=NUM_CLASSES)
recall = Recall_torchmetrics(average=None, num_classes=NUM_CLASSES)
metrics = [accuracy, precision, f1, recall]

Embedding layer size rules¶

Jeremy Howard
- see related discussion 1 and 2, but it won't help you :)
Google blog rule
- Per class metrics

In [17]:

            
                Copied!
                
                    
                    
                
                

        
if EMBEDDING:
    embedding_rule = 'jeremy_old'

    embed_cols = []
    if embedding_rule == 'google':
        embed_input = [(u, round(df_train[u].nunique()**0.25)) for u in embed_cols]
    elif embedding_rule == 'jeremy_old':
        embed_input = [(u, min(50, df_train[u].nunique()//2)) for u in embed_cols]
    elif embedding_rule == 'jeremy_new':
        embed_input = [(u, min(600, round(1.6 * df_train[u].nunique()**0.56))) for u in embed_cols]
else:
    embed_input = None
if EMBEDDING:
    embedding_rule = 'jeremy_old'

    embed_cols = []
    if embedding_rule == 'google':
        embed_input = [(u, round(df_train[u].nunique()**0.25)) for u in embed_cols]
    elif embedding_rule == 'jeremy_old':
        embed_input = [(u, min(50, df_train[u].nunique()//2)) for u in embed_cols]
    elif embedding_rule == 'jeremy_new':
        embed_input = [(u, min(600, round(1.6 * df_train[u].nunique()**0.56))) for u in embed_cols]
else:
    embed_input = None

In [18]:

            
                Copied!
                
cont_cols = df.drop(columns=[target]).columns.values
cont_cols = df.drop(columns=[target]).columns.values

Prepare data for model¶

In [19]:

            
                Copied!
                
                    
                    
                
                

        
# deeptabular
tab_preprocessor = TabPreprocessor(embed_cols=embed_input,
                                   continuous_cols=cont_cols,
                                   shared_embed=False,
                                   scale=False)
X_tab_train = tab_preprocessor.fit_transform(df_train_scaled)
X_tab_valid = tab_preprocessor.transform(df_valid_scaled)
X_tab_test = tab_preprocessor.transform(df_test_scaled)

# target
y_train = df_train_scaled[target].values
y_valid = df_valid_scaled[target].values
y_test = df_test_scaled[target].values
# deeptabular
tab_preprocessor = TabPreprocessor(embed_cols=embed_input,
                                   continuous_cols=cont_cols,
                                   shared_embed=False,
                                   scale=False)
X_tab_train = tab_preprocessor.fit_transform(df_train_scaled)
X_tab_valid = tab_preprocessor.transform(df_valid_scaled)
X_tab_test = tab_preprocessor.transform(df_test_scaled)

# target
y_train = df_train_scaled[target].values
y_valid = df_valid_scaled[target].values
y_test = df_test_scaled[target].values

Define the model¶

In [20]:

            
                Copied!
                
                    
                    
                
                

        
input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
    output_layer = NUM_CLASSES
if TASK == "binary":
    output_layer = 1
hidden_layers = np.linspace(input_layer*2, output_layer, 5, endpoint=False, dtype=int).tolist()
input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
    output_layer = NUM_CLASSES
if TASK == "binary":
    output_layer = 1
hidden_layers = np.linspace(input_layer*2, output_layer, 5, endpoint=False, dtype=int).tolist()

In [21]:

            
                Copied!
                
                    
                    
                
                

        
if EMBEDDING:
    deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
                         column_idx=tab_preprocessor.column_idx,
                         embed_input=tab_preprocessor.embeddings_input,
                         continuous_cols=tab_preprocessor.continuous_cols,
                         mlp_batchnorm=True,
                         mlp_batchnorm_last=True,
                         mlp_linear_first=True)
else:
    deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
                     column_idx=tab_preprocessor.column_idx,
                     continuous_cols=tab_preprocessor.continuous_cols,
                     mlp_batchnorm=True,
                     mlp_batchnorm_last=True,
                     mlp_linear_first=True)

model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)
model
if EMBEDDING:
    deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
                         column_idx=tab_preprocessor.column_idx,
                         embed_input=tab_preprocessor.embeddings_input,
                         continuous_cols=tab_preprocessor.continuous_cols,
                         mlp_batchnorm=True,
                         mlp_batchnorm_last=True,
                         mlp_linear_first=True)
else:
    deeptabular = TabMlp(mlp_hidden_dims=hidden_layers,
                     column_idx=tab_preprocessor.column_idx,
                     continuous_cols=tab_preprocessor.continuous_cols,
                     mlp_batchnorm=True,
                     mlp_batchnorm_last=True,
                     mlp_linear_first=True)

model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)
model

Out[21]:

WideDeep(
  (deeptabular): Sequential(
    (0): TabMlp(
      (cat_embed_and_cont): CatEmbeddingsAndCont(
        (cont_norm): BatchNorm1d(13, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (tab_mlp): MLP(
        (mlp): Sequential(
          (dense_layer_0): Sequential(
            (0): Linear(in_features=13, out_features=26, bias=False)
            (1): ReLU(inplace=True)
            (2): BatchNorm1d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): Dropout(p=0.1, inplace=False)
          )
          (dense_layer_1): Sequential(
            (0): Linear(in_features=26, out_features=23, bias=False)
            (1): ReLU(inplace=True)
            (2): BatchNorm1d(23, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): Dropout(p=0.1, inplace=False)
          )
          (dense_layer_2): Sequential(
            (0): Linear(in_features=23, out_features=21, bias=False)
            (1): ReLU(inplace=True)
            (2): BatchNorm1d(21, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): Dropout(p=0.1, inplace=False)
          )
          (dense_layer_3): Sequential(
            (0): Linear(in_features=21, out_features=18, bias=False)
            (1): ReLU(inplace=True)
            (2): BatchNorm1d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): Dropout(p=0.1, inplace=False)
          )
          (dense_layer_4): Sequential(
            (0): Linear(in_features=18, out_features=16, bias=False)
            (1): ReLU(inplace=True)
            (2): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (3): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (1): Linear(in_features=16, out_features=14, bias=True)
  )
)

Optimizers and Schedulers¶

In [22]:

            
                Copied!
                
# Optimizers
deep_opt = SGD(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch = lr_scheduler.StepLR(deep_opt, step_size=2)
# Optimizers
deep_opt = SGD(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch = lr_scheduler.StepLR(deep_opt, step_size=2)

In [114]:

            
                Copied!
                
                    
                    
                
                

        
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(
    filepath="#temp_models/",
    save_best_only=True,
    verbose=1,
    max_save=1,
)

if TASK == "binary":
    objective = "binary_focal_loss"
    dataloader = DataLoaderImbalanced
if TASK == "multiclass":
    objective = "multiclass_focal_loss"
    dataloader = DataLoaderImbalanced   
    
trainer = Trainer(model,
                  objective=objective,
                  callbacks=[LRHistory(n_epochs=10)],
                  lr_schedulers={'deeptabular': deep_sch},
                  initializers={'deeptabular': XavierNormal},
                  optimizers={'deeptabular': deep_opt},
                  metrics=metrics)

trainer.fit(X_train={"X_tab": X_tab_train, "target": y_train},
            X_val={"X_tab": X_tab_valid, "target": y_valid},
            n_epochs=10,
            batch_size=100,
            custom_dataloader=dataloader,
            oversample_mul=5)
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(
    filepath="#temp_models/",
    save_best_only=True,
    verbose=1,
    max_save=1,
)

if TASK == "binary":
    objective = "binary_focal_loss"
    dataloader = DataLoaderImbalanced
if TASK == "multiclass":
    objective = "multiclass_focal_loss"
    dataloader = DataLoaderImbalanced   
    
trainer = Trainer(model,
                  objective=objective,
                  callbacks=[LRHistory(n_epochs=10)],
                  lr_schedulers={'deeptabular': deep_sch},
                  initializers={'deeptabular': XavierNormal},
                  optimizers={'deeptabular': deep_opt},
                  metrics=metrics)

trainer.fit(X_train={"X_tab": X_tab_train, "target": y_train},
            X_val={"X_tab": X_tab_valid, "target": y_valid},
            n_epochs=10,
            batch_size=100,
            custom_dataloader=dataloader,
            oversample_mul=5)

epoch 1: 100%|█| 748/748 [00:09<00:00, 81.72it/s, loss=0.105, metrics={'Accuracy': [0.075, 0.4447, 0.0113
valid: 100%|█| 1041/1041 [00:10<00:00, 96.47it/s, loss=0.0466, metrics={'Accuracy': [0.0034, 0.8101, 0.0,
epoch 2: 100%|█| 748/748 [00:10<00:00, 74.68it/s, loss=0.0376, metrics={'Accuracy': [0.0548, 0.6595, 0.00
valid: 100%|█| 1041/1041 [00:11<00:00, 93.48it/s, loss=0.0286, metrics={'Accuracy': [0.0008, 0.9956, 0.0,
epoch 3: 100%|█| 748/748 [00:13<00:00, 55.55it/s, loss=0.03, metrics={'Accuracy': [0.0773, 0.6895, 0.0086
valid: 100%|█| 1041/1041 [00:11<00:00, 87.92it/s, loss=0.0285, metrics={'Accuracy': [0.0019, 0.9941, 0.0,
epoch 4: 100%|█| 748/748 [00:10<00:00, 70.84it/s, loss=0.0294, metrics={'Accuracy': [0.0822, 0.6876, 0.01
valid: 100%|█| 1041/1041 [00:11<00:00, 89.74it/s, loss=0.0274, metrics={'Accuracy': [0.0143, 1.0, 0.0, 0.
epoch 5: 100%|█| 748/748 [00:10<00:00, 68.36it/s, loss=0.029, metrics={'Accuracy': [0.0832, 0.7022, 0.009
valid: 100%|█| 1041/1041 [00:12<00:00, 84.57it/s, loss=0.0274, metrics={'Accuracy': [0.0208, 1.0, 0.0, 0.
epoch 6: 100%|█| 748/748 [00:12<00:00, 61.49it/s, loss=0.029, metrics={'Accuracy': [0.0832, 0.6918, 0.011
valid: 100%|█| 1041/1041 [00:13<00:00, 75.65it/s, loss=0.027, metrics={'Accuracy': [0.0136, 0.9489, 0.0, 
epoch 7: 100%|█| 748/748 [00:11<00:00, 67.39it/s, loss=0.0289, metrics={'Accuracy': [0.079, 0.6971, 0.008
valid: 100%|█| 1041/1041 [00:11<00:00, 87.01it/s, loss=0.0275, metrics={'Accuracy': [0.0095, 0.9995, 0.0,
epoch 8: 100%|█| 748/748 [00:15<00:00, 47.47it/s, loss=0.029, metrics={'Accuracy': [0.0735, 0.6869, 0.013
valid: 100%|█| 1041/1041 [00:14<00:00, 73.14it/s, loss=0.0274, metrics={'Accuracy': [0.009, 1.0, 0.0, 0.0
epoch 9: 100%|█| 748/748 [00:13<00:00, 53.98it/s, loss=0.0289, metrics={'Accuracy': [0.0777, 0.6961, 0.01
valid: 100%|█| 1041/1041 [00:12<00:00, 81.94it/s, loss=0.0271, metrics={'Accuracy': [0.0111, 0.9895, 0.0,
epoch 10: 100%|█| 748/748 [00:11<00:00, 66.13it/s, loss=0.0289, metrics={'Accuracy': [0.0786, 0.6905, 0.0
valid: 100%|█| 1041/1041 [00:13<00:00, 77.42it/s, loss=0.0275, metrics={'Accuracy': [0.0114, 1.0, 0.0, 0.

Prediction & evaluation¶

Normal prediction¶

In [117]:

            
                Copied!
                
result = pd.DataFrame({"predicted": trainer.predict(X_tab=X_tab_test),
                       "ground_truth": df_test[target].values,})
result = pd.DataFrame({"predicted": trainer.predict(X_tab=X_tab_test),
                       "ground_truth": df_test[target].values,})

predict: 100%|██████████████████████████████████████████████████████| 1041/1041 [00:04<00:00, 253.97it/s]

In [118]:

            
                Copied!
                
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))

Classification report:
              precision    recall  f1-score   support

           0       0.01      0.95      0.02       747
           1       1.00      0.28      0.43     14139
           2       0.00      0.00      0.00         0
           3       0.07      0.02      0.03     16889
           4       0.16      0.01      0.02      6015
           5       1.00      0.86      0.92      4652
           6       0.00      0.00      0.00         0
           7       0.09      0.08      0.08      4382
           8       0.05      0.03      0.04      8198
           9       0.15      0.10      0.12      6105
          10       0.02      0.12      0.04       739
          11       0.23      0.05      0.08     18332
          12       0.92      0.42      0.58       291
          13       0.41      0.07      0.12     23518

    accuracy                           0.12    104007
   macro avg       0.29      0.21      0.18    104007
weighted avg       0.35      0.12      0.16    104007

Monte Carlo prediction (uncertainty)¶

requires install of pytorch-widedeep branch that was not yet merged to master - https://github.com/jrzaurin/pytorch-widedeep/tree/pmulinka/uncertainty

In [123]:

            
                Copied!
                
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=5)
result = pd.DataFrame({"predicted": df_pred_unc[:,-1],
                       "ground_truth": df_test[target].values,})
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=5)
result = pd.DataFrame({"predicted": df_pred_unc[:,-1],
                       "ground_truth": df_test[target].values,})

predict_UncertaintyIter: 100%|█████████████████████████████████████████████| 5/5 [00:15<00:00,  3.12s/it]

In [124]:

            
                Copied!
                
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))

Classification report:
              precision    recall  f1-score   support

         0.0       0.04      0.89      0.08      2847
         1.0       0.98      0.22      0.36     17144
         2.0       0.00      0.02      0.00       413
         3.0       0.06      0.02      0.03     12746
         4.0       0.09      0.01      0.01      4305
         5.0       0.94      0.77      0.85      4877
         6.0       0.01      0.08      0.01       309
         7.0       0.04      0.06      0.05      2803
         8.0       0.10      0.04      0.06      9666
         9.0       0.13      0.07      0.09      7258
        10.0       0.06      0.09      0.07      2415
        11.0       0.31      0.06      0.09     22332
        12.0       0.95      0.13      0.22       993
        13.0       0.25      0.06      0.10     15899

    accuracy                           0.14    104007
   macro avg       0.28      0.18      0.15    104007
weighted avg       0.35      0.14      0.16    104007

Extract the dictionary from a learned model¶

only in case we are using embeddings that we want to use in other models
Use built-in Tab2Vec with model and tab_preprocessor

In [115]:

            
                Copied!
                
# t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor, return_dataframe=True)
# df_test_scaled_enc, df_test_y = t2v.transform(df_test_scaled, target_col=target_col)
# df_test_scaled_eanc.head()
# t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor, return_dataframe=True)
# df_test_scaled_enc, df_test_y = t2v.transform(df_test_scaled, target_col=target_col)
# df_test_scaled_eanc.head()

dill "lesson learned" :¶

issue

you fit scalers, transformers, preprocessors, label encoders, models, etc. and you need to save them so you could use them when necessary for predictions, ie. in prediction script that is used everyday

wrong solution

use pickle to store the objects in files, or even better - create named dictionary with all models that includes the objects

why is it wrong?

As it is nicely described in this [post](https://stackoverflow.com/questions/4529815/saving-an-object-data-persistence/25119089#25119089) , pickle serializes the objects but uses references to the objects, ie. if you change anything in your code, or if you had "ad-hoc" class in notebook that you then move to repo/library then the object will be empty/non-initialized
better approach.
As mentioned in the post use "dill" that serializes also the class definition, you can also save the whole session, e.g. when you finished working in a jupyter notebook for the day and yo udo not want run all cells after you respawn the machine next day

wanna get fancy?
- you can use also klepto which extends dill with "nifty" archive types, e.g.:
  - file_archive - a dictionary-style interface to a file
  - dir_archive - a dictionary-style interface to a folder of files
- https://klepto.readthedocs.io/en/latest/index.html
- https://pypi.org/project/dill/

In [21]:

            
                Copied!
                
# with open("#temp_models/dl_entity_emb_model.dill", "wb") as f:
#     dill.dump(model, f)
# with open("#temp_models/dl_entity_emb_model_tab_preprocessor.dill", "wb") as f:
#     dill.dump(tab_preprocessor, f)
# with open("#temp_models/dl_entity_emb_model.dill", "wb") as f:
#     dill.dump(model, f)
# with open("#temp_models/dl_entity_emb_model_tab_preprocessor.dill", "wb") as f:
#     dill.dump(tab_preprocessor, f)

w Raytune¶

In [25]:

            
                Copied!
                
                    
                    
                
                

        
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)

input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
    output_layer = NUM_CLASSES
if TASK == "binary":
    output_layer = 1
hidden_layers3 = np.linspace(
    input_layer * 2, output_layer, 3, endpoint=False, dtype=int
).tolist()
hidden_layers5 = np.linspace(
    input_layer * 2, output_layer, 5, endpoint=False, dtype=int
).tolist()

config = {
    "batch_size": tune.grid_search([1000, 10000]),
    "deep_opt": tune.grid_search([deep_opt_sgd_01, deep_opt_adam_01]),
    "deep_sch": tune.grid_search([deep_sch_StepLR5]),
    "hidden_layers": tune.grid_search([hidden_layers3, hidden_layers5]),
    "wandb": {
        "project": "test",
        "api_key_file": "src/wandb_api.key",
        "log_config": True,
    },
}

if TASK == "binary":
    objective = "binary_focal_loss"
    dataloader = DataLoaderImbalanced
if TASK == "multiclass":
    objective = "multiclass_focal_loss"
    dataloader = DataLoaderImbalanced


def training_function(config, X_train, X_val):
    deeptabular = TabMlp(
        mlp_hidden_dims=config["hidden_layers"],
        column_idx=tab_preprocessor.column_idx,
        continuous_cols=tab_preprocessor.continuous_cols,
        mlp_batchnorm=True,
        mlp_batchnorm_last=True,
        mlp_linear_first=True,
    )

    model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)

    trainer = Trainer(
        model,
        objective=objective,
        callbacks=[RayTuneReporter, LRHistory(n_epochs=10)],
        lr_schedulers={"deeptabular": config["deep_sch"]},
        initializers={"deeptabular": XavierNormal},
        optimizers={"deeptabular": config["deep_opt"]},
        metrics=metrics,
        verbose=0,
    )

    trainer.fit(
        X_train=X_train,
        X_val=X_val,
        n_epochs=5,
        batch_size=config["batch_size"],
        custom_dataloader=dataloader,
        oversample_mul=5,
    )


X_train = {"X_tab": X_tab_train, "target": y_train}
X_val = {"X_tab": X_tab_valid, "target": y_valid}

# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    metric="_metric/val_loss",
    mode="min",
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)

analysis = tune.run(
    tune.with_parameters(training_function, X_train=X_train, X_val=X_val),
    # resources_per_trial={"cpu": 4, "gpu": 0},
    num_samples=1,
    progress_reporter=JupyterNotebookReporter(overwrite=True),
    scheduler=asha_scheduler,
    config=config,
    loggers=DEFAULT_LOGGERS + (WandbLogger,),
)
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)

input_layer = len(tab_preprocessor.continuous_cols)
if TASK == "multiclass":
    output_layer = NUM_CLASSES
if TASK == "binary":
    output_layer = 1
hidden_layers3 = np.linspace(
    input_layer * 2, output_layer, 3, endpoint=False, dtype=int
).tolist()
hidden_layers5 = np.linspace(
    input_layer * 2, output_layer, 5, endpoint=False, dtype=int
).tolist()

config = {
    "batch_size": tune.grid_search([1000, 10000]),
    "deep_opt": tune.grid_search([deep_opt_sgd_01, deep_opt_adam_01]),
    "deep_sch": tune.grid_search([deep_sch_StepLR5]),
    "hidden_layers": tune.grid_search([hidden_layers3, hidden_layers5]),
    "wandb": {
        "project": "test",
        "api_key_file": "src/wandb_api.key",
        "log_config": True,
    },
}

if TASK == "binary":
    objective = "binary_focal_loss"
    dataloader = DataLoaderImbalanced
if TASK == "multiclass":
    objective = "multiclass_focal_loss"
    dataloader = DataLoaderImbalanced


def training_function(config, X_train, X_val):
    deeptabular = TabMlp(
        mlp_hidden_dims=config["hidden_layers"],
        column_idx=tab_preprocessor.column_idx,
        continuous_cols=tab_preprocessor.continuous_cols,
        mlp_batchnorm=True,
        mlp_batchnorm_last=True,
        mlp_linear_first=True,
    )

    model = WideDeep(deeptabular=deeptabular, pred_dim=output_layer)

    trainer = Trainer(
        model,
        objective=objective,
        callbacks=[RayTuneReporter, LRHistory(n_epochs=10)],
        lr_schedulers={"deeptabular": config["deep_sch"]},
        initializers={"deeptabular": XavierNormal},
        optimizers={"deeptabular": config["deep_opt"]},
        metrics=metrics,
        verbose=0,
    )

    trainer.fit(
        X_train=X_train,
        X_val=X_val,
        n_epochs=5,
        batch_size=config["batch_size"],
        custom_dataloader=dataloader,
        oversample_mul=5,
    )


X_train = {"X_tab": X_tab_train, "target": y_train}
X_val = {"X_tab": X_tab_valid, "target": y_valid}

# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    metric="_metric/val_loss",
    mode="min",
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)

analysis = tune.run(
    tune.with_parameters(training_function, X_train=X_train, X_val=X_val),
    # resources_per_trial={"cpu": 4, "gpu": 0},
    num_samples=1,
    progress_reporter=JupyterNotebookReporter(overwrite=True),
    scheduler=asha_scheduler,
    config=config,
    loggers=DEFAULT_LOGGERS + (WandbLogger,),
)

== Status ==
Current time: 2021-11-05 00:27:15 (running for 00:01:54.21)
Memory usage on this node: 2.4/12.2 GiB
Using AsyncHyperBand: num_stopped=0 Bracket: Iter 90.000: None | Iter 30.000: None | Iter 10.000: None
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.7 GiB heap, 0.0/3.35 GiB objects
Result logdir: /home/palo/ray_results/training_function_2021-11-05_00-25-21
Number of trials: 8/8 (8 TERMINATED)

Trial name	status	loc	batch_size	deep_opt	deep_sch	hidden_layers	iter	total time (s)
training_function_7afc0_00000	TERMINATED	172.27.173.201:8451	1000	SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f904d3c70>	[26, 22, 18]	5	73.5314
training_function_7afc0_00001	TERMINATED	172.27.173.201:8454	10000	SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f9047ea90>	[26, 22, 18]	5	52.1587
training_function_7afc0_00002	TERMINATED	172.27.173.201:8448	1000	Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f91157fa0>	[26, 22, 18]	5	65.7284
training_function_7afc0_00003	TERMINATED	172.27.173.201:8449	10000	Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f9049d5b0>	[26, 22, 18]	5	52.5479
training_function_7afc0_00004	TERMINATED	172.27.173.201:8452	1000	SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f9050a8b0>	[26, 23, 21, 18, 16]	5	70.8522
training_function_7afc0_00005	TERMINATED	172.27.173.201:8450	10000	SGD ( Parameter Group 0 dampening: 0 lr: 0.1 momentum: 0 nesterov: False weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f9050aeb0>	[26, 23, 21, 18, 16]	5	57.7705
training_function_7afc0_00006	TERMINATED	172.27.173.201:8447	1000	Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f9050af70>	[26, 23, 21, 18, 16]	5	69.5162
training_function_7afc0_00007	TERMINATED	172.27.173.201:8965	10000	Adam ( Parameter Group 0 amsgrad: False betas: (0.9, 0.999) eps: 1e-08 lr: 0.1 weight_decay: 0 )	<torch.optim.lr_scheduler.StepLR object at 0x7f7f90495580>	[26, 23, 21, 18, 16]	5	49.4401

2021-11-05 00:27:16,111	INFO tune.py:630 -- Total run time: 114.35 seconds (113.77 seconds for the tuning loop).

best params

In [31]:

            
                Copied!
                
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")
params
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")
params

Out[31]:

{'batch_size': 1000,
 'deep_opt': Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     eps: 1e-08
     lr: 0.1
     weight_decay: 0
 ),
 'deep_sch': <torch.optim.lr_scheduler.StepLR at 0x7f7f9050af70>,
 'hidden_layers': [26, 23, 21, 18, 16]}

Tensorboard visualization¶

In [32]:

            
                Copied!
                
%load_ext tensorboard
%load_ext tensorboard

In [36]:

            
                Copied!
                
from tensorboard import notebook
notebook.list()
from tensorboard import notebook
notebook.list()

No known TensorBoard instances running.

In [37]:

            
                Copied!
                
%tensorboard --logdir ~/ray_results
%tensorboard --logdir ~/ray_results

APPENDIX¶

Logistic regression for comparisson¶

In [12]:

            
                Copied!
                
LogReg_model = LogisticRegression(random_state=RANDOM_STATE).fit(df_train_scaled.drop(columns=[target]), df_train_scaled[target])
result = pd.DataFrame({"predicted": LogReg_model.predict(df_test_scaled.drop(columns=[target])),
                       "ground_truth": df_test[target].values,})
LogReg_model = LogisticRegression(random_state=RANDOM_STATE).fit(df_train_scaled.drop(columns=[target]), df_train_scaled[target])
result = pd.DataFrame({"predicted": LogReg_model.predict(df_test_scaled.drop(columns=[target])),
                       "ground_truth": df_test[target].values,})

In [13]:

            
                Copied!
                
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))
print('Classification report:\n{}'.format(classification_report(result['predicted'], result['ground_truth'])))

Classification report:
              precision    recall  f1-score   support

           0       0.97      0.84      0.90     68348
           1       0.42      0.59      0.49      2744
           2       0.30      0.10      0.15     11340
           3       0.14      0.11      0.12      5134
           4       0.00      0.00      0.00         0
           5       0.22      0.32      0.26      2764
           6       0.00      0.00      0.00       163
           7       0.16      0.14      0.15      4564
           8       0.01      0.08      0.02       659
           9       0.13      0.12      0.12      4458
          10       0.03      0.09      0.05      1491
          11       0.12      0.28      0.16      1643
          12       0.92      0.91      0.92       135
          13       0.00      0.00      0.00       564

    accuracy                           0.61    104007
   macro avg       0.24      0.26      0.24    104007
weighted avg       0.71      0.61      0.65    104007

Different hidden layer designs¶

In [ ]:

            
                Copied!
                
                    
                    
                
                

        
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = 1

lengths = [3,5,7,9,11]

pipes = []
anti_autoencoders = []
trapezoids = []
anti_trapezoids = []
funnels = []
adj_funnels = []
apollos = []

for length in lengths:
    pipe = [input_layer]*length
    pipes.append(pipe)
    anti_autoencoder = np.linspace(input_layer, input_layer*2, ceil(length/2), dtype=int).tolist()
    anti_autoencoder.extend(anti_autoencoder[-2::-1])
    anti_autoencoders.append(anti_autoencoder)
    trapezoid = np.array([round(input_layer*1.25)]*length)
    trapezoid[[0, -1]] = input_layer
    trapezoids.append(trapezoid.tolist())
    anti_trapezoid = np.array([round(input_layer*0.75)]*length)
    anti_trapezoid[[0, -1]] = input_layer
    anti_trapezoids.append(anti_trapezoid.tolist())
    funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
    funnels.append(funnel)
    adj_funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
    adj_funnel.insert(0, input_layer)
    adj_funnels.append(adj_funnel)
    apollo = np.linspace(input_layer, input_layer*2, length, dtype=int).tolist()
    apollos.append(apollo)
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = 1

lengths = [3,5,7,9,11]

pipes = []
anti_autoencoders = []
trapezoids = []
anti_trapezoids = []
funnels = []
adj_funnels = []
apollos = []

for length in lengths:
    pipe = [input_layer]*length
    pipes.append(pipe)
    anti_autoencoder = np.linspace(input_layer, input_layer*2, ceil(length/2), dtype=int).tolist()
    anti_autoencoder.extend(anti_autoencoder[-2::-1])
    anti_autoencoders.append(anti_autoencoder)
    trapezoid = np.array([round(input_layer*1.25)]*length)
    trapezoid[[0, -1]] = input_layer
    trapezoids.append(trapezoid.tolist())
    anti_trapezoid = np.array([round(input_layer*0.75)]*length)
    anti_trapezoid[[0, -1]] = input_layer
    anti_trapezoids.append(anti_trapezoid.tolist())
    funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
    funnels.append(funnel)
    adj_funnel = np.linspace(input_layer*2, output_layer, length, endpoint=False, dtype=int).tolist()
    adj_funnel.insert(0, input_layer)
    adj_funnels.append(adj_funnel)
    apollo = np.linspace(input_layer, input_layer*2, length, dtype=int).tolist()
    apollos.append(apollo)