Imports¶
In [1]:
Copied!
import sys
import pandas as pd
# to save results to data directory
module_path = ".."
if module_path not in sys.path:
sys.path.insert(1, module_path)
# increase displayed columns in jupyter notebook
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 300)
import sys
import pandas as pd
# to save results to data directory
module_path = ".."
if module_path not in sys.path:
sys.path.insert(1, module_path)
# increase displayed columns in jupyter notebook
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 300)
In [2]:
Copied!
import dill
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import lightgbm as lgbm
import h2o
from h2o.automl import H2OAutoML
from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import TabMlp, WideDeep
from pytorch_widedeep.bayesian_models import BayesianTabMlp
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import (
EarlyStopping,
ModelCheckpoint,
LRHistory,
RayTuneReporter,
)
from pytorch_widedeep.initializers import (
KaimingNormal,
KaimingUniform,
XavierNormal,
XavierUniform,
Normal,
Uniform,
)
from pytorch_widedeep import Tab2Vec
from pytorch_widedeep.optim import RAdam
import torch
from torch.optim import Adam, SGD, lr_scheduler#, NAdam
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune import JupyterNotebookReporter
from ray.tune.integration.wandb import WandbLoggerCallback, wandb_mixin
from ray.tune.logger import DEFAULT_LOGGERS
import wandb
import src.utils as utils
import src.common as common
import tracemalloc
tracemalloc.start()
import dill
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import lightgbm as lgbm
import h2o
from h2o.automl import H2OAutoML
from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.training import Trainer
from pytorch_widedeep.models import TabMlp, WideDeep
from pytorch_widedeep.bayesian_models import BayesianTabMlp
from pytorch_widedeep.models.transformers.saint import SAINT
from pytorch_widedeep.callbacks import (
EarlyStopping,
ModelCheckpoint,
LRHistory,
RayTuneReporter,
)
from pytorch_widedeep.initializers import (
KaimingNormal,
KaimingUniform,
XavierNormal,
XavierUniform,
Normal,
Uniform,
)
from pytorch_widedeep import Tab2Vec
from pytorch_widedeep.optim import RAdam
import torch
from torch.optim import Adam, SGD, lr_scheduler#, NAdam
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune import JupyterNotebookReporter
from ray.tune.integration.wandb import WandbLoggerCallback, wandb_mixin
from ray.tune.logger import DEFAULT_LOGGERS
import wandb
import src.utils as utils
import src.common as common
import tracemalloc
tracemalloc.start()
2022-01-10 22:09:28.218228: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory 2022-01-10 22:09:28.218387: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Dataset¶
identifiers
In [3]:
Copied!
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
identifier = column_types["identifier"]
measurement_label = column_types["measurement_label"]
parameters = {
"random_state": 1,
"valid_size": 0.2,
"test_size": 0.5,
"scaler_mapper_def": {
"target_col": None,
"identifier_col": None,
"cont_cols": StandardScaler,
},
}
valid_size = parameters["valid_size"]
test_size = parameters["test_size"]
scaler_mapper_def = parameters["scaler_mapper_def"]
random_state = parameters["random_state"]
test_n_valid_combined = True
task = "multiclass" #(or "binary")
column_types = common.json_load("#datasets/Colab_PowerConverter/column_types.json")
target = column_types["target"]
identifier = column_types["identifier"]
measurement_label = column_types["measurement_label"]
parameters = {
"random_state": 1,
"valid_size": 0.2,
"test_size": 0.5,
"scaler_mapper_def": {
"target_col": None,
"identifier_col": None,
"cont_cols": StandardScaler,
},
}
valid_size = parameters["valid_size"]
test_size = parameters["test_size"]
scaler_mapper_def = parameters["scaler_mapper_def"]
random_state = parameters["random_state"]
test_n_valid_combined = True
task = "multiclass" #(or "binary")
In [4]:
Copied!
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")
df = pd.read_pickle("#datasets/Colab_PowerConverter/dataset.pkl")
In [5]:
Copied!
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)
# this measurement did not have a fault (?)
df = df[df[measurement_label]!="Single-Phase_Sensor_Fault"]
df.reset_index(inplace=True, drop=True)
In [6]:
Copied!
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
fault_dict[label] = int(i+1)
fault_dict = {}
for label,i in zip(df[measurement_label].unique(), range(len(df[measurement_label].unique()))):
df.loc[(df[measurement_label]==label) & (df[target]==1), target] = int(i+1)
fault_dict[label] = int(i+1)
In [7]:
Copied!
# imbalance of the classes
df[target].value_counts()
# imbalance of the classes
df[target].value_counts()
Out[7]:
0 597599 5 40014 3 40001 6 40001 7 40001 8 40001 9 40001 10 40001 11 40001 13 40001 1 38971 2 38971 4 3166 12 1335 Name: fault, dtype: int64
In [8]:
Copied!
fault_dict
fault_dict
Out[8]:
{'Damping-320': 1,
'Damping-32000': 2,
'Inertia-1.2': 3,
'LL_Fault': 4,
'Three-Phase_Sensor_Fault': 5,
'Weak_Grid-4_5_mH': 6,
'Weak_Grid-1_5_mH': 7,
'Damping-3200': 8,
'Inertia-0.2': 9,
'Inertia-2': 10,
'Single_Phase_Sag': 11,
'Three_Phase_Grid_Fault': 12,
'Weak_Grid-7_5_mH': 13}
Preprocessing¶
In [9]:
Copied!
df.drop(columns=[measurement_label], inplace=True)
df.drop(columns=[measurement_label], inplace=True)
In [10]:
Copied!
df_train, df_valid = train_test_split(df, test_size=valid_size, stratify=df[target], random_state=random_state)
df_valid, df_test = train_test_split(df_valid, test_size=test_size, stratify=df_valid[target], random_state=random_state)
df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
df_train, df_valid = train_test_split(df, test_size=valid_size, stratify=df[target], random_state=random_state)
df_valid, df_test = train_test_split(df_valid, test_size=test_size, stratify=df_valid[target], random_state=random_state)
df_train.reset_index(inplace=True, drop=True)
df_valid.reset_index(inplace=True, drop=True)
df_test.reset_index(inplace=True, drop=True)
In [11]:
Copied!
cont_cols = df.drop(columns=[target,identifier]).columns.values
scaler = utils.scaler_mapper(
cont_cols=cont_cols,
target_col=target,
identifier=identifier,
scaler_mapper_def=scaler_mapper_def,
)
df_train_scaled = scaler.fit_transform(df_train)
df_test_scaled = scaler.transform(df_test)
df_valid_scaled = scaler.transform(df_valid)
cont_cols = df.drop(columns=[target,identifier]).columns.values
scaler = utils.scaler_mapper(
cont_cols=cont_cols,
target_col=target,
identifier=identifier,
scaler_mapper_def=scaler_mapper_def,
)
df_train_scaled = scaler.fit_transform(df_train)
df_test_scaled = scaler.transform(df_test)
df_valid_scaled = scaler.transform(df_valid)
H2O AutoML¶
In [44]:
Copied!
h2o_test
h2o_test
| f_c | P | m_d | m_q | theta | P_ref | V_DC | V_phaseA | V_phaseB | V_phaseC | I_phaseA | I_phaseB | I_phaseC | fault | sample_id |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.0764099 | -0.0644905 | 0 | 0 | 0.103179 | 0 | 0 | 0.0136144 | -1.25391 | 1.23911 | 0.00159406 | -0.002035 | 0.000468371 | 0 | 1.06966e+06 |
| 0.0764093 | -0.06449 | 0 | 0 | 0.670499 | 0 | 0 | -1.21108 | -0.0848593 | 1.28534 | -0.0250353 | 0.0462085 | -0.0228063 | 7 | 614815 |
| 0.0764093 | -0.06449 | 0 | 0 | 1.03229 | 0 | 0 | -0.717513 | -0.727768 | 1.4383 | -0.0400163 | 0.0394656 | 0.000842797 | 7 | 630845 |
| 0.0764093 | -0.06449 | 0 | 0 | -0.0804041 | 0 | 0 | -1.25339 | 1.24611 | -0.00229003 | 0.00109732 | -0.000931532 | -0.00018652 | 0 | 741526 |
| 0.0766861 | -0.0647668 | 0 | 0 | 0.611718 | 0 | 0 | 0.00023845 | -0.00149468 | 0.00126347 | 0.035414 | -0.00863098 | -0.0292607 | 5 | 452148 |
| -15.9596 | 15.978 | 0 | 0 | -3.04602 | 0 | 0 | 0.53551 | 0.89073 | -1.42061 | 0.00213073 | 0.0130485 | -0.0164733 | 0 | 1.00344e+06 |
| 0.0766861 | -0.0647668 | 0 | 0 | 1.24128 | 0 | 0 | 0.00023845 | -0.00149468 | 0.00126347 | -0.0308137 | -0.0729725 | 0.112734 | 5 | 479973 |
| -0.0550365 | -0.05134 | 0 | 0 | -2.16788 | 0 | 0 | 1.21972 | -1.28017 | 0.0697169 | 0.0838388 | -0.00528121 | -0.0857018 | 1 | 75794 |
| 0.0764147 | -0.064495 | 0 | 0 | 0.781905 | 0 | 0 | 1.32264 | -1.16804 | -0.144324 | -0.00410528 | -0.0110269 | 0.0164344 | 6 | 539733 |
| 0.0764093 | -0.06449 | 0 | 0 | -3.30012 | 0 | 0 | -0.80531 | 1.43398 | -0.634138 | 0.00130697 | -0.00087162 | -0.000480115 | 0 | 1.0339e+06 |
Out[44]:
In [50]:
Copied!
# initialize H2O
h2o.init(log_dir="h2o_logs", log_level="WARN")
# read as h2o file
print("Reading data into H2O format")
h2o_train = h2o.H2OFrame(df_train_scaled)
h2o_valid = h2o.H2OFrame(df_valid_scaled)
h2o_test = h2o.H2OFrame(df_test_scaled)
# For binary classification, response should be a factor
h2o_train[target] = h2o_train[target].asfactor()
h2o_valid[target] = h2o_valid[target].asfactor()
h2o_test[target] = h2o_test[target].asfactor()
# Define AML task
aml = H2OAutoML(seed=random_state, max_runtime_secs=1800)
# over/under sample for classification tasks
aml.balance_classes = True
# Run it
_ = aml.train(
x=list(cont_cols),
y=target,
training_frame=h2o_train,
leaderboard_frame=h2o_valid,
)
m = aml.get_best_model()
# initialize H2O
h2o.init(log_dir="h2o_logs", log_level="WARN")
# read as h2o file
print("Reading data into H2O format")
h2o_train = h2o.H2OFrame(df_train_scaled)
h2o_valid = h2o.H2OFrame(df_valid_scaled)
h2o_test = h2o.H2OFrame(df_test_scaled)
# For binary classification, response should be a factor
h2o_train[target] = h2o_train[target].asfactor()
h2o_valid[target] = h2o_valid[target].asfactor()
h2o_test[target] = h2o_test[target].asfactor()
# Define AML task
aml = H2OAutoML(seed=random_state, max_runtime_secs=1800)
# over/under sample for classification tasks
aml.balance_classes = True
# Run it
_ = aml.train(
x=list(cont_cols),
y=target,
training_frame=h2o_train,
leaderboard_frame=h2o_valid,
)
m = aml.get_best_model()
Checking whether there is an H2O instance running at http://localhost:54321 . connected.
| H2O_cluster_uptime: | 26 mins 38 secs |
| H2O_cluster_timezone: | Europe/Madrid |
| H2O_data_parsing_timezone: | UTC |
| H2O_cluster_version: | 3.34.0.7 |
| H2O_cluster_version_age: | 16 days |
| H2O_cluster_name: | H2O_from_python_palo_ku3umh |
| H2O_cluster_total_nodes: | 1 |
| H2O_cluster_free_memory: | 2.520 Gb |
| H2O_cluster_total_cores: | 8 |
| H2O_cluster_allowed_cores: | 8 |
| H2O_cluster_status: | locked, healthy |
| H2O_connection_url: | http://localhost:54321 |
| H2O_connection_proxy: | {"http": null, "https": null} |
| H2O_internal_security: | False |
| H2O_API_Extensions: | Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 |
| Python_version: | 3.8.5 final |
Reading data into H2O format
/home/palo/miniconda3/lib/python3.8/site-packages/h2o/h2o.py:121: ResourceWarning: unclosed file <_io.BufferedReader name='/tmp/tmpq6_1h7s5.csv'>
return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
return {os.path.basename(absfilename): open(absfilename, "rb")}
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100% Parse progress: |
/home/palo/miniconda3/lib/python3.8/site-packages/h2o/h2o.py:121: ResourceWarning: unclosed file <_io.BufferedReader name='/tmp/tmpkfhzlt8y.csv'>
return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
return {os.path.basename(absfilename): open(absfilename, "rb")}
████████████████████████████████████████████████████████████████| (done) 100% Parse progress: |
/home/palo/miniconda3/lib/python3.8/site-packages/h2o/h2o.py:121: ResourceWarning: unclosed file <_io.BufferedReader name='/tmp/tmpchp3xzlo.csv'>
return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
return {os.path.basename(absfilename): open(absfilename, "rb")}
████████████████████████████████████████████████████████████████| (done) 100% AutoML progress: | 23:33:05.829: _train param, Dropping bad and constant columns: [P_ref, V_DC, m_d, m_q] ██████████████████████ 23:43:03.261: _train param, Dropping bad and constant columns: [P_ref, V_DC, m_d, m_q] █████████████████████████████ 23:57:25.774: _train param, Dropping bad and constant columns: [P_ref, V_DC, m_d, m_q] Failed polling AutoML progress log: HTTP 500 Server Error: <html> <head> <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> <title>Error 500 Server Error</title> </head> <body><h2>HTTP ERROR 500</h2> <p>Problem accessing /99/AutoML/AutoML_4_20220106_233305@@fault. Reason: <pre> Server Error</pre></p><h3>Caused by:</h3><pre>java.lang.OutOfMemoryError: Java heap space </pre> </body> </html> █ 23:57:46.590: GBM_1_AutoML_4_20220106_233305 [GBM def_5] failed: DistributedException from /127.0.0.1:54321: 'Java heap space', caused by java.lang.OutOfMemoryError: Java heap space 23:57:46.601: _train param, Dropping unused columns: [P_ref, V_DC, m_d, m_q] ███████████| (done) 100%
/home/palo/miniconda3/lib/python3.8/site-packages/h2o/h2o.py:121: ResourceWarning: unclosed file <_io.BufferedReader name='/tmp/tmpu8jj_eou.csv'>
return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
return {os.path.basename(absfilename): open(absfilename, "rb")}
--------------------------------------------------------------------------- H2OResponseError Traceback (most recent call last) /tmp/ipykernel_1835/2320084112.py in <module> 20 21 # Run it ---> 22 _ = aml.train( 23 x=list(cont_cols), 24 y=target, ~/miniconda3/lib/python3.8/site-packages/h2o/automl/_estimator.py in train(self, x, y, training_frame, fold_column, weights_column, validation_frame, leaderboard_frame, blending_frame) 594 poll_updates(self._job, 1) 595 --> 596 self._fetch() 597 return self.leader 598 ~/miniconda3/lib/python3.8/site-packages/h2o/automl/_estimator.py in _fetch(self) 623 624 def _fetch(self): --> 625 state = _fetch_state(self.key) 626 self._leader_id = state['leader_id'] 627 self._leaderboard = state['leaderboard'] ~/miniconda3/lib/python3.8/site-packages/h2o/automl/_base.py in _fetch_state(aml_id, properties, verbosity) 273 leaderboard = None 274 if should_fetch('leaderboard'): --> 275 leaderboard = _fetch_table(state_json['leaderboard_table'], key=project_name+"_leaderboard", progress_bar=False) 276 277 event_log = None ~/miniconda3/lib/python3.8/site-packages/h2o/automl/_base.py in _fetch_table(table, key, progress_bar) 250 H2OJob.__PROGRESS_BAR__ = progress_bar 251 # Parse leaderboard H2OTwoDimTable & return as an H2OFrame --> 252 fr = h2o.H2OFrame(table.cell_values, destination_frame=key, column_names=table.col_header, column_types=table.col_types) 253 return h2o.assign(fr[1:], key) # removing index and reassign id to ensure persistence on backend 254 finally: ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in __init__(self, python_obj, destination_frame, header, separator, column_names, column_types, na_strings, skipped_columns) 108 self._is_frame = True # Indicate that this is an actual frame, allowing typechecks to be made 109 if python_obj is not None: --> 110 self._upload_python_object(python_obj, destination_frame, header, separator, 111 column_names, column_types, na_strings, skipped_columns) 112 ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _upload_python_object(self, python_obj, destination_frame, header, separator, column_names, column_types, na_strings, skipped_columns) 150 csv_writer.writerows(data_to_write) 151 tmp_file.close() # close the streams --> 152 self._upload_parse(tmp_path, destination_frame, 1, separator, column_names, column_types, na_strings, skipped_columns) 153 os.remove(tmp_path) # delete the tmp file 154 ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _upload_parse(self, path, destination_frame, header, sep, column_names, column_types, na_strings, skipped_columns, quotechar, escapechar) 465 ret = h2o.api("POST /3/PostFile", filename=path) 466 rawkey = ret["destination_frame"] --> 467 self._parse(rawkey, destination_frame, header, sep, column_names, column_types, na_strings, skipped_columns, 468 quotechar=quotechar, escapechar=escapechar) 469 return self ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _parse(self, rawkey, destination_frame, header, separator, column_names, column_types, na_strings, skipped_columns, custom_non_data_line_markers, partition_by, quotechar, escapechar) 475 setup = h2o.parse_setup(rawkey, destination_frame, header, separator, column_names, column_types, na_strings, 476 skipped_columns, custom_non_data_line_markers, partition_by, quotechar, escapechar) --> 477 return self._parse_raw(setup) 478 479 ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _parse_raw(self, setup) 504 p['source_frames'] = [_quoted(src['name']) for src in setup['source_frames']] 505 --> 506 H2OJob(h2o.api("POST /3/Parse", data=p), "Parse").poll() 507 # Need to return a Frame here for nearly all callers 508 # ... but job stats returns only a dest_key, requiring another REST call to get nrow/ncol ~/miniconda3/lib/python3.8/site-packages/h2o/h2o.py in api(endpoint, data, json, filename, save_to) 119 # type checks are performed in H2OConnection class 120 _check_connection() --> 121 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to) 122 123 ~/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to) 479 save_to = save_to(resp) 480 self._log_end_transaction(start_time, resp) --> 481 return self._process_response(resp, save_to) 482 483 except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e: ~/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py in _process_response(response, save_to) 817 if status_code in {400, 404, 412} and isinstance(data, H2OErrorV3): 818 data.show_stacktrace = False --> 819 raise H2OResponseError(data) 820 821 # Server errors (notably 500 = "Server Error") H2OResponseError: Server error java.lang.IllegalArgumentException: Error: Total input file size of 436 B is much larger than total cluster memory of Zero , please use either a larger cluster or smaller data. Request: POST /3/Parse data: {'destination_frame': 'AutoML_4_20220106_233305_leaderboard', 'parse_type': 'CSV', 'separator': '44', 'check_header': '1', 'number_columns': '6', 'chunk_size': '4194304', 'delete_on_done': 'True', 'blocking': 'False', 'column_types': '["string","string","double","double","double","double"]', 'single_quotes': 'False', 'escapechar': '0', 'column_names': '["","model_id","mean_per_class_error","logloss","rmse","mse"]', 'source_frames': '["upload_8a591791a2d0d184c45385b51edf4c66"]'}
In [51]:
Copied!
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")
/home/palo/miniconda3/lib/python3.8/site-packages/h2o/h2o.py:121: ResourceWarning: unclosed file <_io.BufferedReader name='/tmp/tmpd93uewig.csv'>
return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
Object allocated at (most recent call last):
File "/home/palo/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py", lineno 720
return {os.path.basename(absfilename): open(absfilename, "rb")}
--------------------------------------------------------------------------- H2OResponseError Traceback (most recent call last) /tmp/ipykernel_1835/3835008652.py in <module> ----> 1 lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL") ~/miniconda3/lib/python3.8/site-packages/h2o/automl/autoh2o.py in get_leaderboard(aml, extra_columns) 43 """ 44 assert_is_type(aml, H2OAutoML, H2OAutoMLOutput) ---> 45 return _fetch_leaderboard(aml.key, extra_columns) ~/miniconda3/lib/python3.8/site-packages/h2o/automl/_base.py in _fetch_leaderboard(aml_id, extensions) 240 resp = h2o.api("GET /99/Leaderboards/%s" % aml_id, data=dict(extensions=extensions)) 241 dest_key = resp['project_name'].split('@', 1)[0]+"_custom_leaderboard" --> 242 return _fetch_table(resp['table'], key=dest_key, progress_bar=False) 243 244 ~/miniconda3/lib/python3.8/site-packages/h2o/automl/_base.py in _fetch_table(table, key, progress_bar) 250 H2OJob.__PROGRESS_BAR__ = progress_bar 251 # Parse leaderboard H2OTwoDimTable & return as an H2OFrame --> 252 fr = h2o.H2OFrame(table.cell_values, destination_frame=key, column_names=table.col_header, column_types=table.col_types) 253 return h2o.assign(fr[1:], key) # removing index and reassign id to ensure persistence on backend 254 finally: ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in __init__(self, python_obj, destination_frame, header, separator, column_names, column_types, na_strings, skipped_columns) 108 self._is_frame = True # Indicate that this is an actual frame, allowing typechecks to be made 109 if python_obj is not None: --> 110 self._upload_python_object(python_obj, destination_frame, header, separator, 111 column_names, column_types, na_strings, skipped_columns) 112 ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _upload_python_object(self, python_obj, destination_frame, header, separator, column_names, column_types, na_strings, skipped_columns) 150 csv_writer.writerows(data_to_write) 151 tmp_file.close() # close the streams --> 152 self._upload_parse(tmp_path, destination_frame, 1, separator, column_names, column_types, na_strings, skipped_columns) 153 os.remove(tmp_path) # delete the tmp file 154 ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _upload_parse(self, path, destination_frame, header, sep, column_names, column_types, na_strings, skipped_columns, quotechar, escapechar) 465 ret = h2o.api("POST /3/PostFile", filename=path) 466 rawkey = ret["destination_frame"] --> 467 self._parse(rawkey, destination_frame, header, sep, column_names, column_types, na_strings, skipped_columns, 468 quotechar=quotechar, escapechar=escapechar) 469 return self ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _parse(self, rawkey, destination_frame, header, separator, column_names, column_types, na_strings, skipped_columns, custom_non_data_line_markers, partition_by, quotechar, escapechar) 475 setup = h2o.parse_setup(rawkey, destination_frame, header, separator, column_names, column_types, na_strings, 476 skipped_columns, custom_non_data_line_markers, partition_by, quotechar, escapechar) --> 477 return self._parse_raw(setup) 478 479 ~/miniconda3/lib/python3.8/site-packages/h2o/frame.py in _parse_raw(self, setup) 504 p['source_frames'] = [_quoted(src['name']) for src in setup['source_frames']] 505 --> 506 H2OJob(h2o.api("POST /3/Parse", data=p), "Parse").poll() 507 # Need to return a Frame here for nearly all callers 508 # ... but job stats returns only a dest_key, requiring another REST call to get nrow/ncol ~/miniconda3/lib/python3.8/site-packages/h2o/h2o.py in api(endpoint, data, json, filename, save_to) 119 # type checks are performed in H2OConnection class 120 _check_connection() --> 121 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to) 122 123 ~/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to) 479 save_to = save_to(resp) 480 self._log_end_transaction(start_time, resp) --> 481 return self._process_response(resp, save_to) 482 483 except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e: ~/miniconda3/lib/python3.8/site-packages/h2o/backend/connection.py in _process_response(response, save_to) 817 if status_code in {400, 404, 412} and isinstance(data, H2OErrorV3): 818 data.show_stacktrace = False --> 819 raise H2OResponseError(data) 820 821 # Server errors (notably 500 = "Server Error") H2OResponseError: Server error java.lang.IllegalArgumentException: Error: Total input file size of 577 B is much larger than total cluster memory of Zero , please use either a larger cluster or smaller data. Request: POST /3/Parse data: {'destination_frame': 'AutoML_4_20220106_233305_custom_leaderboard', 'parse_type': 'CSV', 'separator': '44', 'check_header': '1', 'number_columns': '9', 'chunk_size': '4194304', 'delete_on_done': 'True', 'blocking': 'False', 'column_types': '["string","string","double","double","double","double","long","double","string"]', 'single_quotes': 'False', 'escapechar': '0', 'column_names': '["","model_id","mean_per_class_error","logloss","rmse","mse","training_time_ms","predict_time_per_row_ms","algo"]', 'source_frames': '["upload_b2adc2f5455e83f4133392c81f524ffb"]'}
In [52]:
Copied!
print(lb)
print(lb)
This H2OFrame is empty.
In [ ]:
Copied!
# Leaderboard, show and save
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")
print(lb)
# save results
h2o.export_file(lb, path="h2o_logs/leaderboard.csv"), force=True)
print(m.model_performance(h2o_valid))
# MOJO is h2o version agnostic
model_path = os.path.join(session_dir_path, "bestmodel.zip")
m.save_mojo("h2o_logs/bestmodel.zip")
predictions = m.predict(h2o_test)
# Leaderboard, show and save
lb = h2o.automl.get_leaderboard(aml, extra_columns="ALL")
print(lb)
# save results
h2o.export_file(lb, path="h2o_logs/leaderboard.csv"), force=True)
print(m.model_performance(h2o_valid))
# MOJO is h2o version agnostic
model_path = os.path.join(session_dir_path, "bestmodel.zip")
m.save_mojo("h2o_logs/bestmodel.zip")
predictions = m.predict(h2o_test)
LightGBM¶
Metric and objective functions¶
In [18]:
Copied!
def focal_loss_lgb(y_pred, dtrain, alpha, gamma, num_class):
"""
Focal Loss for lightgbm
Parameters:
-----------
y_pred: numpy.ndarray
array with the predictions
dtrain: lightgbm.Dataset
alpha, gamma: float
See original paper https://arxiv.org/pdf/1708.02002.pdf
num_class: int
number of classes
"""
a,g = alpha, gamma
y_true = dtrain.label
# N observations x num_class arrays
y_true = np.eye(num_class)[y_true.astype('int')]
y_pred = y_pred.reshape(-1,num_class, order='F')
# alpha and gamma multiplicative factors with BCEWithLogitsLoss
def fl(x,t):
p = 1/(1+np.exp(-x))
return -( a*t + (1-a)*(1-t) ) * (( 1 - ( t*p + (1-t)*(1-p)) )**g) * ( t*np.log(p)+(1-t)*np.log(1-p) )
partial_fl = lambda x: fl(x, y_true)
grad = derivative(partial_fl, y_pred, n=1, dx=1e-6)
hess = derivative(partial_fl, y_pred, n=2, dx=1e-6)
# flatten in column-major (Fortran-style) order
return grad.flatten('F'), hess.flatten('F')
def focal_loss_lgb_eval_error(y_pred, dtrain, alpha, gamma, num_class):
"""
Focal Loss for lightgbm
Parameters:
-----------
y_pred: numpy.ndarray
array with the predictions
dtrain: lightgbm.Dataset
alpha, gamma: float
See original paper https://arxiv.org/pdf/1708.02002.pdf
num_class: int
number of classes
"""
a,g = alpha, gamma
y_true = dtrain.label
y_true = np.eye(num_class)[y_true.astype('int')]
y_pred = y_pred.reshape(-1, num_class, order='F')
p = 1/(1+np.exp(-y_pred))
loss = -( a*y_true + (1-a)*(1-y_true) ) * (( 1 - ( y_true*p + (1-y_true)*(1-p)) )**g) * ( y_true*np.log(p)+(1-y_true)*np.log(1-p) )
# a variant can be np.sum(loss)/num_class
return 'focal_loss', np.mean(loss), False
def focal_loss_lgb(y_pred, dtrain, alpha, gamma, num_class):
"""
Focal Loss for lightgbm
Parameters:
-----------
y_pred: numpy.ndarray
array with the predictions
dtrain: lightgbm.Dataset
alpha, gamma: float
See original paper https://arxiv.org/pdf/1708.02002.pdf
num_class: int
number of classes
"""
a,g = alpha, gamma
y_true = dtrain.label
# N observations x num_class arrays
y_true = np.eye(num_class)[y_true.astype('int')]
y_pred = y_pred.reshape(-1,num_class, order='F')
# alpha and gamma multiplicative factors with BCEWithLogitsLoss
def fl(x,t):
p = 1/(1+np.exp(-x))
return -( a*t + (1-a)*(1-t) ) * (( 1 - ( t*p + (1-t)*(1-p)) )**g) * ( t*np.log(p)+(1-t)*np.log(1-p) )
partial_fl = lambda x: fl(x, y_true)
grad = derivative(partial_fl, y_pred, n=1, dx=1e-6)
hess = derivative(partial_fl, y_pred, n=2, dx=1e-6)
# flatten in column-major (Fortran-style) order
return grad.flatten('F'), hess.flatten('F')
def focal_loss_lgb_eval_error(y_pred, dtrain, alpha, gamma, num_class):
"""
Focal Loss for lightgbm
Parameters:
-----------
y_pred: numpy.ndarray
array with the predictions
dtrain: lightgbm.Dataset
alpha, gamma: float
See original paper https://arxiv.org/pdf/1708.02002.pdf
num_class: int
number of classes
"""
a,g = alpha, gamma
y_true = dtrain.label
y_true = np.eye(num_class)[y_true.astype('int')]
y_pred = y_pred.reshape(-1, num_class, order='F')
p = 1/(1+np.exp(-y_pred))
loss = -( a*y_true + (1-a)*(1-y_true) ) * (( 1 - ( y_true*p + (1-y_true)*(1-p)) )**g) * ( y_true*np.log(p)+(1-y_true)*np.log(1-p) )
# a variant can be np.sum(loss)/num_class
return 'focal_loss', np.mean(loss), False
In [12]:
Copied!
test_n_valid_combined = True
n_class = pd.concat([df_train_scaled, df_valid_scaled, df_test_scaled])[target].nunique()
#config = {"verbose": -1}
config = {}
# config["is_unbalance"] = True
# config["objective"] = "multiclass"
config["num_classes"] = n_class
custom = utils.LGBM_custom_score(n_class=n_class)
fobj = lambda preds, data: custom.lgbm_focal_loss(preds, data, 0.25, 1.0)
#feval = lambda preds, data: focal_loss_lgb_eval_error(preds, data, 0.25, 1.0, n_class)
feval = [
lambda preds, data: [
custom.lgbm_focal_loss_eval(preds, data, 0.25, 1.0),
custom.lgbm_f1(preds, data),
custom.lgbm_precision(preds, data),
custom.lgbm_recall(preds, data),
custom.lgbm_accuracy(preds, data),
]
]
#ray_metric = "-" + "focal_loss"
test_n_valid_combined = True
n_class = pd.concat([df_train_scaled, df_valid_scaled, df_test_scaled])[target].nunique()
#config = {"verbose": -1}
config = {}
# config["is_unbalance"] = True
# config["objective"] = "multiclass"
config["num_classes"] = n_class
custom = utils.LGBM_custom_score(n_class=n_class)
fobj = lambda preds, data: custom.lgbm_focal_loss(preds, data, 0.25, 1.0)
#feval = lambda preds, data: focal_loss_lgb_eval_error(preds, data, 0.25, 1.0, n_class)
feval = [
lambda preds, data: [
custom.lgbm_focal_loss_eval(preds, data, 0.25, 1.0),
custom.lgbm_f1(preds, data),
custom.lgbm_precision(preds, data),
custom.lgbm_recall(preds, data),
custom.lgbm_accuracy(preds, data),
]
]
#ray_metric = "-" + "focal_loss"
Datasets¶
In [13]:
Copied!
lgbtrain = lgbm.Dataset(
df_train.drop(columns=[target]+[identifier]),
df_train[target],
free_raw_data=False,
)
lgbvalid = lgbm.Dataset(
df_valid.drop(columns=[target]+[identifier]),
df_valid[target],
reference=lgbtrain,
free_raw_data=False,
)
if test_n_valid_combined:
df_testNvalid_enc = pd.concat([df_valid, df_test]).reset_index(
drop=True
)
lgbtest = lgbm.Dataset(
df_testNvalid_enc.drop(columns=[target]+[identifier]),
df_testNvalid_enc[target],
free_raw_data=False,
)
else:
lgbtest = lgbm.Dataset(
df_test.drop(columns=[target]+[identifier]),
df_test[target],
reference=lgbtrain,
free_raw_data=False,
)
lgbtrain = lgbm.Dataset(
df_train.drop(columns=[target]+[identifier]),
df_train[target],
free_raw_data=False,
)
lgbvalid = lgbm.Dataset(
df_valid.drop(columns=[target]+[identifier]),
df_valid[target],
reference=lgbtrain,
free_raw_data=False,
)
if test_n_valid_combined:
df_testNvalid_enc = pd.concat([df_valid, df_test]).reset_index(
drop=True
)
lgbtest = lgbm.Dataset(
df_testNvalid_enc.drop(columns=[target]+[identifier]),
df_testNvalid_enc[target],
free_raw_data=False,
)
else:
lgbtest = lgbm.Dataset(
df_test.drop(columns=[target]+[identifier]),
df_test[target],
reference=lgbtrain,
free_raw_data=False,
)
Train model¶
In [14]:
Copied!
%%time
model = lgbm.train(
config,
lgbtrain,
valid_sets=[lgbvalid],
fobj=fobj,
feval=feval,
)
%%time
model = lgbm.train(
config,
lgbtrain,
valid_sets=[lgbvalid],
fobj=fobj,
feval=feval,
)
[LightGBM] [Warning] Using self-defined objective function [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006237 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Info] Total Bins 2295 [LightGBM] [Info] Number of data points in the train set: 832051, number of used features: 9 [LightGBM] [Warning] Using self-defined objective function [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1] valid_0's focal_loss: 0.215605 valid_0's f1: 0.887934 valid_0's precision: 0.888266 valid_0's recall_0: 0.890163 valid_0's accuracy: 0.888266 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [2] valid_0's focal_loss: 0.188667 valid_0's f1: 0.889244 valid_0's precision: 0.889689 valid_0's recall_0: 0.894021 valid_0's accuracy: 0.889689 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [3] valid_0's focal_loss: 0.165771 valid_0's f1: 0.897054 valid_0's precision: 0.895756 valid_0's recall_0: 0.900642 valid_0's accuracy: 0.895756 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [4] valid_0's focal_loss: 0.146172 valid_0's f1: 0.898813 valid_0's precision: 0.897189 valid_0's recall_0: 0.902035 valid_0's accuracy: 0.897189 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [5] valid_0's focal_loss: 0.129293 valid_0's f1: 0.89918 valid_0's precision: 0.897891 valid_0's recall_0: 0.901601 valid_0's accuracy: 0.897891
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <timed exec> in <module> ~/miniconda3/lib/python3.8/site-packages/lightgbm/engine.py in train(params, train_set, num_boost_round, valid_sets, valid_names, fobj, feval, init_model, feature_name, categorical_feature, early_stopping_rounds, evals_result, verbose_eval, learning_rates, keep_training_booster, callbacks) 291 evaluation_result_list=None)) 292 --> 293 booster.update(fobj=fobj) 294 295 evaluation_result_list = [] ~/miniconda3/lib/python3.8/site-packages/lightgbm/basic.py in update(self, train_set, fobj) 3027 if not self.__set_objective_to_none: 3028 self.reset_parameter({"objective": "none"}).__set_objective_to_none = True -> 3029 grad, hess = fobj(self.__inner_predict(0), self.train_set) 3030 return self.__boost(grad, hess) 3031 /tmp/ipykernel_3906/2022256575.py in <lambda>(preds, data) 9 10 custom = utils.LGBM_custom_score(n_class=n_class) ---> 11 fobj = lambda preds, data: custom.lgbm_focal_loss(preds, data, 0.25, 1.0) 12 #feval = lambda preds, data: focal_loss_lgb_eval_error(preds, data, 0.25, 1.0, n_class) 13 feval = [ /mnt/c/#work/FIREMAN/FIREMAN-project/src/utils.py in lgbm_focal_loss(self, preds_raw, lgbDataset, alpha, gamma) 169 partial_fl = lambda x: self._focal_loss(x, y_true, alpha, gamma) 170 grad = derivative(partial_fl, y_pred, n=1, dx=1e-6) --> 171 hess = derivative(partial_fl, y_pred, n=2, dx=1e-6) 172 if self.n_class>2: 173 return grad.flatten('F'), hess.flatten('F') ~/miniconda3/lib/python3.8/site-packages/scipy/misc/common.py in derivative(func, x0, dx, n, args, order) 142 ho = order >> 1 143 for k in range(order): --> 144 val += weights[k]*func(x0+(k-ho)*dx,*args) 145 return val / prod((dx,)*n,axis=0) 146 /mnt/c/#work/FIREMAN/FIREMAN-project/src/utils.py in <lambda>(x) 167 y_pred = preds_raw.astype('int') 168 --> 169 partial_fl = lambda x: self._focal_loss(x, y_true, alpha, gamma) 170 grad = derivative(partial_fl, y_pred, n=1, dx=1e-6) 171 hess = derivative(partial_fl, y_pred, n=2, dx=1e-6) /mnt/c/#work/FIREMAN/FIREMAN-project/src/utils.py in _focal_loss(self, y_pred, y_true, alpha, gamma) 135 loss = ( 136 -(alpha * y_true + (1 - alpha) * (1 - y_true)) --> 137 * ((1 - (y_true * preds + (1 - y_true) * (1 - preds))) ** gamma) 138 * (y_true * np.log(preds) + (1 - y_true) * np.log(1 - preds)) 139 ) KeyboardInterrupt:
Prediction & Evaluation¶
In [79]:
Copied!
predicted = model.predict(lgbtest.data).argmax(axis=1)
actual = lgbtest.label
print(classification_report(predicted, actual))
predicted = model.predict(lgbtest.data).argmax(axis=1)
actual = lgbtest.label
print(classification_report(predicted, actual))
precision recall f1-score support
0 0.97 0.92 0.95 126433
1 0.10 0.50 0.17 1622
2 0.49 0.78 0.60 4952
3 0.12 0.26 0.17 3709
4 0.00 0.00 0.00 202
5 0.36 0.87 0.51 3298
6 0.14 0.33 0.20 3375
7 0.59 0.80 0.68 5892
8 0.22 0.21 0.22 8595
9 0.13 0.20 0.16 5105
10 0.14 0.32 0.20 3567
11 0.68 0.44 0.53 12306
12 0.00 0.00 0.00 49
13 0.79 0.22 0.34 28908
accuracy 0.70 208013
macro avg 0.34 0.42 0.34 208013
weighted avg 0.80 0.70 0.72 208013
w RayTune¶
In [54]:
Copied!
start = time()
#config["eta"] = tune.loguniform(1e-4, 1e-1),
#config["subsample"] = tune.uniform(0.5, 1.0),
config["max_depth"] = tune.randint(1, 9),
# config["wandb"]["project"] = "GBM_classifier",
# config["wandb"]["api_key_file"] = "../data/wandb_api.key",
# config["wandb"]["log_config"] = True
def training_function(config, train, valid):
lgbm_config = config.copy()
#lgbm_config.pop("wandb")
trainer = lgbm.train(
lgbm_config,
train,
valid_sets=[valid],
valid_names=[""],
callbacks=[
TuneReportCheckpointCallback(
{
ray_metric: ray_metric,
}
)
],
)
asha_scheduler = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric=ray_metric,
mode="min",
max_t=100,
grace_period=10,
reduction_factor=3,
brackets=1,
)
analysis = tune.run(
tune.with_parameters(training_function, train=lgbtrain, valid=lgbvalid),
# resources_per_trial={"cpu": 4, "gpu": 0},
num_samples=2,
progress_reporter=JupyterNotebookReporter(overwrite=True),
scheduler=asha_scheduler,
config=config,
#loggers=DEFAULT_LOGGERS + (WandbLogger,),
)
start = time()
#config["eta"] = tune.loguniform(1e-4, 1e-1),
#config["subsample"] = tune.uniform(0.5, 1.0),
config["max_depth"] = tune.randint(1, 9),
# config["wandb"]["project"] = "GBM_classifier",
# config["wandb"]["api_key_file"] = "../data/wandb_api.key",
# config["wandb"]["log_config"] = True
def training_function(config, train, valid):
lgbm_config = config.copy()
#lgbm_config.pop("wandb")
trainer = lgbm.train(
lgbm_config,
train,
valid_sets=[valid],
valid_names=[""],
callbacks=[
TuneReportCheckpointCallback(
{
ray_metric: ray_metric,
}
)
],
)
asha_scheduler = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric=ray_metric,
mode="min",
max_t=100,
grace_period=10,
reduction_factor=3,
brackets=1,
)
analysis = tune.run(
tune.with_parameters(training_function, train=lgbtrain, valid=lgbvalid),
# resources_per_trial={"cpu": 4, "gpu": 0},
num_samples=2,
progress_reporter=JupyterNotebookReporter(overwrite=True),
scheduler=asha_scheduler,
config=config,
#loggers=DEFAULT_LOGGERS + (WandbLogger,),
)
== Status ==
Current time: 2021-11-08 10:38:03 (running for 00:00:01.22)
Memory usage on this node: 3.0/12.2 GiB
Using AsyncHyperBand: num_stopped=0 Bracket: Iter 90.000: None | Iter 30.000: None | Iter 10.000: None
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.47 GiB heap, 0.0/3.24 GiB objects
Result logdir: /home/palo/ray_results/training_function_2021-11-08_10-38-02
Number of trials: 2/2 (2 ERROR)
Number of errored trials: 2
Current time: 2021-11-08 10:38:03 (running for 00:00:01.22)
Memory usage on this node: 3.0/12.2 GiB
Using AsyncHyperBand: num_stopped=0 Bracket: Iter 90.000: None | Iter 30.000: None | Iter 10.000: None
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.47 GiB heap, 0.0/3.24 GiB objects
Result logdir: /home/palo/ray_results/training_function_2021-11-08_10-38-02
Number of trials: 2/2 (2 ERROR)
| Trial name | status | loc |
|---|---|---|
| training_function_91209_00000 | ERROR | 172.18.71.208:626 |
| training_function_91209_00001 | ERROR | 172.18.71.208:624 |
Number of errored trials: 2
| Trial name | # failures | error file |
|---|---|---|
| training_function_91209_00000 | 1 | /home/palo/ray_results/training_function_2021-11-08_10-38-02/training_function_91209_00000_0_2021-11-08_10-38-02/error.txt |
| training_function_91209_00001 | 1 | /home/palo/ray_results/training_function_2021-11-08_10-38-02/training_function_91209_00001_1_2021-11-08_10-38-02/error.txt |
--------------------------------------------------------------------------- TuneError Traceback (most recent call last) /tmp/ipykernel_409/1918576441.py in <module> 37 ) 38 ---> 39 analysis = tune.run( 40 tune.with_parameters(training_function, train=lgbtrain, valid=lgbvalid), 41 # resources_per_trial={"cpu": 4, "gpu": 0}, ~/miniconda3/lib/python3.8/site-packages/ray/tune/tune.py in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, search_alg, scheduler, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, verbose, progress_reporter, log_to_file, trial_name_creator, trial_dirname_creator, sync_config, export_formats, max_failures, fail_fast, restore, server_port, resume, queue_trials, reuse_actors, trial_executor, raise_on_failed_trial, callbacks, max_concurrent_trials, loggers, _remote) 622 if incomplete_trials: 623 if raise_on_failed_trial and not state[signal.SIGINT]: --> 624 raise TuneError("Trials did not complete", incomplete_trials) 625 else: 626 logger.error("Trials did not complete: %s", incomplete_trials) TuneError: ('Trials did not complete', [training_function_91209_00000, training_function_91209_00001])
In [40]:
Copied!
analysis.trial_dataframes
analysis.trial_dataframes
Train best params model¶
In [ ]:
Copied!
runtime = time() - start
print("Optimization time:\n{}".format(runtime))
params = copy(analysis.get_best_config(ray_metric, "min"))
params.pop("wandb")
# params["n_estimators"] = 1000
start = time()
model = lgbm.train(
params,
flgbtrain,
valid_sets=[lgbtest],
callbacks=[lgbm.log_evaluation(show_stdv=False)],
)
runtime = time() - start
print("Final model training time:\n{}".format(str(datetime.timedelta(seconds=runtime))))a
runtime = time() - start
print("Optimization time:\n{}".format(runtime))
params = copy(analysis.get_best_config(ray_metric, "min"))
params.pop("wandb")
# params["n_estimators"] = 1000
start = time()
model = lgbm.train(
params,
flgbtrain,
valid_sets=[lgbtest],
callbacks=[lgbm.log_evaluation(show_stdv=False)],
)
runtime = time() - start
print("Final model training time:\n{}".format(str(datetime.timedelta(seconds=runtime))))a
Tensorboard visualization¶
In [ ]:
Copied!
from tensorboard import notebook
notebook.list()
from tensorboard import notebook
notebook.list()
In [ ]:
Copied!
%load_ext tensorboard
%tensorboard --logdir ~/ray_results
%load_ext tensorboard
%tensorboard --logdir ~/ray_results
Deep Learning Models¶
In [ ]:
Copied!
X_train, X_valid, tab_preprocessor = utils.dl_train_prep(
data_train=df_train_scaled,
data_valid=df_valid_scaled,
identifier=identifier,
cont_cols=cont_cols,
target_col=target,
)
test_n_valid_combined = True
# X_tab_test = tab_preprocessor.transform(data_test_scaled).astype(float)
if test_n_valid_combined:
test = pd.concat([df_valid_scaled, df_test_scaled]).reset_index(drop=True)
else:
test = df_test_scaled.copy()
X_test = {"X_tab": tab_preprocessor.transform(test)}
n_classes = pd.concat([df_train_scaled, df_valid_scaled, df_test_scaled])[target].nunique()
metrics = utils.dl_metrics(n_classes)
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = n_classes
hidden_layers = utils.dl_design(
input_layer, 2, output_layer, design="funnel"
).hidden_layers()
X_train, X_valid, tab_preprocessor = utils.dl_train_prep(
data_train=df_train_scaled,
data_valid=df_valid_scaled,
identifier=identifier,
cont_cols=cont_cols,
target_col=target,
)
test_n_valid_combined = True
# X_tab_test = tab_preprocessor.transform(data_test_scaled).astype(float)
if test_n_valid_combined:
test = pd.concat([df_valid_scaled, df_test_scaled]).reset_index(drop=True)
else:
test = df_test_scaled.copy()
X_test = {"X_tab": tab_preprocessor.transform(test)}
n_classes = pd.concat([df_train_scaled, df_valid_scaled, df_test_scaled])[target].nunique()
metrics = utils.dl_metrics(n_classes)
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = n_classes
hidden_layers = utils.dl_design(
input_layer, 2, output_layer, design="funnel"
).hidden_layers()
TabMLP¶
In [ ]:
Copied!
deeptabular_net = TabMlp(
mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True,
)
model = WideDeep(deeptabular=deeptabular_net, pred_dim=output_layer)
model
deeptabular_net = TabMlp(
mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True,
)
model = WideDeep(deeptabular=deeptabular_net, pred_dim=output_layer)
model
In [ ]:
Copied!
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)
In [ ]:
Copied!
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(
model,
objective=objective,
callbacks=[early_stopping, model_checkpoint],
lr_schedulers=schedulers,
initializers=initializers,
optimizers=optimizers,
metrics=metrics,
)
trainer.fit(
X_train=X_train,
X_val=X_valid,
n_epochs=50,
batch_size=1000,
custom_dataloader=dataloader,
oversample_mul=5,
)
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(
model,
objective=objective,
callbacks=[early_stopping, model_checkpoint],
lr_schedulers=schedulers,
initializers=initializers,
optimizers=optimizers,
metrics=metrics,
)
trainer.fit(
X_train=X_train,
X_val=X_valid,
n_epochs=50,
batch_size=1000,
custom_dataloader=dataloader,
oversample_mul=5,
)
In [ ]:
Copied!
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)
Transformers¶
In [ ]:
Copied!
model = SAINT(
input_dim=input_layer,
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
)
model
model = SAINT(
input_dim=input_layer,
column_idx=tab_preprocessor.column_idx,
continuous_cols=tab_preprocessor.continuous_cols,
)
model
In [ ]:
Copied!
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)
In [ ]:
Copied!
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(
model,
objective=objective,
callbacks=[early_stopping, model_checkpoint],
lr_schedulers=schedulers,
initializers=initializers,
optimizers=optimizers,
metrics=metrics,
)
trainer.fit(
X_train=X_train,
X_val=X_valid,
n_epochs=50,
batch_size=1000,
custom_dataloader=dataloader,
oversample_mul=5,
)
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(
model,
objective=objective,
callbacks=[early_stopping, model_checkpoint],
lr_schedulers=schedulers,
initializers=initializers,
optimizers=optimizers,
metrics=metrics,
)
trainer.fit(
X_train=X_train,
X_val=X_valid,
n_epochs=50,
batch_size=1000,
custom_dataloader=dataloader,
oversample_mul=5,
)
In [ ]:
Copied!
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)
Bayes¶
In [ ]:
Copied!
model = BayesianTabMlp(
mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
continuous_cols = tab_preprocessor.continuous_cols,
)
model
model = BayesianTabMlp(
mlp_hidden_dims=hidden_layers,
column_idx=tab_preprocessor.column_idx,
continuous_cols = tab_preprocessor.continuous_cols,
)
model
In [ ]:
Copied!
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)
# Initializers/Optimizers/Schedulers/callbacks
initializers = {"deeptabular": XavierNormal}
deeptab_opt = NAdam(model.deeptabular.parameters(), lr=0.001)
deeptab_sch = lr_scheduler.StepLR(deeptab_opt, step_size=5)
optimizers = {"deeptabular": deeptab_opt}
schedulers = {"deeptabular": deeptab_sch}
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, verbose=1)
In [ ]:
Copied!
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(
model,
objective=objective,
callbacks=[early_stopping, model_checkpoint],
lr_schedulers=schedulers,
initializers=initializers,
optimizers=optimizers,
metrics=metrics,
)
trainer.fit(
X_train=X_train,
X_val=X_valid,
n_epochs=50,
batch_size=1000,
custom_dataloader=dataloader,
oversample_mul=5,
)
%%time
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
trainer = Trainer(
model,
objective=objective,
callbacks=[early_stopping, model_checkpoint],
lr_schedulers=schedulers,
initializers=initializers,
optimizers=optimizers,
metrics=metrics,
)
trainer.fit(
X_train=X_train,
X_val=X_valid,
n_epochs=50,
batch_size=1000,
custom_dataloader=dataloader,
oversample_mul=5,
)
In [ ]:
Copied!
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)
actual = test[target_ltv]
predicted = trainer.predict(**X_test)
#predicted_mc = trainer.predict_uncertainty(**X_test, uncertainty_granularity=10)[:, -1],
classification_report(predicted, actual)
w RayTune¶
In [ ]:
Copied!
%%time
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_sgd_001 = SGD(model.deeptabular.parameters(), lr=0.01)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_001 = Adam(model.deeptabular.parameters(), lr=0.01)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)
deep_sch_StepLR10 = lr_scheduler.StepLR(deep_opt, step_size=10)
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = n_classes
hidden_layers2 = utils.dl_design(input_layer, 2, output_layer, design="funnel")
hidden_layers3 = utils.dl_design(input_layer, 3, output_layer, design="funnel")
hidden_layers5 = utils.dl_design(input_layer, 5, output_layer, design="funnel")
hidden_layers10 = utils.dl_design(input_layer, 10, output_layer, design="funnel")
config = {
"batch_size": tune.grid_search([100, 1000, 10000]),
"deeptab_opt": tune.grid_search(
[
deep_opt_sgd_01,
deep_opt_adam_01,
deep_opt_sgd_001,
deep_opt_adam_001,
]
),
"deeptab_sch": tune.grid_search([deep_sch_StepLR5]), # , deep_sch_StepLR10]),
"hidden_layers": tune.grid_search(
[hidden_layers2, hidden_layers3, hidden_layers5, hidden_layers10]
),
"wandb": {
"project": "dl_gm",
"api_key_file": "/home/jovyan/repos/pltv/data/wandb_api.key",
},
}
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
@wandb_mixin
def training_function(config, X_train, X_val):
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, wb=wandb)
deeptabular = TabMlp(
mlp_hidden_dims=config["hidden_layers"].hidden_layers(),
column_idx=tab_preprocessor.column_idx,
embed_input=tab_preprocessor.embeddings_input,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True,
)
model = WideDeep(wide=wide, deeptabular=deeptabular)
trainer = Trainer(
model,
objective=objective,
callbacks=[RayTuneReporter, early_stopping, model_checkpoint],
lr_schedulers={"deeptabular": config["deeptab_sch"]},
initializers={"deeptabular": XavierNormal},
optimizers={"deeptabular": config["deeptab_opt"]},
metrics=metrics,
verbose=0,
)
trainer.fit(
X_train=X_train,
X_val=X_val,
n_epochs=50,
batch_size=config["batch_size"],
custom_dataloader=dataloader,
oversample_mul=5,
)
# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="_metric/val_loss",
mode="min",
max_t=100,
grace_period=10,
reduction_factor=3,
brackets=1,
)
analysis = tune.run(
tune.with_parameters(training_function, X_train=X_train, X_val=X_valid),
# resources_per_trial={"cpu": 4, "gpu": 0},
num_samples=1,
progress_reporter=JupyterNotebookReporter(overwrite=True),
scheduler=asha_scheduler,
config=config,
callbacks=[
WandbLoggerCallback(
project=config["wandb"]["project"],
api_key_file=config["wandb"]["api_key_file"],
log_config=True,
)
],
)
%%time
# Optimizers
deep_opt_sgd_01 = SGD(model.deeptabular.parameters(), lr=0.1)
deep_opt_sgd_001 = SGD(model.deeptabular.parameters(), lr=0.01)
deep_opt_adam_01 = Adam(model.deeptabular.parameters(), lr=0.1)
deep_opt_adam_001 = Adam(model.deeptabular.parameters(), lr=0.01)
# LR Schedulers
deep_sch_StepLR5 = lr_scheduler.StepLR(deep_opt, step_size=5)
deep_sch_StepLR10 = lr_scheduler.StepLR(deep_opt, step_size=10)
input_layer = len(tab_preprocessor.continuous_cols)
output_layer = n_classes
hidden_layers2 = utils.dl_design(input_layer, 2, output_layer, design="funnel")
hidden_layers3 = utils.dl_design(input_layer, 3, output_layer, design="funnel")
hidden_layers5 = utils.dl_design(input_layer, 5, output_layer, design="funnel")
hidden_layers10 = utils.dl_design(input_layer, 10, output_layer, design="funnel")
config = {
"batch_size": tune.grid_search([100, 1000, 10000]),
"deeptab_opt": tune.grid_search(
[
deep_opt_sgd_01,
deep_opt_adam_01,
deep_opt_sgd_001,
deep_opt_adam_001,
]
),
"deeptab_sch": tune.grid_search([deep_sch_StepLR5]), # , deep_sch_StepLR10]),
"hidden_layers": tune.grid_search(
[hidden_layers2, hidden_layers3, hidden_layers5, hidden_layers10]
),
"wandb": {
"project": "dl_gm",
"api_key_file": "/home/jovyan/repos/pltv/data/wandb_api.key",
},
}
objective = "multiclass_focal_loss"
dataloader = DataLoaderImbalanced
@wandb_mixin
def training_function(config, X_train, X_val):
early_stopping = EarlyStopping()
model_checkpoint = ModelCheckpoint(save_best_only=True, wb=wandb)
deeptabular = TabMlp(
mlp_hidden_dims=config["hidden_layers"].hidden_layers(),
column_idx=tab_preprocessor.column_idx,
embed_input=tab_preprocessor.embeddings_input,
continuous_cols=tab_preprocessor.continuous_cols,
mlp_batchnorm=True,
mlp_batchnorm_last=True,
mlp_linear_first=True,
)
model = WideDeep(wide=wide, deeptabular=deeptabular)
trainer = Trainer(
model,
objective=objective,
callbacks=[RayTuneReporter, early_stopping, model_checkpoint],
lr_schedulers={"deeptabular": config["deeptab_sch"]},
initializers={"deeptabular": XavierNormal},
optimizers={"deeptabular": config["deeptab_opt"]},
metrics=metrics,
verbose=0,
)
trainer.fit(
X_train=X_train,
X_val=X_val,
n_epochs=50,
batch_size=config["batch_size"],
custom_dataloader=dataloader,
oversample_mul=5,
)
# https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#tune-scheduler-hyperband
asha_scheduler = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="_metric/val_loss",
mode="min",
max_t=100,
grace_period=10,
reduction_factor=3,
brackets=1,
)
analysis = tune.run(
tune.with_parameters(training_function, X_train=X_train, X_val=X_valid),
# resources_per_trial={"cpu": 4, "gpu": 0},
num_samples=1,
progress_reporter=JupyterNotebookReporter(overwrite=True),
scheduler=asha_scheduler,
config=config,
callbacks=[
WandbLoggerCallback(
project=config["wandb"]["project"],
api_key_file=config["wandb"]["api_key_file"],
log_config=True,
)
],
)
Train the Best model¶
In [ ]:
Copied!
%%time
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")
trainer = Trainer(
model,
objective=objective,
callbacks=[LRHistory(n_epochs=10)],
lr_schedulers={"wide": params["wide_sch"], "deeptabular": params["deeptab_sch"]},
initializers={"wide": XavierNormal, "deeptabular": XavierNormal},
optimizers={"wide": params["wide_opt"], "deeptabular": params["deeptab_opt"]},
metrics=metrics,
verbose=0,
)
trainer.fit(
X_train=X_train,
X_val=X_val,
n_epochs=5,
batch_size=params["batch_size"],
custom_dataloader=dataloader,
oversample_mul=5,
)
%%time
params = copy(analysis.get_best_config("_metric/val_loss", "min"))
params.pop("wandb")
trainer = Trainer(
model,
objective=objective,
callbacks=[LRHistory(n_epochs=10)],
lr_schedulers={"wide": params["wide_sch"], "deeptabular": params["deeptab_sch"]},
initializers={"wide": XavierNormal, "deeptabular": XavierNormal},
optimizers={"wide": params["wide_opt"], "deeptabular": params["deeptab_opt"]},
metrics=metrics,
verbose=0,
)
trainer.fit(
X_train=X_train,
X_val=X_val,
n_epochs=5,
batch_size=params["batch_size"],
custom_dataloader=dataloader,
oversample_mul=5,
)