1. Libraries¶
In [ ]:
Copied!
import pandas as pd
# https://xgboost.readthedocs.io/en/latest/
import xgboost
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# https://scikit-learn.org/stable/modules/svm.html
from sklearn import svm
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
#https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
# defining scoring strategy:
# https://scikit-learn.org/stable/modules/model_evaluation.html#defining-your-scoring-strategy-from-metric-functions
# scoring needs to be changed with string, ie : LogisticRegressionCV(cv=10, random_state=0,multi_class='multinomial', scoring="f1_score").fit(samples, labels)
# https://scikit-learn.org/stable/modules/cross_validation.html
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
# https://xgboost.readthedocs.io/en/latest/
import xgboost
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
# https://scikit-learn.org/stable/modules/svm.html
from sklearn import svm
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html#sklearn.linear_model.LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
#https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
# defining scoring strategy:
# https://scikit-learn.org/stable/modules/model_evaluation.html#defining-your-scoring-strategy-from-metric-functions
# scoring needs to be changed with string, ie : LogisticRegressionCV(cv=10, random_state=0,multi_class='multinomial', scoring="f1_score").fit(samples, labels)
# https://scikit-learn.org/stable/modules/cross_validation.html
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
2. Dataset¶
In [ ]:
Copied!
dataset_scaled = pd.read_csv('Tennessee_Event-Driven/datasets/dataset_standard_scaled.csv',index_col=False)
dataset_scaled = pd.read_csv('Tennessee_Event-Driven/datasets/dataset_standard_scaled.csv',index_col=False)
In [ ]:
Copied!
samples = dataset_scaled[dataset_scaled.columns[:-1]].values
labels = dataset_scaled['fault_id'].values
samples = dataset_scaled[dataset_scaled.columns[:-1]].values
labels = dataset_scaled['fault_id'].values
In [ ]:
Copied!
samples_train, samples_test, labels_train, labels_test = train_test_split(samples, labels, test_size=0.1)
samples_train, samples_test, labels_train, labels_test = train_test_split(samples, labels, test_size=0.1)
3. Classificators¶
In [ ]:
Copied!
# logistic regression
# For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes.
# solver = ?
LR_clf = LogisticRegression(multi_class='multinomial', solver='lbfgs')
# vdt
LRscores = cross_val_score(LR_clf, samples, labels, cv=10, scoring='f1_weighted')
LR_clf.fit(samples_train,labels_train)
LR_predicted = LR_clf.predict(samples_test)
print('LR 10CV f1_weighted scores : ' + str(LRscores))
print('LR classification report :\n' + str(classification_report(labels_test, LR_predicted)))
print('LR confusion matrix :\n' + str(confusion_matrix(labels_test, LR_predicted)))
# SVM
# about gamma='scale' issue : https://stackoverflow.com/questions/52582796/support-vector-regression-typeerror-must-be-real-number-not-str
SVM_clf = svm.SVC(decision_function_shape='ovo')
SVMscores = cross_val_score(SVM_clf, samples, labels, cv=10, scoring='f1_weighted')
SVM_clf.fit(samples_train,labels_train)
SVM_predicted = SVM_clf.predict(samples_test)
print('SVM 10CV f1_weighted scores : ' + str(SVMscores))
print('SVM classification report :\n' + str(classification_report(labels_test, SVM_predicted)))
print('SVM confusion matrix :\n' + str(confusion_matrix(labels_test, SVM_predicted)))
# xgboost
XGBOOST_clf = xgboost.XGBClassifier()
XGBOOSTscores = cross_val_score(XGBOOST_clf, samples, labels, cv=10, scoring='f1_weighted')
XGBOOST_clf.fit(samples_train,labels_train)
XGBOOST_predicted = XGBOOST_clf.predict(samples_test)
print('XGBOOST 10CV f1_weighted scores : ' + str(XGBOOSTscores))
print('XGBOOST classification report :\n' + str(classification_report(labels_test, XGBOOST_predicted)))
print('XGBOOST confusion matrix :\n' + str(confusion_matrix(labels_test, XGBOOST_predicted)))
print('XGBOOST features importances :\n' + str(XGBOOST_clf.feature_importances_))
# Random Forest
RF_clf = RandomForestClassifier()
RFscores = cross_val_score(RF_clf, samples, labels, cv=10, scoring='f1_weighted')
RF_clf.fit(samples_train,labels_train)
RF_predicted = RF_clf.predict(samples_test)
print('Random Forest 10CV f1_weighted scores : ' + str(RFscores))
print('Random Forest classification report :\n' + str(classification_report(labels_test, RF_predicted)))
print('Random Forest confusion matrix :\n' + str(confusion_matrix(labels_test, RF_predicted)))
print('Random Forest features importances :\n' + str(RF_clf.feature_importances_))
# logistic regression
# For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes.
# solver = ?
LR_clf = LogisticRegression(multi_class='multinomial', solver='lbfgs')
# vdt
LRscores = cross_val_score(LR_clf, samples, labels, cv=10, scoring='f1_weighted')
LR_clf.fit(samples_train,labels_train)
LR_predicted = LR_clf.predict(samples_test)
print('LR 10CV f1_weighted scores : ' + str(LRscores))
print('LR classification report :\n' + str(classification_report(labels_test, LR_predicted)))
print('LR confusion matrix :\n' + str(confusion_matrix(labels_test, LR_predicted)))
# SVM
# about gamma='scale' issue : https://stackoverflow.com/questions/52582796/support-vector-regression-typeerror-must-be-real-number-not-str
SVM_clf = svm.SVC(decision_function_shape='ovo')
SVMscores = cross_val_score(SVM_clf, samples, labels, cv=10, scoring='f1_weighted')
SVM_clf.fit(samples_train,labels_train)
SVM_predicted = SVM_clf.predict(samples_test)
print('SVM 10CV f1_weighted scores : ' + str(SVMscores))
print('SVM classification report :\n' + str(classification_report(labels_test, SVM_predicted)))
print('SVM confusion matrix :\n' + str(confusion_matrix(labels_test, SVM_predicted)))
# xgboost
XGBOOST_clf = xgboost.XGBClassifier()
XGBOOSTscores = cross_val_score(XGBOOST_clf, samples, labels, cv=10, scoring='f1_weighted')
XGBOOST_clf.fit(samples_train,labels_train)
XGBOOST_predicted = XGBOOST_clf.predict(samples_test)
print('XGBOOST 10CV f1_weighted scores : ' + str(XGBOOSTscores))
print('XGBOOST classification report :\n' + str(classification_report(labels_test, XGBOOST_predicted)))
print('XGBOOST confusion matrix :\n' + str(confusion_matrix(labels_test, XGBOOST_predicted)))
print('XGBOOST features importances :\n' + str(XGBOOST_clf.feature_importances_))
# Random Forest
RF_clf = RandomForestClassifier()
RFscores = cross_val_score(RF_clf, samples, labels, cv=10, scoring='f1_weighted')
RF_clf.fit(samples_train,labels_train)
RF_predicted = RF_clf.predict(samples_test)
print('Random Forest 10CV f1_weighted scores : ' + str(RFscores))
print('Random Forest classification report :\n' + str(classification_report(labels_test, RF_predicted)))
print('Random Forest confusion matrix :\n' + str(confusion_matrix(labels_test, RF_predicted)))
print('Random Forest features importances :\n' + str(RF_clf.feature_importances_))
3.1. Results¶
LR 10CV f1_weighted scores :
[0.42448604 0.45865517 0.47881965 0.24104686 0.25607518 0.43588939 0.46481111 0.48854089 0.49014324 0.41764563]
LR classification report :
precision recall f1-score support
0 0.11 0.07 0.09 135
1 0.94 0.84 0.89 151
2 0.88 0.81 0.84 130
3 0.12 0.14 0.13 137
4 0.66 0.90 0.76 153
5 0.88 0.92 0.90 137
6 1.00 0.89 0.94 164
7 0.98 0.91 0.94 139
8 0.28 0.41 0.33 151
9 0.12 0.11 0.12 134
10 0.54 0.39 0.45 161
11 0.05 0.07 0.06 134
12 0.29 0.27 0.28 138
13 0.46 0.51 0.49 139
14 0.03 0.01 0.02 134
15 0.12 0.14 0.13 132
16 0.62 0.33 0.43 138
17 0.72 0.71 0.71 154
18 0.87 0.81 0.84 161
19 0.23 0.19 0.21 160
20 0.54 0.61 0.58 147
21 0.27 0.43 0.33 141
accuracy 0.49 3170
macro avg 0.49 0.48 0.48 3170
weighted avg 0.50 0.49 0.49 3170
LR confusion matrix :
[[ 10 0 0 25 0 0 0 0 16 9 1 16 3 4 7 13 1 0 2 12 1 15]
[ 3 127 0 1 0 0 0 0 0 1 1 4 0 2 2 5 1 0 0 2 0 2]
[ 1 0 105 2 0 0 0 0 1 3 1 4 0 0 2 1 1 0 0 3 2 4]
[ 9 0 0 19 0 0 0 0 12 7 6 21 4 5 7 14 0 0 2 13 2 16]
[ 0 0 0 4 138 0 0 0 1 3 1 2 0 0 0 2 0 0 0 1 1 0]
[ 0 0 0 3 0 126 0 0 0 1 1 3 0 0 0 0 0 0 0 1 0 2]
[ 1 0 0 1 0 0 146 0 3 3 0 3 1 0 1 0 0 0 0 0 0 5]
[ 1 0 0 0 0 0 0 127 1 2 0 1 0 0 1 5 0 0 0 0 0 1]
[ 2 7 14 2 1 0 0 0 62 1 4 5 17 12 1 3 4 1 0 4 10 1]
[ 9 0 0 12 0 0 0 0 12 15 4 18 6 2 7 10 2 0 2 9 7 19]
[ 3 0 0 8 0 0 0 0 9 9 63 10 10 5 2 14 0 0 0 6 4 18]
[ 6 0 0 5 28 0 0 1 15 8 3 9 3 8 2 16 1 5 1 9 4 10]
[ 2 0 0 10 0 15 0 1 13 5 3 5 37 21 1 5 1 0 9 4 2 4]
[ 2 0 1 2 2 0 0 0 14 0 0 5 13 71 2 2 10 0 1 1 0 13]
[ 4 1 0 3 33 0 0 0 4 21 3 8 1 0 2 7 2 37 0 4 2 2]
[ 10 0 0 14 0 0 0 1 17 7 8 10 2 8 6 19 1 0 1 7 2 19]
[ 7 0 0 12 0 0 0 0 7 4 7 6 4 9 4 10 45 0 0 11 2 10]
[ 1 0 0 1 6 0 0 0 3 6 1 6 5 0 1 3 0 109 0 4 3 5]
[ 2 0 0 5 0 3 0 0 7 1 1 4 1 0 0 4 0 0 130 1 0 2]
[ 10 0 0 11 0 0 0 0 14 9 3 6 18 0 3 11 4 0 1 31 30 9]
[ 1 0 0 8 0 0 0 0 5 3 2 13 1 1 6 4 0 0 1 5 90 7]
[ 3 0 0 6 1 0 0 0 6 7 3 14 2 5 9 15 0 0 0 5 4 61]]
SVM 10CV f1_weighted scores :
[0.5435794 0.6021387 0.5920755 0.30431432 0.32529879 0.5764909 0.59781791 0.62112622 0.63663635 0.66662602]
SVM classification report :
precision recall f1-score support
0 0.13 0.27 0.17 135
1 1.00 0.83 0.91 151
2 1.00 0.81 0.89 130
3 0.13 0.28 0.17 137
4 0.87 0.88 0.87 153
5 0.96 0.88 0.92 137
6 1.00 0.89 0.94 164
7 1.00 0.91 0.95 139
8 0.98 0.87 0.92 151
9 0.14 0.24 0.18 134
10 0.53 0.30 0.38 161
11 0.36 0.19 0.25 134
12 0.95 0.77 0.85 138
13 0.97 0.85 0.90 139
14 0.98 0.71 0.82 134
15 0.13 0.33 0.18 132
16 0.40 0.18 0.25 138
17 0.88 0.75 0.81 154
18 0.99 0.78 0.88 161
19 0.60 0.56 0.58 160
20 0.83 0.50 0.62 147
21 0.93 0.27 0.42 141
accuracy 0.60 3170
macro avg 0.72 0.59 0.63 3170
weighted avg 0.72 0.60 0.64 3170
SVM confusion matrix :
[[ 37 0 0 36 0 0 0 0 0 19 1 3 0 0 0 27 4 0 0 5 3 0]
[ 6 126 0 5 0 0 0 0 3 3 0 1 0 0 0 5 1 0 0 1 0 0]
[ 6 0 105 8 0 0 0 0 0 3 0 2 0 0 0 5 0 0 0 1 0 0]
[ 22 0 0 39 0 0 0 0 0 24 2 4 0 0 0 35 2 0 0 6 2 1]
[ 3 0 0 4 135 0 0 0 0 4 0 2 0 0 0 4 0 0 0 1 0 0]
[ 2 0 0 3 0 121 0 0 0 0 0 1 0 0 0 4 1 0 0 5 0 0]
[ 3 0 0 0 0 0 146 0 0 6 0 2 0 0 0 6 0 0 0 0 0 1]
[ 1 0 0 2 0 0 0 127 0 3 1 0 0 0 0 5 0 0 0 0 0 0]
[ 4 0 0 3 0 0 0 0 132 3 1 1 2 1 0 2 2 0 0 0 0 0]
[ 29 0 0 20 0 0 0 0 0 32 4 5 0 0 0 36 3 0 0 5 0 0]
[ 23 0 0 25 0 0 0 0 0 13 48 4 0 1 0 25 10 0 0 7 5 0]
[ 21 0 0 21 21 0 0 0 0 10 4 26 0 0 1 20 4 1 0 5 0 0]
[ 6 0 0 5 0 4 0 0 0 4 4 1 106 2 0 4 1 0 1 0 0 0]
[ 3 0 0 2 0 0 0 0 0 4 0 2 1 118 0 7 2 0 0 0 0 0]
[ 4 0 0 3 0 0 0 0 0 6 0 4 0 0 95 5 0 15 0 1 1 0]
[ 34 0 0 26 0 0 0 0 0 19 1 1 0 0 0 43 2 0 0 5 1 0]
[ 21 0 0 26 0 0 0 0 0 14 14 3 0 0 0 26 25 0 0 6 2 1]
[ 5 0 0 10 0 0 0 0 0 7 1 1 0 0 1 10 1 116 0 2 0 0]
[ 6 0 0 9 0 0 0 0 0 5 0 1 3 0 0 7 1 0 126 3 0 0]
[ 17 0 0 22 0 0 0 0 0 17 1 1 0 0 0 11 1 0 0 90 0 0]
[ 13 0 0 18 0 1 0 0 0 13 6 2 0 0 0 18 0 0 0 3 73 0]
[ 22 0 0 24 0 0 0 0 0 14 2 5 0 0 0 28 2 0 0 5 1 38]]
XGBOOST 10CV f1_weighted scores :
[0.68392688 0.74462579 0.74093651 0.36879107 0.41598073 0.72445736 0.7336726 0.7663613 0.70552416 0.75007832]
XGBOOST classification report :
precision recall f1-score support
0 0.25 0.47 0.33 135
1 1.00 0.85 0.92 151
2 1.00 0.83 0.91 130
3 0.34 0.52 0.41 137
4 0.94 0.90 0.92 153
5 0.94 0.91 0.93 137
6 1.00 0.89 0.94 164
7 0.99 0.91 0.95 139
8 0.96 0.85 0.91 151
9 0.34 0.43 0.38 134
10 0.75 0.71 0.73 161
11 0.86 0.69 0.77 134
12 0.92 0.77 0.84 138
13 0.93 0.85 0.89 139
14 1.00 0.86 0.92 134
15 0.26 0.43 0.32 132
16 0.82 0.56 0.66 138
17 0.94 0.84 0.89 154
18 0.98 0.81 0.89 161
19 0.71 0.72 0.72 160
20 0.75 0.64 0.69 147
21 1.00 0.91 0.95 141
accuracy 0.75 3170
macro avg 0.80 0.74 0.77 3170
weighted avg 0.81 0.75 0.77 3170
XGBOOST confusion matrix :
[[ 63 0 0 23 0 0 0 0 0 22 1 2 0 0 0 17 1 0 0 6 0 0]
[ 11 129 0 2 0 0 0 0 2 1 0 0 0 0 0 5 0 0 0 1 0 0]
[ 12 0 108 2 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 2 2 0]
[ 17 0 0 71 0 1 0 0 0 16 2 2 0 0 0 16 2 1 1 6 2 0]
[ 3 0 0 2 137 0 0 0 0 1 0 2 0 0 0 7 0 0 0 1 0 0]
[ 5 0 0 1 0 125 0 0 0 0 0 0 0 0 0 4 0 1 0 1 0 0]
[ 8 0 0 1 0 0 146 0 0 2 0 0 0 0 0 7 0 0 0 0 0 0]
[ 5 0 0 0 0 0 0 127 0 4 0 0 0 0 0 3 0 0 0 0 0 0]
[ 4 0 0 3 0 0 0 0 129 3 1 0 4 5 0 1 0 0 0 1 0 0]
[ 15 0 0 18 0 0 0 0 0 58 3 1 0 0 0 26 2 1 0 6 4 0]
[ 9 0 0 10 0 0 0 0 0 6 114 0 1 0 0 12 7 0 0 1 1 0]
[ 10 0 0 7 8 0 0 0 0 7 0 93 0 0 0 5 0 1 0 1 2 0]
[ 6 0 0 2 0 2 0 1 2 1 3 0 106 4 0 9 0 0 0 1 1 0]
[ 7 0 0 1 0 0 0 0 1 1 0 0 4 118 0 4 2 0 0 1 0 0]
[ 5 0 0 3 0 0 0 0 0 2 0 0 0 0 115 4 0 4 0 1 0 0]
[ 24 0 0 17 0 0 0 0 0 14 7 2 0 0 0 57 1 0 0 5 5 0]
[ 7 0 0 12 0 1 0 0 0 7 16 0 0 0 0 10 77 0 0 2 6 0]
[ 6 0 0 2 0 0 0 0 0 6 0 0 0 0 0 7 0 130 0 1 2 0]
[ 4 0 0 3 0 4 0 0 0 5 0 0 0 0 0 8 0 0 131 5 1 0]
[ 13 0 0 14 0 0 0 0 0 3 0 5 0 0 0 4 1 0 0 115 5 0]
[ 14 0 0 8 0 0 0 0 0 10 4 1 0 0 0 10 1 0 1 4 94 0]
[ 4 0 0 4 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 1 128]]
XGBOOST features importances :
[0.12379248 0.0047626 0.00850815 0.04373371 0.0364475 0.00078652 0.01043513 0.01692818 0.03933102 0.10539821 0.01340495 0.0005318 0.01391175 0.0010541 0.00046664 0.01039928 0.0308312 0.01555407 0.02439173 0.01242347 0.0381037 0.0296824 0.00856636 0.0043253 0.00754108 0.00109454 0.00474678 0.00864964 0.00869501 0.00915563 0.01988467 0.00742988 0.01122067 0.0132043 0.00754636 0.00789573 0.00406679 0.00796529 0.00421074 0.00391345 0.00462407 0.00291873 0.00429754 0.03122465 0.10151894 0.02033001 0.02878321 0.00032342 0. 0.01245736 0.03271366 0.03981758]
Random Forest 10CV f1_weighted scores :
[0.69059684 0.74095688 0.74139596 0.34923431 0.42912053 0.71319971 0.7265349 0.76775074 0.67521654 0.72273346]
Random Forest classification report :
precision recall f1-score support
0 0.39 0.72 0.51 135
1 1.00 0.89 0.94 151
2 1.00 0.83 0.91 130
3 0.59 0.74 0.66 137
4 0.88 0.89 0.88 153
5 0.86 0.85 0.85 137
6 0.99 0.90 0.95 164
7 1.00 0.94 0.97 139
8 1.00 0.96 0.98 151
9 0.52 0.65 0.58 134
10 0.96 0.84 0.90 161
11 0.91 0.73 0.81 134
12 1.00 0.90 0.95 138
13 0.99 0.92 0.96 139
14 1.00 0.87 0.93 134
15 0.49 0.65 0.56 132
16 0.93 0.82 0.87 138
17 0.94 0.88 0.91 154
18 1.00 0.84 0.92 161
19 0.80 0.88 0.84 160
20 0.85 0.72 0.78 147
21 0.99 0.91 0.95 141
accuracy 0.84 3170
macro avg 0.87 0.83 0.85 3170
weighted avg 0.87 0.84 0.85 3170
Random Forest confusion matrix :
[[ 97 0 0 10 0 0 0 0 0 14 0 1 0 0 0 9 0 0 0 4 0 0]
[ 8 135 0 1 0 0 0 0 0 2 0 0 0 0 0 5 0 0 0 0 0 0]
[ 13 0 108 1 0 0 0 0 0 1 0 0 0 0 0 6 0 0 0 1 0 0]
[ 12 0 0 102 0 1 0 0 0 4 0 1 0 0 0 8 1 1 0 5 2 0]
[ 0 0 0 0 136 14 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0]
[ 4 0 0 2 10 116 0 0 0 3 0 0 0 0 0 1 0 1 0 0 0 0]
[ 9 0 0 1 0 0 148 0 0 1 0 0 0 0 0 5 0 0 0 0 0 0]
[ 3 0 0 0 0 0 0 131 0 1 0 0 0 0 0 4 0 0 0 0 0 0]
[ 3 0 0 2 0 0 0 0 145 0 0 0 0 0 0 1 0 0 0 0 0 0]
[ 13 0 0 11 0 0 0 0 0 87 0 1 0 0 0 9 2 1 0 5 4 1]
[ 9 0 0 5 0 0 0 0 0 6 136 0 0 0 0 2 2 0 0 1 0 0]
[ 7 0 0 6 9 0 0 0 0 5 0 98 0 0 0 6 0 2 0 1 0 0]
[ 6 0 0 0 0 1 0 0 0 2 0 0 124 1 0 3 0 0 0 0 1 0]
[ 6 0 0 0 0 0 0 0 0 3 0 0 0 128 0 1 0 0 0 1 0 0]
[ 6 0 0 2 0 0 0 0 0 1 0 0 0 0 117 3 0 4 0 1 0 0]
[ 14 0 0 7 0 1 0 0 0 15 2 0 0 0 0 86 1 0 0 3 3 0]
[ 7 0 0 4 0 0 0 0 0 4 2 0 0 0 0 2 113 0 0 2 4 0]
[ 4 0 0 4 0 0 0 0 0 4 0 0 0 0 0 5 0 135 0 1 1 0]
[ 6 0 0 2 0 1 0 0 0 5 0 0 0 0 0 7 0 0 136 3 1 0]
[ 1 0 0 7 0 0 0 0 0 3 0 3 0 0 0 3 2 0 0 140 1 0]
[ 15 0 0 7 0 1 1 0 0 4 1 2 0 0 0 6 0 0 0 4 106 0]
[ 4 0 0 0 0 0 0 0 0 2 0 0 0 0 0 3 0 0 0 2 1 129]]
Random Forest features importances :
[0.0373927 0.00810206 0.01020342 0.02006777 0.01795898 0.01199323 0.02037662 0.01540028 0.0343393 0.02601041 0.02151706 0.0064665 0.02009364 0.0069221 0.006431 0.02152803 0.0142307 0.02657733 0.03480786 0.02227191 0.04406107 0.01665294 0.01672006 0.01113766 0.01661354 0.00877211 0.01333297 0.01801175 0.01938749 0.01216083 0.01905983 0.0100468 0.01381571 0.01823266 0.01333095 0.01168955 0.01188482 0.01856537 0.01270057 0.01215902 0.01280374 0.00821088 0.01059093 0.03070568 0.06515119 0.02355875 0.02400868 0.00638174 0.00640568 0.0309878 0.06080765 0.01936067]
GridSearchCV for better parameter values¶
In [ ]:
Copied!
param_grid = [{'n_estimators': [100, 200, 500],
'max_features': ['auto', 'log2'],
'max_depth' : [5,10,50,100,None],
'criterion' :['gini', 'entropy']}]
RF_clf_gs = GridSearchCV(estimator = RandomForestClassifier(n_estimators=100), param_grid=param_grid, scoring='f1',n_jobs=4, cv=10)
RF_clf_gs.fit(samples, labels)
means = RF_clf_gs.cv_results_['mean_test_score']
stds = RF_clf_gs.cv_results_['std_test_score']
print('RF 10CV f1 score mean with 95% confidence interval : ')
for mean, std, params in zip(means, stds, RF_clf_gs.cv_results_['params']):
print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
param_grid = [{'n_estimators': [100, 200, 500],
'max_features': ['auto', 'log2'],
'max_depth' : [5,10,50,100,None],
'criterion' :['gini', 'entropy']}]
RF_clf_gs = GridSearchCV(estimator = RandomForestClassifier(n_estimators=100), param_grid=param_grid, scoring='f1',n_jobs=4, cv=10)
RF_clf_gs.fit(samples, labels)
means = RF_clf_gs.cv_results_['mean_test_score']
stds = RF_clf_gs.cv_results_['std_test_score']
print('RF 10CV f1 score mean with 95% confidence interval : ')
for mean, std, params in zip(means, stds, RF_clf_gs.cv_results_['params']):
print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
In [ ]:
Copied!