08_Tennessee_Variables_PekkaR
In [1]:
Copied!
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from glob import glob
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from glob import glob
import datetime
Contextualizing the problem¶
Normal operating condition
In [2]:
Copied!
# Load the test file shown under "Normal operating condition" and display its
# (rows, columns) shape.
data = np.genfromtxt('d00_te.dat')
data.shape
data = np.genfromtxt('d00_te.dat')
data.shape
Out[2]:
(960, 52)
Fault #1
In [3]:
Copied!
# Load the Fault #1 test file and display its (rows, columns) shape.
# The original cell printed `data.shape` (the normal-operation array loaded in
# the previous cell) instead of the array that was just read.
fault_1 = np.genfromtxt('d01_te.dat')
fault_1.shape
Out[3]:
(960, 52)
Dataset for each fault¶
Plot of variable 1
In [4]:
Copied!
# Load every test file into a DataFrame and overlay variable 1 of each one.
#
# glob() gives no ordering guarantee, so the matches are sorted: key 0 is then
# the lowest-numbered file (d00) and key k is the file for fault k.
paths = sorted(glob('d*_te.dat'))
dfs = {}
columns_name = range(1, 53)
for mov, path in enumerate(paths):
    data = np.genfromtxt(path)
    dfs[mov] = pd.DataFrame(data=data, columns=columns_name)
    # NOTE(review): the export lost indentation; the per-file plot is assumed
    # to belong to the loop body ("Plot of variable 1" for each data set).
    plt.plot(dfs[mov].iloc[:, 0])
# Red star on sample 160 of the first data set (the later figures label this
# sample 'fault inception').
plt.plot(160, dfs[0].iloc[160, 0], 'r*', markersize=14)
plt.show()
In [5]:
Copied!
# Summary statistics of variable 1 (first column) in the data set stored
# under key 0.
baseline_v1 = dfs[0].iloc[:, 0]
max_v1 = baseline_v1.max()
mean_v1 = baseline_v1.mean()
min_v1 = baseline_v1.min()

# Sample axis, sized from the data set stored under key 1.
plot_x = range(0, len(dfs[1].iloc[:, 0]))

# Band limits placed at 90% of the distance from the mean to the max / min.
mean_up = mean_v1 + (0.9 * abs(max_v1 - mean_v1))
mean_down = mean_v1 - (0.9 * abs(min_v1 - mean_v1))
In [6]:
Copied!
# Draw the five reference levels (max, upper band, mean, lower band, min) as
# red horizontal lines, then variable 1 of the data set under key 0 on top.
n_points = len(dfs[1].iloc[:, 0])
for level in (max_v1, mean_up, mean_v1, mean_down, min_v1):
    plt.plot(plot_x, [level] * n_points, 'r')
plt.plot(dfs[0].iloc[:, 0])
plt.show()
New signal¶
In [7]:
Copied!
# Preview the first rows of the data set stored under key 1
# (presumably Fault #1 -- this depends on the order glob() returned the files).
dfs1 = dfs[1]
dfs1.head()
dfs1 = dfs[1]
dfs1.head()
Out[7]:
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.24975 | 3643.7 | 4515.6 | 9.3650 | 26.918 | 42.056 | 2703.7 | 75.977 | 120.40 | 0.33576 | ... | 53.904 | 24.720 | 62.749 | 22.190 | 40.230 | 40.289 | 45.095 | 47.298 | 41.250 | 18.073 |
| 1 | 0.25070 | 3675.9 | 4508.2 | 9.3657 | 26.505 | 42.221 | 2705.4 | 74.833 | 120.39 | 0.33750 | ... | 53.993 | 24.626 | 62.241 | 22.007 | 40.018 | 38.353 | 44.789 | 47.440 | 41.108 | 17.501 |
| 2 | 0.24939 | 3648.0 | 4539.0 | 9.2663 | 27.008 | 42.607 | 2706.8 | 75.550 | 120.42 | 0.33778 | ... | 53.624 | 24.763 | 61.248 | 22.228 | 39.982 | 38.718 | 47.165 | 47.459 | 41.555 | 18.627 |
| 3 | 0.25143 | 3668.3 | 4481.5 | 9.2949 | 26.714 | 42.302 | 2705.5 | 75.441 | 120.41 | 0.33601 | ... | 54.313 | 24.562 | 61.412 | 22.103 | 40.198 | 36.441 | 47.802 | 47.504 | 41.310 | 20.383 |
| 4 | 0.25546 | 3624.7 | 4457.6 | 9.3807 | 26.906 | 42.227 | 2706.9 | 76.041 | 120.37 | 0.34574 | ... | 54.621 | 25.354 | 60.937 | 22.305 | 40.550 | 40.812 | 47.486 | 47.509 | 41.037 | 18.659 |
5 rows × 52 columns
In [8]:
Copied!
# Plot the column labelled 1 (the first variable) of dfs1.
plt.plot(dfs1[1])
plt.plot(dfs1[1])
Out[8]:
[<matplotlib.lines.Line2D at 0x7fcbf6bf8160>]
Generic function for all variables, processing one variable at a time¶
In [9]:
Copied!
def _read_te_file(path):
    """Read one whitespace-separated data file into a DataFrame with columns 1..N."""
    values = np.genfromtxt(path)
    return pd.DataFrame(data=values, columns=range(1, values.shape[1] + 1))


def tennessee_resampling(data, v, f, p):
    """Dead-band resample one variable of one data set against a baseline.

    The files matching ``data`` are sorted by name; the first one is used as
    the baseline and the one at position ``f`` as the data set to resample.
    Every sample of variable ``v`` in that data set that lies inside the band
    ``[mean_down, mean_up]`` is overwritten with the baseline mean, where the
    band extends ``p`` percent of the way from the baseline mean towards the
    baseline maximum / minimum of the same variable.

    Parameters
    ----------
    data : str
        Glob pattern of the input files (e.g. ``'d*_te.dat'``).
    v : int or int-like
        1-based variable (column) number.
    f : int or int-like
        Position of the data set to resample in the sorted file list
        (0 selects the baseline itself).
    p : number
        Band width in percent of the baseline spread around the mean.

    Returns
    -------
    tuple
        ``(dfs1, index, plot_x, plot_yu, plot_yd)``: the resampled DataFrame,
        the fraction of its samples that fell inside the band, a ``range``
        over the baseline sample numbers, and two constant lists holding the
        upper / lower margin for plotting.  When ``p == 0`` the margins are
        drawn at 90 % while the band itself collapses to the mean.

    Raises
    ------
    KeyError
        If ``f`` does not address one of the matched files (this includes the
        case where the pattern matches nothing).
    """
    v = int(v)
    f = int(f)

    # glob() returns matches in arbitrary order; sorting makes position 0 the
    # lowest-numbered file and position k the k-th one, which is what the
    # positional lookups below rely on.
    paths = sorted(glob(data))
    if not 0 <= f < len(paths):
        raise KeyError(f)

    # Only the baseline and the requested data set are needed, so the other
    # files are not read.
    baseline = _read_te_file(paths[0])
    dfs1 = baseline if f == 0 else _read_te_file(paths[f])

    reference = baseline.iloc[:, v - 1]
    max_v1 = reference.max()
    mean_v1 = reference.mean()
    min_v1 = reference.min()
    spread_up = abs(max_v1 - mean_v1)
    spread_down = abs(min_v1 - mean_v1)

    # Band limits: p percent of the way from the mean to the max / min.
    mean_up = mean_v1 + ((p / 100) * spread_up)
    mean_down = mean_v1 - ((p / 100) * spread_down)

    if p == 0:
        # With p == 0 the band is degenerate; still draw the 90 % margins.
        margin_up = mean_v1 + ((90 / 100) * spread_up)
        margin_down = mean_v1 - ((90 / 100) * spread_down)
    else:
        margin_up = mean_up
        margin_down = mean_down

    # Samples inside the band (limits included) are replaced by the mean.
    inside = dfs1[v].between(mean_down, mean_up)
    dfs1.loc[inside, v] = mean_v1
    index = int(inside.sum()) / len(dfs1)

    plot_x = range(0, len(baseline))
    plot_yu = [margin_up] * len(plot_x)
    plot_yd = [margin_down] * len(plot_x)
    return dfs1, index, plot_x, plot_yu, plot_yd
Picking a variable (1 ≤ v ≤ 52) and a fault (1 ≤ fault ≤ 21)¶
In [10]:
Copied!
from matplotlib import style

# The bundled seaborn styles were renamed with a 'seaborn-v0_8-' prefix in
# Matplotlib 3.6 and the old names were later removed, so prefer the new name
# and fall back to the old one on older installations.
paper_style = 'seaborn-v0_8-paper' if 'seaborn-v0_8-paper' in style.available else 'seaborn-paper'
style.use(paper_style)
In [11]:
Copied!
# Set the default font size to 16 for the figures created after this cell.
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
In [12]:
Copied!
# Variable to plot (1-based column number, 1..52).
variable = 5
# Fault data set to plot (1..21; 0 is the normal-operation file).
fault = 10
# Sample marked as 'fault inception' in both figures.
fault_inception = 160


def _plot_resampled(percentage, data_label, output_file):
    """Resample `variable` of `fault` with the given band, plot and save it.

    Returns the tuple produced by tennessee_resampling so the caller can keep
    the resampled frame, the compression ratio and the margin lines.
    """
    plt.figure(figsize=(14, 5))
    result = tennessee_resampling('d*_te.dat', variable, fault, percentage)
    graph, _, plot_x, plot_yu, plot_yd = result
    plt.plot(graph[variable], 'k', label=data_label)
    plt.plot(plot_x, plot_yu, 'k--')
    plt.plot(plot_x, plot_yd, 'k--')
    # The original passed fmt 'k*' together with color='r'; the keyword won
    # (with a Matplotlib warning), so state the red star directly.
    plt.plot(fault_inception, graph.iloc[fault_inception, variable - 1], 'r*',
             markersize=16, label='fault inception')
    plt.legend(loc='lower right')
    plt.xlabel('Time slot $k$')
    plt.ylabel('Output value for $N = 2$')
    plt.grid()
    plt.savefig(output_file, bbox_inches='tight', pad_inches=0.1, dpi=600)
    plt.show()
    return result


# Band width in percent: 0 leaves the signal untouched (raw data).
percentage = 0
graph, index_0, plot_x0, plot_yu0, plot_yd0 = _plot_resampled(
    percentage, 'raw data', 'original.pdf')

# Band width in percent: samples within 90 % of the baseline spread are
# replaced by the mean (event-driven signal).
percentage = 90
graph, index, plot_x, plot_yu, plot_yd = _plot_resampled(
    percentage, 'data at receiving point', 'event-driven.pdf')
In [13]:
Copied!
# Fraction of samples that fell inside the band (and were replaced by the
# mean) in the percentage = 90 run above.
index
index
Out[13]:
0.984375
Cycling through all 52 variables¶
In [14]:
Copied!
# Reload every test file into a fresh DataFrame (columns labelled 1..52).
#
# glob() gives no ordering guarantee, so the matches are sorted: key 0 is then
# the lowest-numbered file (d00) and key k is the file for fault k.
paths = sorted(glob('d*_te.dat'))
dfs = {}
columns_name = range(1, 53)
for mov, path in enumerate(paths):
    data = np.genfromtxt(path)
    dfs[mov] = pd.DataFrame(data=data, columns=columns_name)
In [15]:
Copied!
# Fault number to inspect; the loop further below iterates f over 1..21.
f = 5
# Fault number to inspect; the loop further below iterates f over 1..21.
f = 5
In [16]:
Copied!
# Preview the first rows of the data set stored under key f.
dfs1 = dfs[f]
dfs1.head()
dfs1 = dfs[f]
dfs1.head()
Out[16]:
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.24899 | 3657.2 | 4545.0 | 9.4084 | 26.895 | 42.445 | 2705.0 | 74.635 | 120.40 | 0.33667 | ... | 53.553 | 24.795 | 59.783 | 22.341 | 40.119 | 41.224 | 46.773 | 47.348 | 41.266 | 18.092 |
| 1 | 0.24934 | 3652.7 | 4500.7 | 9.3025 | 27.209 | 42.120 | 2706.0 | 74.960 | 120.40 | 0.33663 | ... | 54.083 | 24.760 | 60.483 | 22.554 | 40.124 | 34.524 | 44.999 | 47.587 | 40.983 | 17.679 |
| 2 | 0.24856 | 3629.5 | 4477.3 | 9.3286 | 26.483 | 42.600 | 2704.6 | 75.036 | 120.41 | 0.33917 | ... | 54.363 | 24.844 | 61.044 | 21.970 | 39.813 | 41.849 | 48.342 | 47.504 | 41.214 | 17.421 |
| 3 | 0.25219 | 3625.5 | 4566.2 | 9.4262 | 27.149 | 42.437 | 2703.5 | 75.675 | 120.38 | 0.33386 | ... | 53.298 | 24.488 | 58.240 | 22.079 | 40.460 | 42.350 | 49.280 | 47.283 | 41.120 | 17.346 |
| 4 | 0.23994 | 3646.6 | 4487.9 | 9.4232 | 26.702 | 42.409 | 2705.2 | 74.241 | 120.41 | 0.32965 | ... | 54.022 | 23.921 | 62.526 | 22.521 | 39.179 | 36.990 | 45.103 | 47.435 | 41.262 | 17.671 |
5 rows × 52 columns
In [17]:
Copied!
# Build two summary tables over every fault and every variable:
#   compression_df[fault, variable] - fraction of samples inside the band
#   hist_df[fault, sample]          - number of variables outside the band
# and replace the in-band samples of every fault data set by the baseline mean.

# Band width in percent of the baseline spread around the mean.
percentage = [90, 95]  # candidate values; only `p` is used below
p = 90

# glob() gives no ordering guarantee; sort so that key 0 is d00 (baseline)
# and key k is the file for fault k.
paths = sorted(glob('d*_te.dat'))
dfs = {}
for mov, path in enumerate(paths):
    data = np.genfromtxt(path)
    columns_name = ['Variable_' + str(x) for x in range(1, data.shape[1] + 1)]
    dfs[mov] = pd.DataFrame(data=data, columns=columns_name)

# Table sizes are taken from the data instead of hard-coding 21 / 52 / 960.
n_faults = len(paths) - 1
n_samples, n_variables = dfs[1].shape
fault_labels = ['Fault_' + str(x) for x in range(1, n_faults + 1)]

columns_list = ['Index_' + str(x) for x in range(1, n_variables + 1)]
index_z = np.zeros((n_faults, n_variables))
compression_df = pd.DataFrame(data=index_z, columns=columns_list)
compression_df.index = fault_labels

Zeros = np.zeros((n_faults, n_samples), dtype='int64')
hist_df = pd.DataFrame(data=Zeros, columns=range(1, n_samples + 1))
hist_df.index = fault_labels

# The band of each variable depends only on the baseline data set, so compute
# it once instead of once per fault.
bounds = {}
for v in range(1, n_variables + 1):
    reference = dfs[0].iloc[:, v - 1]
    max_v1 = reference.max()
    mean_v1 = reference.mean()
    min_v1 = reference.min()
    mean_up = mean_v1 + ((p / 100) * abs(max_v1 - mean_v1))
    mean_down = mean_v1 - ((p / 100) * abs(min_v1 - mean_v1))
    bounds[v] = (mean_down, mean_v1, mean_up)

for f in range(1, n_faults + 1):
    dfs1 = dfs[f]
    fault_label = 'Fault_' + str(f)
    outside_count = np.zeros(len(dfs1), dtype='int64')
    for v in range(1, n_variables + 1):
        variable_name = 'Variable_' + str(v)
        mean_down, mean_v1, mean_up = bounds[v]
        inside = dfs1[variable_name].between(mean_down, mean_up)
        # The original wrote to column `v` (an integer label), which silently
        # added a new column instead of resampling 'Variable_v'.
        dfs1.loc[inside, variable_name] = mean_v1
        outside_count += ~inside.to_numpy()
        compression_df.loc[fault_label, 'Index_' + str(v)] = int(inside.sum()) / len(dfs1)
    hist_df.loc[fault_label] = outside_count
In [18]:
Copied!
compression_df.head()
compression_df.head()
Out[18]:
| Index_1 | Index_2 | Index_3 | Index_4 | Index_5 | Index_6 | Index_7 | Index_8 | Index_9 | Index_10 | ... | Index_43 | Index_44 | Index_45 | Index_46 | Index_47 | Index_48 | Index_49 | Index_50 | Index_51 | Index_52 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Fault_1 | 0.920833 | 0.975000 | 0.932292 | 0.932292 | 0.989583 | 0.961458 | 0.944792 | 0.958333 | 0.840625 | 0.967708 | ... | 0.947917 | 0.919792 | 0.718750 | 0.921875 | 0.950000 | 0.990625 | 0.988542 | 0.977083 | 0.910417 | 0.951042 |
| Fault_2 | 1.000000 | 0.997917 | 1.000000 | 1.000000 | 0.991667 | 1.000000 | 1.000000 | 1.000000 | 0.996875 | 1.000000 | ... | 1.000000 | 1.000000 | 0.904167 | 1.000000 | 1.000000 | 0.996875 | 0.996875 | 1.000000 | 1.000000 | 0.995833 |
| Fault_3 | 1.000000 | 0.998958 | 1.000000 | 1.000000 | 0.984375 | 1.000000 | 1.000000 | 1.000000 | 0.992708 | 1.000000 | ... | 1.000000 | 1.000000 | 0.873958 | 0.926042 | 1.000000 | 0.998958 | 0.992708 | 1.000000 | 1.000000 | 0.996875 |
| Fault_4 | 1.000000 | 0.995833 | 1.000000 | 1.000000 | 0.988542 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 1.000000 | 0.869792 | 1.000000 | 1.000000 | 0.995833 | 0.996875 | 1.000000 | 1.000000 | 0.995833 |
| Fault_5 | 0.359375 | 0.380208 | 0.958333 | 0.345833 | 0.996875 | 0.357292 | 0.351042 | 0.952083 | 0.975000 | 0.987500 | ... | 0.371875 | 0.367708 | 0.344792 | 0.367708 | 0.382292 | 0.995833 | 0.995833 | 0.409375 | 0.370833 | 0.370833 |
5 rows × 52 columns
In [19]:
Copied!
hist_df.head()
hist_df.head()
Out[19]:
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 951 | 952 | 953 | 954 | 955 | 956 | 957 | 958 | 959 | 960 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Fault_1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 2 | 1 | 4 | 4 | 4 | 3 | 4 | 2 | 3 | 2 |
| Fault_2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Fault_3 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 1 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| Fault_4 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | ... | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| Fault_5 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 33 | 33 | 32 | 32 | 32 | 34 | 35 | 35 | 35 | 35 |
5 rows × 960 columns
Tennessee resampled datasets for PCA calculations¶
In [ ]:
Copied!
In [ ]:
Copied!