import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation
# torch
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import GConvGRU
# scipy
from scipy.interpolate import interp1d
# utils
import time
import pickle
from tqdm import tqdm
# rpy2
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.vectors import FloatVector
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri as rpyn
Class of Method(GNAR) lag 1 80% Missing repeat
ST-GCN
GNAR fiveNet,fivenodes lag 1
import
class RecurrentGCN(torch.nn.Module):
def __init__(self, node_features, filters):
super(RecurrentGCN, self).__init__()
self.recurrent = GConvGRU(node_features, filters, 2)
self.linear = torch.nn.Linear(filters, 1)
def forward(self, x, edge_index, edge_weight):
= self.recurrent(x, edge_index, edge_weight)
h = F.relu(h)
h = self.linear(h)
h return h
my functions
def load_data(fname):
with open(fname, 'rb') as outfile:
= pickle.load(outfile)
data_dict return data_dict
def save_data(data_dict,fname):
with open(fname,'wb') as outfile:
pickle.dump(data_dict,outfile)
def plot(f,*args,t=None,h=2.5,**kwargs):
= f.shape
T,N if t == None: t = range(T)
= plt.figure()
fig = fig.subplots(N,1)
ax for n in range(N):
*args,**kwargs)
ax[n].plot(t,f[:,n],'node='+str(n))
ax[n].set_title(*h)
fig.set_figheight(N
fig.tight_layout()
plt.close()return fig
def plot_add(fig,f,*args,t=None,**kwargs):
= f.shape[0]
T = f.shape[1]
N if t == None: t = range(T)
= fig.get_axes()
ax for n in range(N):
*args,**kwargs)
ax[n].plot(t,f[:,n],return fig
def make_Psi(T):
= np.zeros((T,T))
W for i in range(T):
for j in range(T):
if i==j :
= 0
W[i,j] elif np.abs(i-j) <= 1 :
= 1
W[i,j] = np.array(W.sum(axis=1))
d = np.diag(d)
D = np.array(np.diag(1/np.sqrt(d)) @ (D-W) @ np.diag(1/np.sqrt(d)))
L = np.linalg.eigh(L)
lamb, Psi return Psi
= importr('EbayesThresh').ebayesthresh ebayesthresh
def trim(f):
= np.array(f)
f if len(f.shape)==1: f = f.reshape(-1,1)
= f.shape
T,N = make_Psi(T)
Psi = Psi.T @ f # apply dft
fbar = np.stack([ebayesthresh(FloatVector(fbar[:,i])) for i in range(N)],axis=1)
fbar_threshed = Psi @ fbar_threshed # inverse dft
fhat return fhat
def update_from_freq_domain(signal, missing_index):
= np.array(signal)
signal = signal.shape
T,N = trim(signal)
signal_trimed for i in range(N):
= signal_trimed[missing_index[i],i]
signal[missing_index[i],i] return signal
data 정리
-
데이터정리
= load_data('./data/fivenodes.pkl') data
= torch.tensor(data['edges'])
edges_tensor = np.array(data['f'])
fiveVTS = edges_tensor.nonzero()
nonzero_indices = np.array(nonzero_indices).T
fiveNet_edge = 200
T = 5 # number of Nodes
N = fiveNet_edge
E = np.array([1,2,3,4,5])
V = np.arange(0,T)
t = 1
node_features = torch.tensor(E)
edge_index = torch.tensor(np.array([1,1,1,1,1,1,1,1,1,1]),dtype=torch.float32) edge_attr
-
train / test
= fiveVTS[:int(len(fiveVTS)*0.8)]
fiveVTS_train = fiveVTS[int(len(fiveVTS)*0.8):] fiveVTS_test
Random Missing Values
class Missing:
def __init__(self,df):
self.df = df
self.N = N
self.number = []
def miss(self,percent=0.5):
self.missing = self.df.copy()
self.percent = percent
for i in range(self.N):
#self.seed = np.random.choice(1000,1,replace=False)
#np.random.seed(self.seed)
self.number.append(np.random.choice(int(len(self.df))-1,int(len(self.df)*self.percent),replace=False))
self.missing[self.number[i],i] = float('nan')
def first_mean(self):
self.train_mean = self.missing.copy()
for i in range(self.N):
self.train_mean[self.number[i],i] = np.nanmean(self.missing[:,i])
def second_linear(self):
self.train_linear = pd.DataFrame(self.missing)
self.train_linear.interpolate(method='linear', inplace=True)
self.train_linear = self.train_linear.fillna(0)
self.train_linear = np.array(self.train_linear).reshape(int(len(self.df)),N)
STGCN
- missing rate: 80%
- 보간방법: linear
missing 일정
= []
stgcn_train1 = []
stgcn_test1
= Missing(fiveVTS_train)
_zero = 0.8)
_zero.miss(percent
_zero.second_linear()
= _zero.number
missing_index = _zero.train_linear
interpolated_signal
= torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:]
X = torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[1:,:,:]
y
= torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[:-1,:,:]).float()
XX = torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[1:,:,:]).float()
yy
= torch.tensor(fiveVTS_train).reshape(int(T*0.8),N,1).float()[1:,:,:] real_y
for i in range(100):
= RecurrentGCN(node_features=1, filters=4)
net = torch.optim.Adam(net.parameters(), lr=0.01)
optimizer
net.train()for epoch in range(50):
for time, (xt,yt) in enumerate(zip(X,y)):
= net(xt, edge_index, edge_attr)
yt_hat = torch.mean((yt_hat-yt)**2)
cost
cost.backward()
optimizer.step()
optimizer.zero_grad()
= torch.stack([net(xt, edge_index, edge_attr) for xt in X]).detach().numpy()
yhat = torch.stack([net(xt, edge_index, edge_attr) for xt in XX]).detach().numpy()
yyhat
= (((real_y-yhat).squeeze())**2).mean()
train_mse_total_stgcn = (((yy-yyhat).squeeze())**2).mean()
test_mse_total_stgcn
stgcn_train1.append(train_mse_total_stgcn.tolist()) stgcn_test1.append(test_mse_total_stgcn.tolist())
=(12, 8))
plt.figure(figsize; plt.boxplot(stgcn_train1)
=(12, 8))
plt.figure(figsize; plt.boxplot(stgcn_test1)
missing 다르게
= []
stgcn_train2 = []
stgcn_test2
= torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[:-1,:,:]).float()
XX = torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[1:,:,:]).float()
yy
= torch.tensor(fiveVTS_train).reshape(int(T*0.8),N,1).float()[1:,:,:] real_y
for i in range(100):
= Missing(fiveVTS_train)
_zero = 0.8)
_zero.miss(percent
_zero.second_linear()
= _zero.number
missing_index = _zero.train_linear
interpolated_signal
= torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:]
X = torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[1:,:,:]
y
= RecurrentGCN(node_features=1, filters=4)
net = torch.optim.Adam(net.parameters(), lr=0.01)
optimizer
net.train()for epoch in range(50):
for time, (xt,yt) in enumerate(zip(X,y)):
= net(xt, edge_index, edge_attr)
yt_hat = torch.mean((yt_hat-yt)**2)
cost
cost.backward()
optimizer.step()
optimizer.zero_grad()
= torch.stack([net(xt, edge_index, edge_attr) for xt in X]).detach().numpy()
yhat = torch.stack([net(xt, edge_index, edge_attr) for xt in XX]).detach().numpy()
yyhat
= (((real_y-yhat).squeeze())**2).mean()
train_mse_total_stgcn = (((yy-yyhat).squeeze())**2).mean()
test_mse_total_stgcn
stgcn_train2.append(train_mse_total_stgcn.tolist()) stgcn_test2.append(test_mse_total_stgcn.tolist())
=(12, 8))
plt.figure(figsize; plt.boxplot(stgcn_train2)
=(12, 8))
plt.figure(figsize; plt.boxplot(stgcn_test2)
Enhencement of STGCN
- missing rate: 80%
- 보간방법: linear
-
결측치생성 + 보간
missing 일정
= []
estgcn_train1 = []
estgcn_test1
= Missing(fiveVTS_train)
_zero = 0.8)
_zero.miss(percent
_zero.second_linear()
= _zero.number
missing_index = _zero.train_linear
interpolated_signal
= torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:]
X = torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[1:,:,:]
y
= torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[:-1,:,:]).float()
XX = torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[1:,:,:]).float()
yy
= torch.tensor(fiveVTS_train).reshape(int(T*0.8),N,1).float()[1:,:,:] real_y
for i in range(100):
= RecurrentGCN(node_features=1, filters=4)
net = torch.optim.Adam(net.parameters(), lr=0.01)
optimizer
net.train()= interpolated_signal.copy()
signal for epoch in range(50):
= update_from_freq_domain(signal,missing_index)
signal = torch.tensor(signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:]
X = torch.tensor(signal).reshape(int(T*0.8),N,1).float()[1:,:,:]
y for time, (xt,yt) in enumerate(zip(X,y)):
= net(xt, edge_index, edge_attr)
yt_hat = torch.mean((yt_hat-yt)**2)
cost
cost.backward()
optimizer.step()
optimizer.zero_grad()= torch.concat([X.squeeze(),yt_hat.detach().squeeze().reshape(1,-1)])
signal
= torch.stack([net(xt, edge_index, edge_attr) for xt in X]).detach().numpy()
yhat = torch.stack([net(xt, edge_index, edge_attr) for xt in XX]).detach().numpy()
yyhat
= (((real_y-yhat).squeeze())**2).mean()
train_mse_total_estgcn = (((yy-yyhat).squeeze())**2).mean()
test_mse_total_estgcn
estgcn_train1.append(train_mse_total_estgcn.tolist()) estgcn_test1.append(test_mse_total_estgcn.tolist())
=(12, 8))
plt.figure(figsize; plt.boxplot(estgcn_train1)
=(12, 8))
plt.figure(figsize; plt.boxplot(estgcn_test1)
missing 매번 다르게
= []
estgcn_train2 = []
estgcn_test2
= torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[:-1,:,:]).float()
XX = torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[1:,:,:]).float()
yy
= torch.tensor(fiveVTS_train).reshape(int(T*0.8),N,1).float()[1:,:,:] real_y
for i in range(100):
= Missing(fiveVTS_train)
_zero = 0.8)
_zero.miss(percent
_zero.second_linear()
= _zero.number
missing_index = _zero.train_linear
interpolated_signal
= torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:]
X = torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[1:,:,:]
y
= RecurrentGCN(node_features=1, filters=4)
net = torch.optim.Adam(net.parameters(), lr=0.01)
optimizer
net.train()= interpolated_signal.copy()
signal for epoch in range(50):
= update_from_freq_domain(signal,missing_index)
signal = torch.tensor(signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:]
X = torch.tensor(signal).reshape(int(T*0.8),N,1).float()[1:,:,:]
y for time, (xt,yt) in enumerate(zip(X,y)):
= net(xt, edge_index, edge_attr)
yt_hat = torch.mean((yt_hat-yt)**2)
cost
cost.backward()
optimizer.step()
optimizer.zero_grad()= torch.concat([X.squeeze(),yt_hat.detach().squeeze().reshape(1,-1)])
signal
= torch.stack([net(xt, edge_index, edge_attr) for xt in X]).detach().numpy()
yhat = torch.stack([net(xt, edge_index, edge_attr) for xt in XX]).detach().numpy()
yyhat
= (((real_y-yhat).squeeze())**2).mean()
train_mse_total_estgcn = (((yy-yyhat).squeeze())**2).mean()
test_mse_total_estgcn
estgcn_train2.append(train_mse_total_estgcn.tolist()) estgcn_test2.append(test_mse_total_estgcn.tolist())
=(12, 8))
plt.figure(figsize; plt.boxplot(estgcn_train2)
=(12, 8))
plt.figure(figsize; plt.boxplot(estgcn_test2)
GNAR
%load_ext rpy2.ipython
%%R
library(GNAR)
library(igraph) library(zoo)
R[write to console]: Loading required package: igraph
R[write to console]:
Attaching package: ‘igraph’
R[write to console]: The following objects are masked from ‘package:stats’:
decompose, spectrum
R[write to console]: The following object is masked from ‘package:base’:
union
R[write to console]: Loading required package: wordcloud
R[write to console]: Loading required package: RColorBrewer
R[write to console]:
Attaching package: ‘zoo’
R[write to console]: The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
%%R
<- as.matrix(fiveNet) fiveNet_m
%R -o fiveNet_m
= importr('GNAR') # import GNAR
GNAR = importr('igraph') # import igraph igraph
missing 일정
= Missing(fiveVTS_train)
_zero = 0.8)
_zero.miss(percent
_zero.second_linear()
= _zero.number
missing_index = _zero.train_linear
interpolated_signal
= np.array(torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-1),:,:].squeeze())
X = np.array(torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[1:,:,:].squeeze())
y
= np.array(torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[:-1,:,:]).float().squeeze())
XX = np.array(torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[1:,:,:]).float().squeeze())
yy
= np.array(torch.tensor(fiveVTS_train).reshape(int(T*0.8),N,1).float()[1:,:,:]) real_y
%R -i X
%R -i XX
%%R
<- matrix(ncol=1,nrow=100)
gnar_train1 <- matrix(ncol=1,nrow=100)
gnar_test1 for(i in 1:100){
<- GNARfit(vts = X, net = fiveNet, alphaOrder = 2, betaOrder = c(1, 1))
answer <- predict(answer,n.ahead=40)
prediction
<- mean(residuals(answer)**2)
train_mse_total_gnar <- mean((XX - prediction[1:40])**2)
test_mse_total_gnar
<- train_mse_total_gnar
gnar_train1[i] <- train_mse_total_gnar
gnar_test1[i] }
%R -o gnar_train1
%R -o gnar_test1
=(12, 8))
plt.figure(figsize; plt.boxplot(gnar_train1)
=(12, 8))
plt.figure(figsize; plt.boxplot(gnar_test1)
missing 다르게
= robjects.r.matrix(FloatVector([0,0,0,1,1,0,0,1,1,0,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0]), nrow = 5, ncol = 5)
m print(m)
[,1] [,2] [,3] [,4] [,5]
[1,] 0 0 0 1 1
[2,] 0 0 1 1 0
[3,] 0 1 0 1 0
[4,] 1 1 1 0 0
[5,] 1 0 0 0 0
= []
gnar_train2 = []
gnar_test2
= torch.tensor(fiveVTS_test.reshape(int(T*0.2),N,1)[1:,:,:]).float() yy
for i in range(100):
= Missing(fiveVTS_train)
_zero = 0.8)
_zero.miss(percent
_zero.second_linear()
= _zero.number
missing_index = _zero.train_linear
interpolated_signal
= torch.tensor(interpolated_signal).reshape(int(T*0.8),N,1).float()[:int(T*0.8-2),:,:]
X
= GNAR.GNARfit(vts=robjects.r.matrix(rpyn.numpy2rpy(np.array(X).squeeze()), nrow = 160, ncol = 5),net = GNAR.matrixtoGNAR(m), alphaOrder = 2, betaOrder = FloatVector([1, 1]))
answer = GNAR.predict_GNARfit(answer,n_ahead=40)
predict
= ((pd.DataFrame(GNAR.residuals_GNARfit(answer)).values.reshape(-1,5))**2).mean()
train_mse_total_gnar = ((yy.squeeze() - pd.DataFrame(predict).values.reshape(-1,5)[:-1,:])**2).mean()
test_mse_total_gnar
gnar_train2.append(train_mse_total_gnar.tolist()) gnar_test2.append(test_mse_total_gnar.tolist())
=(12, 8))
plt.figure(figsize; plt.boxplot(gnar_train2)
=(12, 8))
plt.figure(figsize; plt.boxplot(gnar_test2)
Visualization
= plt.figure(figsize = (12,10))
fig = fig.add_subplot(111)
ax
= ax.boxplot(stgcn_train1, positions=[1], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C0"))
bp1 = ax.boxplot(estgcn_train1, positions=[2], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C1"))
bp2 = ax.boxplot(gnar_train1, positions=[3], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C2"))
bp3 "boxes"][0], bp2["boxes"][0], bp3["boxes"][0]], ["STGCN", "ESTGCN", "GNAR"], loc='upper right')
ax.legend([bp1["boxes"][0], bp2["boxes"][0]], ["STGCN", "ESTGCN"], loc='upper right')
ax.legend([bp1[
=0.5, y=0.94, s="TRAIN_Fix missing number_80% Missing", fontsize=25, ha="center", transform=fig.transFigure)
plt.text(x
fig.tight_layout() plt.show()
= plt.figure(figsize = (12,10))
fig = fig.add_subplot(111)
ax
= ax.boxplot(stgcn_test1, positions=[1], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C0"))
bp1 = ax.boxplot(estgcn_test1, positions=[2], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C1"))
bp2 = ax.boxplot(gnar_test1, positions=[3], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C2"))
bp3 "boxes"][0], bp2["boxes"][0], bp3["boxes"][0]], ["STGCN", "ESTGCN", "GNAR"], loc='upper right')
ax.legend([bp1["boxes"][0], bp2["boxes"][0]], ["STGCN", "ESTGCN"], loc='upper right')
ax.legend([bp1[
=0.5, y=0.94, s="TEST_Fix missing number_80% Missing", fontsize=25, ha="center", transform=fig.transFigure)
plt.text(x
fig.tight_layout() plt.show()
= plt.figure(figsize = (12,10))
fig = fig.add_subplot(111)
ax
= ax.boxplot(stgcn_train2, positions=[1], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C0"))
bp1 = ax.boxplot(estgcn_train2, positions=[2], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C1"))
bp2 = ax.boxplot(gnar_train2, positions=[3], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C2"))
bp3 "boxes"][0], bp2["boxes"][0], bp3["boxes"][0]], ["STGCN", "ESTGCN", "GNAR"], loc='upper right')
ax.legend([bp1[
=0.5, y=0.94, s="TRAIN_Different missing number_80% Missing", fontsize=25, ha="center", transform=fig.transFigure)
plt.text(x
fig.tight_layout() plt.show()
= plt.figure(figsize = (12,10))
fig = fig.add_subplot(111)
ax
= ax.boxplot(stgcn_test2, positions=[1], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C0"))
bp1 = ax.boxplot(estgcn_test2, positions=[2], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C1"))
bp2 = ax.boxplot(gnar_test2, positions=[3], notch=True, widths=0.35, patch_artist=True, boxprops=dict(facecolor="C2"))
bp3 "boxes"][0], bp2["boxes"][0], bp3["boxes"][0]], ["STGCN", "ESTGCN", "GNAR"], loc='upper right')
ax.legend([bp1[
=0.5, y=0.94, s="TEST_Different missing number_80% Missing", fontsize=25, ha="center", transform=fig.transFigure)
plt.text(x
fig.tight_layout() plt.show()