import rpy2
import rpy2.robjects as ro
from rpy2.robjects.vectors import FloatVector
from rpy2.robjects.packages import importr
import torch
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import GConvGRU
import matplotlib.pyplot as plt
import pandas as pd
import time
from scipy.interpolate import interp1d

# 2nd ST-GCN Example: dividing train and test
# Try to divide train and test (ST-GCN, WikiMathsDatasetLoader)
# import
class RecurrentGCN(torch.nn.Module):
    """Recurrent graph-convolutional regressor producing one value per node.

    A ``GConvGRU`` layer is followed by a ReLU and a linear layer that maps
    each node's ``filters`` hidden features to a single scalar.

    Args:
        node_features: Number of input features per node.
        filters: Number of hidden features produced by the recurrent layer.
    """

    def __init__(self, node_features, filters):
        super().__init__()
        # The third positional argument (2) is forwarded unchanged to
        # GConvGRU; see the GConvGRU documentation for its meaning.
        self.recurrent = GConvGRU(node_features, filters, 2)
        self.linear = torch.nn.Linear(filters, 1)

    def forward(self, x, edge_index, edge_weight):
        """Return the per-node prediction for one graph snapshot.

        No hidden state is passed to the recurrent layer, so every call is
        evaluated independently of previous calls.

        Args:
            x: Node feature tensor for the snapshot.
            edge_index: Graph connectivity passed through to ``GConvGRU``.
            edge_weight: Edge weights passed through to ``GConvGRU``.

        Returns:
            The output of the final linear layer (last dimension of size 1).
        """
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        return self.linear(h)


# Data
from torch_geometric_temporal.dataset import WikiMathsDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split

# Load the WikiMaths dataset with one lag per sample and split it into a
# training part (train_ratio=0.8) and a test part.
loader = WikiMathsDatasetLoader()
dataset = loader.get_dataset(lags=1)
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

# Train
# Materialise the training snapshots as [index, snapshot] pairs.
data_train = [[step, snapshot] for step, snapshot in enumerate(train_dataset)]

# Shapes of x, y, edge_index and edge_attr of the first snapshot, as recorded
# in the notebook:
# (torch.Size([1068, 1]),
#  torch.Size([1068]),
#  torch.Size([2, 27079]),
#  torch.Size([27079]))

# NOTE: T_train is the index of the LAST snapshot (len(data_train) - 1), which
# is what the original loop variable held after the loop. The final training
# snapshot is therefore not included in x_train / y_train. This is kept
# because later slices and recorded outputs rely on it.
# Recorded value: 583
T_train = data_train[-1][0]

# Number of nodes. The original read ``len(data[0][1].x)``, but ``data`` is
# never defined; ``data_train`` is the intended list.
N = len(data_train[0][1].x)

# The graph structure of the first snapshot is reused for every time step.
edge_index = data_train[0][1].edge_index
edge_attr = data_train[0][1].edge_attr

# Stack the per-snapshot features into one (T_train, N, features) tensor.
x_train = torch.stack(
    [data_train[i][1].x for i in range(T_train)]
).reshape(T_train, N, -1)
# x_train.shape -> torch.Size([583, 1068, 1])

# Stack the per-snapshot targets into one (T_train, N) tensor.
y_train = torch.stack(
    [data_train[i][1].y for i in range(T_train)]
).reshape(T_train, N)
# y_train.shape -> torch.Size([583, 1068])
# x_train.shape, y_train.shape ->
# (torch.Size([583, 1068, 1]), torch.Size([583, 1068]))
# Test
# Materialise the test snapshots as [index, snapshot] pairs.
data_test = [[step, snapshot] for step, snapshot in enumerate(test_dataset)]

# Shapes of x, y, edge_index and edge_attr of the first snapshot, as recorded
# in the notebook:
# (torch.Size([1068, 1]),
#  torch.Size([1068]),
#  torch.Size([2, 27079]),
#  torch.Size([27079]))

# NOTE: T_test is the index of the LAST snapshot (len(data_test) - 1), which
# is what the original loop variable held after the loop. The final test
# snapshot is therefore not included in x_test / y_test.
# Recorded value: 145
T_test = data_test[-1][0]

# Node count taken from the data instead of the hard-coded 1068.
n_test_nodes = len(data_test[0][1].x)

# Stack the per-snapshot features into one (T_test, nodes, features) tensor.
x_test = torch.stack(
    [data_test[i][1].x for i in range(T_test)]
).reshape(T_test, n_test_nodes, -1)
# x_test.shape -> torch.Size([145, 1068, 1])

# Stack the per-snapshot targets into one (T_test, nodes) tensor.
y_test = torch.stack(
    [data_test[i][1].y for i in range(T_test)]
).reshape(T_test, n_test_nodes)
# y_test.shape -> torch.Size([145, 1068])
# x_test.shape, y_test.shape ->
# (torch.Size([145, 1068, 1]), torch.Size([145, 1068]))
# Randomly Missing Values
# Collapse x_train to two dimensions with N columns (one column per node).
x_train = x_train.reshape(-1, N)

# Fixed seed so the same rows are blanked on every run.
np.random.seed(90)
# Despite its name, ``seed_number`` holds 290 distinct ROW indices.
seed_number = np.random.choice(len(x_train), 290, replace=False)
# Every selected row is set to NaN across all N columns.
x_train[seed_number] = float('nan')

# 1) Missing Value - Mean
# Replace every NaN with the mean of its column (one column per node).
df = pd.DataFrame(x_train.tolist())
mean_value = df.mean()  # per-column mean; NaN entries are skipped
df = df.fillna(mean_value)
x_train_mean = torch.Tensor(df.values)

# ST-GCN
mean_f_train = x_train_mean.reshape(T_train, N, 1).float()

# Inputs and targets are offset by the forecast horizon, so the model learns
# to map a snapshot to the snapshot ``horizon`` steps later. With the recorded
# sizes (T_train=583, T_test=145) these are the original literals 438 and 145.
horizon = T_test
split = T_train - horizon
mean_X = mean_f_train[:split, :, :]
mean_y = mean_f_train[horizon:, :, :]
# mean_X.shape, mean_y.shape ->
# (torch.Size([438, 1068, 1]), torch.Size([438, 1068, 1]))

model = RecurrentGCN(node_features=1, filters=4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()
for _ in tqdm(range(50)):
    # One optimiser step per snapshot pair.
    for xt, yt in zip(mean_X, mean_y):
        y_hat = model(xt, edge_index, edge_attr)
        cost = torch.mean((y_hat - yt) ** 2)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()
# Recorded progress: 100%|██████████| 50/50 [04:17<00:00, 5.15s/it]

# Forecast from the last ``horizon`` training snapshots.
mean_X_fore = mean_f_train[split:, :]
with torch.no_grad():  # no gradients are needed for forecasting
    mean_fhat = torch.stack(
        [model(xt, edge_index, edge_attr) for xt in mean_X_fore]
    ).numpy()
# mean_X_fore.shape, x_test.shape ->
# (torch.Size([145, 1068, 1]), torch.Size([145, 1068, 1]))
# 2) Missing Value - Linear Interpolation
# Fill NaNs by linear interpolation down each column (one column per node);
# anything interpolation leaves unfilled is set to 0.
df = pd.DataFrame(x_train.tolist())
df = df.interpolate(method='linear')
df = df.fillna(0)
x_train_linear = torch.Tensor(df.values).reshape(T_train, N, 1)

# ST-GCN
linear_f_train = x_train_linear.clone()

# Inputs and targets are offset by the forecast horizon, so the model learns
# to map a snapshot to the snapshot ``horizon`` steps later. With the recorded
# sizes (T_train=583, T_test=145) these are the original literals 438 and 145.
horizon = T_test
split = T_train - horizon
linear_X = linear_f_train[:split, :, :]
linear_y = linear_f_train[horizon:, :, :]

model = RecurrentGCN(node_features=1, filters=4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()
for _ in tqdm(range(50)):
    # One optimiser step per snapshot pair.
    for xt, yt in zip(linear_X, linear_y):
        y_hat = model(xt, edge_index, edge_attr)
        cost = torch.mean((y_hat - yt) ** 2)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()
# Recorded progress: 100%|██████████| 50/50 [04:20<00:00, 5.22s/it]

# Forecast from the last ``horizon`` training snapshots.
linear_X_fore = linear_f_train[split:, :]
# linear_X_fore.shape -> torch.Size([145, 1068, 1])
with torch.no_grad():  # no gradients are needed for forecasting
    linear_fhat = torch.stack(
        [model(xt, edge_index, edge_attr) for xt in linear_X_fore]
    ).numpy()
# linear_X_fore.shape, x_test.shape ->
# (torch.Size([145, 1068, 1]), torch.Size([145, 1068, 1]))
# Comparison
# MSE


def _per_node_mse(forecast, target):
    """Return the mean squared error of every column (node) over all rows."""
    squared_error = (pd.DataFrame(forecast) - pd.DataFrame(target)) ** 2
    return squared_error.mean()


# Test inputs as a plain 2-D array with one column per node.
x_test_2d = x_test.reshape(T_test, N).numpy()

# Forecast error of the model trained on mean-imputed data.
mean_mse = _per_node_mse(mean_fhat.reshape(T_test, N), x_test_2d)
print(mean_mse)
# Recorded output:
# 0       0.802389
# 1       0.442226
# 2       1.248909
# 3       0.389663
# 4       0.823464
#           ...
# 1063    0.170553
# 1064    0.774679
# 1065    0.701724
# 1066    1.213499
# 1067    0.787685
# Length: 1068, dtype: float64

# Forecast error of the model trained on linearly interpolated data.
linear_mse = _per_node_mse(linear_fhat.reshape(T_test, N), x_test_2d)
print(linear_mse)
# Recorded output:
# 0       0.802464
# 1       0.441771
# 2       1.248262
# 3       0.389308
# 4       0.822545
#           ...
# 1063    0.183206
# 1064    0.774763
# 1065    0.690184
# 1066    1.213436
# 1067    0.787685
# Length: 1068, dtype: float64