import rpy2
import rpy2.robjects as ro
from rpy2.robjects.vectors import FloatVector
from rpy2.robjects.packages import importr
import torch
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import GConvGRU
import matplotlib.pyplot as plt
import pandas as pd
import time
from scipy.interpolate import interp1d
Second ST-GCN example: splitting the data into train and test sets
ST-GCN
Try splitting the data into train and test sets (ST-GCN, WikiMathsDatasetLoader)
import
class RecurrentGCN(torch.nn.Module):
    """Recurrent graph-convolutional regressor: GConvGRU -> ReLU -> Linear.

    Args:
        node_features: Number of input features per node, passed to GConvGRU.
        filters: Number of GConvGRU output channels; also the input width of
            the final linear layer.
    """

    def __init__(self, node_features, filters):
        super().__init__()
        # Third positional argument is 2; in the torch_geometric_temporal
        # API this is K, the Chebyshev filter size (TODO confirm against the
        # installed library version).
        self.recurrent = GConvGRU(node_features, filters, 2)
        # Projects the `filters` hidden channels to one output value per node.
        self.linear = torch.nn.Linear(filters, 1)

    def forward(self, x, edge_index, edge_weight):
        """Return one prediction per node for a single graph snapshot.

        Args:
            x: Node feature tensor for one snapshot.
            edge_index: Graph connectivity passed through to GConvGRU.
            edge_weight: Edge weights passed through to GConvGRU.

        Returns:
            Tensor whose last dimension has size 1 (output of the linear
            layer).
        """
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        return h
Data
from torch_geometric_temporal.dataset import WikiMathsDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split
# Load the WikiMaths temporal graph signal with a one-step lag and split it
# chronologically: the first 80% of snapshots for training, the rest for test.
loader = WikiMathsDatasetLoader()
dataset = loader.get_dataset(lags=1)
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)
Train
# Materialise the training snapshots as [index, snapshot] pairs.
# NOTE(review): the loop variable `time` shadows the imported `time` module
# and is read after the loop as the last snapshot index; it is kept under this
# name because later statements depend on it.
data_train = []
for time, snapshot in enumerate(train_dataset):
    data_train.append([time, snapshot])

# Notebook display: shapes of x, y, edge_index and edge_attr of the first snapshot.
data_train[0][1].x.shape, data_train[0][1].y.shape, data_train[0][1].edge_index.shape, data_train[0][1].edge_attr.shape
(torch.Size([1068, 1]),
torch.Size([1068]),
torch.Size([2, 27079]),
torch.Size([27079]))
# Notebook display of `time`: the value left over from the preceding
# `enumerate` loop, i.e. the index of the last training snapshot.
time
583
# Number of training time steps actually used.
# NOTE(review): `time` is the last index produced by enumerate(train_dataset),
# so this is len(data_train) - 1 and the final snapshot is not used; confirm
# that this is intended.
T_train = time

# Number of nodes, read from the first training snapshot. The original read
# `data[0][1].x`, but no `data` name is defined in this file.
N = len(data_train[0][1].x)

# The graph structure is taken from the first snapshot and reused everywhere.
edge_index = data_train[0][1].edge_index
edge_attr = data_train[0][1].edge_attr

# Collect the per-snapshot node features ...
x_train = [data_train[i][1].x for i in range(time)]

# ... and concatenate them in one call instead of growing a tensor inside a
# loop, which re-copies all accumulated data on every iteration.
data_tensor = torch.cat(x_train, dim=0)

# Reshape to (time steps, nodes, features) using N rather than a literal 1068.
x_train = data_tensor.reshape(time, N, -1)

# Notebook display.
x_train.shape
torch.Size([583, 1068, 1])
# Collect the per-snapshot targets for the first `time` training snapshots.
y_train = [data_train[i][1].y for i in range(time)]

# Concatenate once instead of growing a tensor inside a loop, which re-copies
# all accumulated data on every iteration.
data_tensor = torch.cat(y_train, dim=0)

# Reshape to (time steps, nodes); the node count is inferred instead of being
# hard-coded as 1068.
y_train = data_tensor.reshape(time, -1)

# Notebook display.
y_train.shape
torch.Size([583, 1068])
# Notebook display: shapes of the stacked training inputs and targets.
x_train.shape, y_train.shape
(torch.Size([583, 1068, 1]), torch.Size([583, 1068]))
Test
# Materialise the test snapshots as [index, snapshot] pairs.
# NOTE(review): the loop variable `time` shadows the imported `time` module
# and is read after the loop as the last snapshot index; it is kept under this
# name because later statements depend on it.
data_test = []
for time, snapshot in enumerate(test_dataset):
    data_test.append([time, snapshot])

# Notebook display: shapes of x, y, edge_index and edge_attr of the first snapshot.
data_test[0][1].x.shape, data_test[0][1].y.shape, data_test[0][1].edge_index.shape, data_test[0][1].edge_attr.shape
(torch.Size([1068, 1]),
torch.Size([1068]),
torch.Size([2, 27079]),
torch.Size([27079]))
# Notebook display of `time`: the value left over from the preceding
# `enumerate` loop, i.e. the index of the last test snapshot.
time
145
# Number of test time steps actually used.
# NOTE(review): `time` is the last index produced by enumerate(test_dataset),
# so this is len(data_test) - 1 and the final snapshot is not used; confirm
# that this is intended.
T_test = time

# Collect the per-snapshot node features.
x_test = [data_test[i][1].x for i in range(time)]

# Concatenate once instead of growing a tensor inside a loop, which re-copies
# all accumulated data on every iteration.
data_tensor = torch.cat(x_test, dim=0)

# Reshape to (time steps, nodes, features); the node count is inferred and the
# feature width is taken from the data instead of hard-coding 1068.
x_test = data_tensor.reshape(time, -1, data_tensor.shape[-1])

# Notebook display.
x_test.shape
torch.Size([145, 1068, 1])
# Collect the per-snapshot targets for the first `time` test snapshots.
y_test = [data_test[i][1].y for i in range(time)]

# Concatenate once instead of growing a tensor inside a loop, which re-copies
# all accumulated data on every iteration.
data_tensor = torch.cat(y_test, dim=0)

# Reshape to (time steps, nodes); the node count is inferred instead of being
# hard-coded as 1068.
y_test = data_tensor.reshape(time, -1)

# Notebook display.
y_test.shape
torch.Size([145, 1068])
# Notebook display: shapes of the stacked test inputs and targets.
x_test.shape, y_test.shape
(torch.Size([145, 1068, 1]), torch.Size([145, 1068]))
Randomly Missing Values
# Drop the trailing feature axis so that each row holds N values.
x_train = x_train.reshape(-1, N)

# Fix the RNG so the same rows are removed on every run.
np.random.seed(90)

# Despite its name, `seed_number` holds 290 distinct row indices, not a seed.
seed_number = np.random.choice(len(x_train), 290, replace=False)

# Blank out the selected rows entirely (every column of each chosen row).
x_train[seed_number] = float('nan')
1) Missing Value - Mean
# Work on a copy so the NaN-masked x_train stays available for other
# imputation methods.
x_train_mean = x_train.clone()
df = pd.DataFrame(x_train_mean.tolist())
mean_value = df.mean()  # column-wise mean; pandas skips NaN by default
df = df.fillna(mean_value)  # replace each missing value with its column mean
x_train_mean = torch.Tensor(df.values)
ST-GCN
# Restore the (time steps, nodes, 1) layout expected by the model.
mean_f_train = x_train_mean.reshape(T_train, N, 1).float()

# Inputs are the first 438 steps; targets are the same series shifted 145
# steps ahead, so the model learns x[t] -> x[t + 145].
# NOTE(review): 438 and 145 look like T_train - T_test and T_test — confirm
# before changing the split sizes.
mean_X = mean_f_train[:438, :, :]
mean_y = mean_f_train[145:, :, :]

# Notebook display.
mean_X.shape, mean_y.shape
(torch.Size([438, 1068, 1]), torch.Size([438, 1068, 1]))
# Fresh model and optimizer for the mean-imputed series.
model = RecurrentGCN(node_features=1, filters=4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

# 50 epochs, one optimizer step per (input, target) snapshot pair.
# NOTE(review): the unused index `time` overwrites the module-level `time`.
for epoch in tqdm(range(50)):
    for time, (xt, yt) in enumerate(zip(mean_X, mean_y)):
        y_hat = model(xt, edge_index, edge_attr)
        cost = torch.mean((y_hat - yt) ** 2)  # per-snapshot mean squared error
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()
100%|██████████| 50/50 [04:17<00:00, 5.15s/it]
# The steps after index 438 of the imputed training series are the model
# inputs for forecasting.
mean_X_fore = mean_f_train[438:, :]

# Inference only: no_grad avoids building an autograd graph for every forward
# pass, which also makes the explicit .detach() unnecessary.
with torch.no_grad():
    mean_fhat = torch.stack(
        [model(xt, edge_index, edge_attr) for xt in mean_X_fore]
    ).numpy()

# Notebook display.
mean_X_fore.shape, x_test.shape
(torch.Size([145, 1068, 1]), torch.Size([145, 1068, 1]))
2) Missing Value - Linear Interpolation
# Impute missing rows by linear interpolation down each column.
df = pd.DataFrame(x_train.tolist())
df.interpolate(method='linear', inplace=True)
# Whatever interpolation could not fill (e.g. leading NaNs with no earlier
# valid value) becomes 0.
df = df.fillna(0)
x_train_linear = torch.Tensor(df.values).reshape(T_train, N, 1)
ST-GCN
# Copy so later slicing or edits cannot alias x_train_linear.
linear_f_train = x_train_linear.clone()

# Inputs are the first 438 steps; targets are the same series shifted 145
# steps ahead, so the model learns x[t] -> x[t + 145].
# NOTE(review): 438 and 145 look like T_train - T_test and T_test — confirm
# before changing the split sizes.
linear_X = linear_f_train[:438, :, :]
linear_y = linear_f_train[145:, :, :]
# Fresh model and optimizer for the linearly interpolated series.
model = RecurrentGCN(node_features=1, filters=4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

# 50 epochs, one optimizer step per (input, target) snapshot pair.
# NOTE(review): the unused index `time` overwrites the module-level `time`.
for epoch in tqdm(range(50)):
    for time, (xt, yt) in enumerate(zip(linear_X, linear_y)):
        y_hat = model(xt, edge_index, edge_attr)
        cost = torch.mean((y_hat - yt) ** 2)  # per-snapshot mean squared error
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()
100%|██████████| 50/50 [04:20<00:00, 5.22s/it]
# The steps after index 438 of the interpolated training series are the model
# inputs for forecasting.
linear_X_fore = linear_f_train[438:, :]

# Notebook display.
linear_X_fore.shape
torch.Size([145, 1068, 1])
# Inference only: no_grad avoids building an autograd graph for every forward
# pass, which also makes the explicit .detach() unnecessary.
with torch.no_grad():
    linear_fhat = torch.stack(
        [model(xt, edge_index, edge_attr) for xt in linear_X_fore]
    ).numpy()

# Notebook display.
linear_X_fore.shape, x_test.shape
(torch.Size([145, 1068, 1]), torch.Size([145, 1068, 1]))
Comparison
MSE
# Per-node MSE of the mean-imputation forecast against the test inputs:
# squared error averaged over the T_test rows, one value per column (node).
((pd.DataFrame(mean_fhat.reshape(T_test, N)) - pd.DataFrame(x_test.reshape(T_test, N))) ** 2).mean()
0 0.802389
1 0.442226
2 1.248909
3 0.389663
4 0.823464
...
1063 0.170553
1064 0.774679
1065 0.701724
1066 1.213499
1067 0.787685
Length: 1068, dtype: float64
# Per-node MSE of the linear-interpolation forecast against the test inputs:
# squared error averaged over the T_test rows, one value per column (node).
((pd.DataFrame(linear_fhat.reshape(T_test, N)) - pd.DataFrame(x_test.reshape(T_test, N))) ** 2).mean()
0 0.802464
1 0.441771
2 1.248262
3 0.389308
4 0.822545
...
1063 0.183206
1064 0.774763
1065 0.690184
1066 1.213436
1067 0.787685
Length: 1068, dtype: float64