import pygod
import numpy as np
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
import torch
from pygod.generator import gen_contextual_outlier, gen_structural_outlier
from pygod.utils import load_data
from pygod.metric import eval_roc_auc
from pygod.detector import SCAN, GAE, Radar, ANOMALOUS, ONE, DOMINANT, DONE, AdONE, AnomalyDAE, GAAN, OCGNN, CoLA, GUIDE, CONAD
[ANOMALOUS] Graph
Reference
Summary
- 노드당 매핑된 속성(attribute)으로 이상치를 계산해낸다.
- 그래서 속성 특징마다 나오는 이상치라고 칭하는 노드가 다른 것 같다.
- 노드 정보와 네트워크를 기반으로 rare하거나 상당히 differ한 인스턴스 집합 찾는 것을 목표로 한다.
- there may exist some outlying attributes that do not satisfy the Homophily hypothesis
- Homophily hypothesis을 만족하지 않는 어떤 outlying attributes가 존재하며 이것이 이상치로 생각한다.
Abbr | Year | Backbone | Sampling | Class |
---|---|---|---|---|
SCAN | 2007 | Clustering | No | pygod.detector.SCAN |
GAE | 2016 | GNN+AE | Yes | pygod.detector.GAE |
Radar | 2017 | MF | No | pygod.detector.Radar |
ANOMALOUS | 2018 | MF | No | pygod.detector.ANOMALOUS |
ONE | 2019 | MF | No | pygod.detector.ONE |
DOMINANT | 2019 | GNN+AE | Yes | pygod.detector.DOMINANT |
DONE | 2020 | MLP+AE | Yes | pygod.detector.DONE |
AdONE | 2020 | MLP+AE | Yes | pygod.detector.AdONE |
AnomalyDAE | 2020 | GNN+AE | Yes | pygod.detector.AnomalyDAE |
GAAN | 2020 | GAN | Yes | pygod.detector.GAAN |
OCGNN | 2021 | GNN | Yes | pygod.detector.OCGNN |
CoLA | 2021 | GNN+AE+SSL | Yes | pygod.detector.CoLA |
GUIDE | 2021 | GNN+AE | Yes | pygod.detector.GUIDE |
CONAD | 2022 | GNN+AE+SSL | Yes | pygod.detector.CONAD |
Import
Tutorial
data = Planetoid('./data/Cora', 'Cora', transform=T.NormalizeFeatures())[0]
data
Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
gen_contextual_outlier
의 역할: Generating contextual outliers
- 임의로 선택한 노드 중 그 노드들끼리 얼마나 떨어져 있나?
data, ya = gen_contextual_outlier(data, n=100, k=50)
ya
tensor([0, 0, 0, ..., 0, 0, 0])
len(sum(np.where(ya==1)))
100
len(sum(np.where(ya==0)))
2608
len(ya)
2708
gen_structural_outlier
의 역할: Generating structural outliers
- 임의로 선택한 노드들이 fully connected 되어있을때 그 집단과 얼마나 많이 다른가??
data, ys = gen_structural_outlier(data, m=10, n=10)
ys
tensor([0, 0, 0, ..., 0, 0, 0])
len(sum(np.where(ys==1)))
100
len(sum(np.where(ys==0)))
2608
len(ys)
2708
위에서 찾은 이상치 간에 torch.logical_or
논리 or 생성
data.y = torch.logical_or(ys, ya).long()
data.y
tensor([0, 0, 0, ..., 0, 0, 0])
len(sum(np.where(data.y==1)))
197
len(sum(np.where(data.y==0)))
2511
len(data.y)
2708
load_data('inj_cora')에서 쓸 수 있는 데이터 종류
data = load_data('inj_cora')
data.y = data.y.bool()
For injected/generated datasets, the labels meanings are as follows.
-
0: inlier
-
1: contextual outlier only
-
2: structural outlier only
-
3: both contextual outlier and structural outlier
Examples to convert the labels are as follows:
y = data.y.bool()      # binary labels (inlier/outlier)
yc = data.y >> 0 & 1   # contextual outliers
ys = data.y >> 1 & 1   # structural outliers
data.y
tensor([False, False, False, ..., False, False, False])
'ANOMALOUS' 함수 사용
detector = ANOMALOUS(gamma=1.,
                     weight_decay=0.,
                     lr=0.01,
                     epoch=50,
                     gpu=-1,
                     contamination=0.1,
                     verbose=0)
detector.fit(data)
ANOMALOUS(contamination=0.1, epoch=50, gamma=1.0, gpu=None, lr=0.01,
verbose=0, weight_decay=0.0)
class ANOMALOUSBase(nn.Module):
    """Core model of the ANOMALOUS detector.

    Holds two trainable tensors: the weight matrix ``w`` and the
    residual matrix ``r``.  The forward pass reconstructs the input
    as ``x @ w @ x`` and returns that reconstruction together with
    ``r`` (pygod derives the per-node decision scores from ``r`` —
    see the notes on ``decision_function`` in this document).
    """

    def __init__(self, w, r):
        """Register the initial ``w`` and ``r`` as learnable parameters.

        Parameters
        ----------
        w : torch.Tensor
            Initial weight matrix (``w_init`` from ``process_graph``).
        r : torch.Tensor
            Initial residual matrix (``r_init`` from ``process_graph``).
        """
        super().__init__()
        self.w = nn.Parameter(w)
        self.r = nn.Parameter(r)

    def forward(self, x):
        """Return the reconstruction ``x @ w @ x`` and the residual ``r``."""
        return x @ self.w @ x, self.r
detector.decision_function(data)
tensor([0.0749, 0.0751, 0.0744, ..., 0.0823, 0.0795, 0.0759])
위에서 decision_function의 결과로 나오는 decision_score는 r의 제곱이며, 이 r은 model에서 나온 결과인데 이 model은 ANOMALOUSBase(w_init, r_init)의 결과이다.
이 r_init은 ANOMALOUS class 내에 있는 x, s, l, w_init, r_init = self.process_graph(data) 여기서 나온다.
-
return되는 거는 순서대로 x, s, laplacian, w_init, r_init
x
detector.process_graph(data)[0]
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]])
detector.process_graph(data)[0].shape
torch.Size([2708, 1433])
\(X \in \mathbb{R}^{d \times n}\)
2708 = n
= the number of nodes
1433 = d
= attribute dimensionality
s
detector.process_graph(data)[1]
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 1., ..., 0., 0., 0.],
[0., 1., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 1.],
[0., 0., 0., ..., 0., 1., 0.]])
detector.process_graph(data)[1].shape
torch.Size([2708, 2708])
\(A \in \mathbb{R}^{n \times n}\)
laplacian
detector.process_graph(data)[2]
tensor([[ 3., 0., 0., ..., 0., 0., 0.],
[ 0., 3., -1., ..., 0., 0., 0.],
[ 0., -1., 5., ..., 0., 0., 0.],
...,
[ 0., 0., 0., ..., 1., 0., 0.],
[ 0., 0., 0., ..., 0., 4., -1.],
[ 0., 0., 0., ..., 0., -1., 4.]])
detector.process_graph(data)[2].shape
torch.Size([2708, 2708])
generated Laplacian
\(\tilde{R} L \tilde{R}^T\)
w_init
detector.process_graph(data)[3]
tensor([[-1.2653, -0.4034, -0.9538, ..., 0.0590, -0.5046, 1.3625],
[-0.9114, -1.7293, -1.7256, ..., -0.9320, 0.3895, -1.3825],
[ 0.1706, -1.8746, -0.8415, ..., -0.7066, -0.5898, 1.0048],
...,
[-0.1567, 1.4902, 0.2324, ..., 0.0361, -1.5853, 1.7065],
[ 0.5649, -0.7216, 0.2013, ..., 0.8480, -1.6625, 2.0873],
[-1.3831, -0.5322, 1.8749, ..., -0.9381, -1.3708, 0.5029]])
detector.process_graph(data)[3].shape
torch.Size([1433, 2708])
r_init
detector.process_graph(data)[4]
tensor([[1.1096e-04, 1.9785e-04, 4.0764e-04, ..., 1.8104e-05, 6.9121e-03,
1.5390e-04],
[1.8015e-05, 4.5550e-05, 1.5299e-04, ..., 1.6462e-05, 8.0427e-03,
4.3956e-05],
[3.6342e-05, 1.1395e-04, 3.9942e-04, ..., 3.5643e-05, 1.8391e-03,
6.1595e-05],
...,
[5.4851e-05, 1.2452e-04, 5.3171e-04, ..., 1.6553e-05, 2.3374e-04,
2.9871e-05],
[2.3802e-04, 1.3623e-04, 5.8658e-04, ..., 2.2358e-05, 1.6297e-04,
4.2289e-04],
[3.6244e-04, 1.9283e-04, 8.4447e-04, ..., 3.0226e-05, 2.3002e-04,
6.4480e-04]])
detector.process_graph(data)[4].shape
torch.Size([2708, 1433])
Disney
Disney dataset is a network of movies including many attributes such as ratings, prices and the number of reviews
data = load_data('disney')
data.y = data.y.bool()
data.y
tensor([False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, True, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, True, False, False, False,
False, False, False, False, False, True, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, True, False, False, False,
False, False, False, False, False, False, False, False, False, False,
True, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
True, False, False, False])
sum(data.y*1)
tensor(6)
data
Data(x=[124, 28], edge_index=[2, 335], y=[124])
data.stores
[{'x': tensor([[ 2.4900e+00, 2.8000e-01, 3.0000e+00, ..., 2.0000e+00,
-1.0000e+00, 1.7333e-01],
[ 1.0000e-02, 2.3529e-01, 5.0000e+00, ..., 4.0000e+00,
-1.0000e+00, 8.8235e-02],
[ 6.3200e+00, 8.1633e-02, 5.0000e+00, ..., 5.0000e+00,
1.0000e+00, 1.0204e-01],
...,
[ 1.6450e+01, 1.2685e-02, 4.0000e+00, ..., 4.0000e+00,
5.0000e+00, 6.3425e-03],
[ 3.0640e+01, 1.1806e-01, 5.0000e+00, ..., 4.0000e+00,
5.0000e+00, 1.0417e-01],
[ 6.2800e+00, 2.7778e-01, 4.0000e+00, ..., 5.0000e+00,
-1.0000e+00, 1.1111e-01]]), 'edge_index': tensor([[ 17, 20, 28, 30, 47, 51, 63, 78, 18, 27, 56, 84, 85, 87,
31, 45, 50, 36, 89, 0, 51, 54, 57, 70, 72, 74, 77, 5,
45, 46, 50, 95, 12, 59, 64, 71, 76, 73, 6, 44, 67, 103,
107, 4, 34, 13, 17, 3, 29, 31, 38, 52, 58, 60, 79, 121,
4, 26, 73, 92, 115, 21, 29, 31, 58, 96, 51, 38, 52, 79,
20, 32, 97, 110, 19, 22, 23, 73, 15, 73, 37, 60, 17, 109,
24, 52, 5, 25, 45, 50, 95, 42, 96, 25, 45, 48, 95, 47,
12, 22, 23, 62, 63, 68, 69, 99, 108, 115, 121, 122, 58, 10,
48, 59, 64, 71, 76, 112, 118, 99, 115, 78, 65, 112, 5, 7,
9, 25, 45, 46, 50, 6, 14, 6, 66, 83, 10, 12, 61, 71,
123, 0, 77, 12, 48, 59, 64, 76, 95, 4, 23, 1, 17, 28,
30, 47, 51, 59, 13, 73, 0, 1, 8, 32, 39, 41, 72, 74,
77, 91, 100, 101, 105, 110, 82, 18, 27, 56, 85, 88, 18, 27,
87, 56, 2, 56, 85, 87, 44, 93, 2, 18, 27, 56, 84, 85,
87, 88, 1, 8, 28, 40, 41, 47, 51, 53, 55, 86, 94, 113,
119, 75, 11, 16, 53, 55, 80, 87, 52, 20, 39, 86, 101, 105,
106, 113, 22, 23, 39, 86, 98, 101, 105, 109, 113, 1, 30, 32,
39, 41, 70, 86, 105, 106, 110, 113, 1, 8, 17, 20, 28, 39,
47, 53, 54, 55, 72, 74, 81, 86, 91, 94, 98, 100, 101, 105,
106, 109, 113, 119, 7, 48, 89, 93, 95, 97, 98, 101, 106, 41,
15, 72, 91, 104, 9, 10, 12, 25, 33, 50, 59, 62, 65, 71,
76, 108, 116, 118, 122, 13, 20, 26, 92, 33, 43, 118, 11, 15,
16, 40, 53, 55, 94, 119, 11, 16, 40, 80, 94, 22, 23, 62,
78, 108, 120, 0, 8, 10, 57, 61, 66, 70, 77, 106, 112],
[ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
3, 5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9,
9, 9, 9, 9, 10, 10, 10, 10, 12, 13, 14, 14, 14, 14,
14, 15, 15, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21,
22, 22, 22, 22, 22, 24, 24, 24, 24, 24, 28, 29, 29, 29,
30, 30, 30, 30, 33, 34, 34, 34, 35, 35, 38, 38, 39, 39,
42, 42, 46, 46, 46, 46, 48, 49, 49, 50, 50, 50, 50, 51,
54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 60, 61,
61, 61, 61, 61, 61, 61, 61, 62, 62, 63, 64, 64, 65, 65,
65, 65, 65, 65, 65, 66, 66, 67, 67, 67, 69, 69, 69, 69,
69, 70, 70, 71, 71, 71, 71, 71, 71, 73, 73, 74, 74, 74,
74, 74, 74, 76, 78, 78, 81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 81, 81, 83, 84, 84, 84, 84, 84, 85, 85,
85, 87, 88, 88, 88, 88, 89, 89, 90, 90, 90, 90, 90, 90,
90, 90, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91,
91, 93, 94, 94, 94, 94, 94, 94, 96, 98, 98, 98, 98, 98,
98, 98, 99, 99, 100, 100, 100, 100, 100, 100, 100, 101, 101, 101,
101, 101, 101, 101, 101, 101, 101, 101, 102, 102, 102, 102, 102, 102,
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102,
102, 102, 102, 102, 103, 103, 103, 103, 103, 109, 109, 109, 109, 110,
111, 111, 111, 111, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112,
112, 112, 112, 112, 112, 113, 113, 113, 113, 114, 116, 116, 117, 117,
117, 117, 117, 117, 117, 117, 119, 119, 119, 119, 119, 122, 122, 122,
122, 122, 122, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123]]), 'y': tensor([False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, True, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, True, False, False, False,
False, False, False, False, False, True, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, True, False, False, False,
False, False, False, False, False, False, False, False, False, False,
True, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False, False,
True, False, False, False])}]
- node = 124
- ratio of anomalous = 4.8%
detector.fit(data)
ANOMALOUS(contamination=0.1, epoch=50, gamma=1.0, gpu=None, lr=0.01,
verbose=0, weight_decay=0.0)
detector.label_
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 0])
detector.decision_function(data)
tensor([1.1502e+08, 1.5140e+08, 1.5849e+08, 3.0639e+07, 6.9793e+07, 1.5558e+08,
6.0195e+07, 9.2430e+07, 1.1318e+08, 1.7366e+08, 1.3270e+08, 1.2029e+08,
1.1478e+08, 1.1221e+08, 9.5897e+07, 1.4153e+08, 2.7506e+08, 1.5072e+08,
1.4729e+08, 3.5039e+08, 2.3370e+08, 6.9244e+07, 1.0182e+08, 9.7996e+07,
7.4355e+07, 1.1415e+08, 2.1775e+08, 2.3359e+08, 2.6848e+08, 1.6824e+08,
1.9349e+08, 3.4573e+07, 4.0007e+08, 1.1317e+08, 8.1302e+07, 5.4817e+07,
3.6394e+07, 9.3513e+07, 1.1712e+08, 2.2044e+08, 1.5104e+08, 1.7797e+08,
6.9800e+07, 7.5331e+07, 1.1465e+08, 2.2185e+08, 2.1044e+08, 1.0733e+08,
1.8577e+08, 8.5166e+07, 1.3704e+08, 1.1458e+08, 5.8756e+07, 1.2532e+08,
9.6446e+07, 2.2280e+08, 1.4341e+08, 1.7808e+08, 4.2757e+07, 1.3760e+08,
6.2580e+07, 1.0512e+08, 1.8225e+08, 6.8715e+07, 1.8648e+08, 1.7304e+08,
4.9309e+07, 4.1520e+07, 1.0575e+08, 8.5466e+07, 1.7412e+08, 1.2775e+08,
9.4960e+07, 6.6485e+07, 1.2649e+08, 5.0889e+07, 8.6175e+07, 9.2335e+07,
7.0100e+07, 1.3570e+08, 1.4953e+08, 1.4573e+08, 1.8496e+07, 4.9970e+07,
2.2909e+08, 2.6657e+08, 2.2142e+08, 1.5258e+08, 1.4487e+08, 6.1357e+07,
1.8315e+08, 1.6355e+08, 7.6684e+07, 4.0956e+07, 1.5472e+08, 1.7501e+08,
5.4015e+07, 4.4187e+08, 1.8220e+08, 1.0258e+08, 1.5745e+08, 1.6791e+08,
1.5197e+08, 9.3803e+07, 1.1776e+08, 1.9169e+08, 1.0869e+08, 9.8122e+07,
8.1909e+07, 1.5611e+08, 1.4555e+08, 1.0674e+08, 1.1985e+08, 1.3628e+08,
6.4720e+07, 1.2194e+08, 2.5340e+08, 2.5233e+08, 1.1018e+08, 1.6862e+08,
1.9417e+08, 1.2930e+08, 1.0061e+08, 1.0417e+08])
detector.decision_function(data).shape
/home/csy/anaconda3/envs/temp_csy/lib/python3.8/site-packages/pygod/detector/anomalous.py:111: UserWarning: This detector is transductive only. Training from scratch with the input data.
warnings.warn("This detector is transductive only. "
torch.Size([124])
detector.predict(data)
/home/csy/anaconda3/envs/temp_csy/lib/python3.8/site-packages/pygod/detector/anomalous.py:111: UserWarning: This detector is transductive only. Training from scratch with the input data.
warnings.warn("This detector is transductive only. "
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 0])
sum(detector.predict())
tensor(13)