import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from sklearn.cluster import KMeans
- Age: Patient's age in years (Numeric)
- Sex: Gender (Male : 1; Female : 0) (Nominal)
- cp: Type of chest pain experienced by the patient, categorized into 4 categories:
- 0: typical angina, 1: atypical angina, 2: non-anginal pain, 3: asymptomatic (Nominal)
- trestbps: Patient's resting blood pressure in mm Hg (Numeric)
- chol: Serum cholesterol in mg/dl (Numeric)
- fbs: Whether fasting blood sugar is > 120 mg/dl (1 = true, 0 = false) (Nominal)
- restecg: Resting electrocardiogram result, represented by 3 distinct values
- 0: normal; 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
- 2: showing probable or definite left ventricular hypertrophy by Estes' criteria (Nominal)
- thalach: Maximum heart rate achieved (Numeric)
- exang: Exercise-induced angina (0 = no, 1 = yes) (Nominal)
- oldpeak: ST depression induced by exercise relative to rest (Numeric)
- slope: Slope of the ST segment during peak exercise
- 0: upsloping; 1: flat; 2: downsloping (Nominal)
- ca: Number of major vessels (0–3) (Nominal)
- thal: A blood disorder called thalassemia
- 0: NULL; 1: normal blood flow; 2: fixed defect (no blood flow in some part of the heart); 3: reversible defect (blood flow is observed but it is not normal) (Nominal)
- target: The variable to predict: 1 means the patient has heart disease and 0 means the patient is normal.
df = pd.read_csv('../../../delete/Heart_disease_cleveland_new.csv')df.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 age 303 non-null int64
1 sex 303 non-null int64
2 cp 303 non-null int64
3 trestbps 303 non-null int64
4 chol 303 non-null int64
5 fbs 303 non-null int64
6 restecg 303 non-null int64
7 thalach 303 non-null int64
8 exang 303 non-null int64
9 oldpeak 303 non-null float64
10 slope 303 non-null int64
11 ca 303 non-null int64
12 thal 303 non-null int64
13 target 303 non-null int64
dtypes: float64(1), int64(13)
memory usage: 33.3 KB
df.head(5)| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 0 | 145 | 233 | 1 | 2 | 150 | 0 | 2.3 | 2 | 0 | 2 | 0 |
| 1 | 67 | 1 | 3 | 160 | 286 | 0 | 2 | 108 | 1 | 1.5 | 1 | 3 | 1 | 1 |
| 2 | 67 | 1 | 3 | 120 | 229 | 0 | 2 | 129 | 1 | 2.6 | 1 | 2 | 3 | 1 |
| 3 | 37 | 1 | 2 | 130 | 250 | 0 | 0 | 187 | 0 | 3.5 | 2 | 0 | 1 | 0 |
| 4 | 41 | 0 | 1 | 130 | 204 | 0 | 2 | 172 | 0 | 1.4 | 0 | 0 | 1 | 0 |
X = df.drop(columns=['target']).valuesX = StandardScaler().fit_transform(X)knn = NearestNeighbors(n_neighbors=5)knnNearestNeighbors()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
NearestNeighbors()
knn.fit(X)NearestNeighbors()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
NearestNeighbors()
edges = knn.kneighbors(X,return_distance=False)edgesarray([[ 0, 124, 49, 196, 139],
[ 1, 37, 155, 235, 65],
[ 2, 24, 206, 76, 62],
...,
[300, 250, 64, 177, 127],
[301, 135, 125, 230, 276],
[302, 269, 283, 35, 190]])
# Flatten the neighbour-index table `edges` into a list of
# [source, neighbour] pairs: one pair per entry of each row, where the row
# position is the source node. The sample output of `edges` shows every row
# starting with its own index, so each node is also paired with itself.
edge_index = [
    [src, dst]
    for src, neighbours in enumerate(edges)
    for dst in neighbours
]
# Notebook display originally fused onto the last loop line:
# len(edge_index) -> 1515
torch.tensor(edge_index).shapetorch.Size([1515, 2])
torch.tensor(edge_index).t().shapetorch.Size([2, 1515])
torch.tensor(edge_index).t().contiguous().shapetorch.Size([2, 1515])
edge_index = torch.tensor(edge_index).t().contiguous()x = torch.tensor(X, dtype=torch.float)xtensor([[ 0.9487, 0.6862, -2.2518, ..., 2.2746, -0.7111, 0.1762],
[ 1.3920, 0.6862, 0.8780, ..., 0.6491, 2.5049, -0.8708],
[ 1.3920, 0.6862, 0.8780, ..., 0.6491, 1.4329, 1.2232],
...,
[ 0.2838, 0.6862, 0.8780, ..., 0.6491, 0.3609, 1.2232],
[ 0.2838, -1.4573, -1.2085, ..., 0.6491, 0.3609, -0.8708],
[-1.8217, 0.6862, -0.1653, ..., -0.9764, -0.7111, -0.8708]])
class GCNencode(torch.nn.Module):
    """Stack of three ``GCNConv`` layers with ReLU after the first two.

    The layer widths default to the values that were previously hard-coded
    (64 -> 32 -> 13), so ``GCNencode(in_channels)`` builds the same network
    as before; the extra keyword arguments only make the widths adjustable.

    Args:
        in_channels: Number of input features per node.
        hidden1_channels: Output width of the first convolution.
        hidden2_channels: Output width of the second convolution.
        out_channels: Output width of the third (final) convolution.
    """

    def __init__(self, in_channels, hidden1_channels=64, hidden2_channels=32,
                 out_channels=13):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden1_channels)
        self.conv2 = GCNConv(hidden1_channels, hidden2_channels)
        self.conv3 = GCNConv(hidden2_channels, out_channels)

    def forward(self, x, edge_index):
        """Run the three convolutions over the graph.

        Args:
            x: Node feature tensor passed to the first convolution.
            edge_index: Graph connectivity, forwarded unchanged to every
                convolution.

        Returns:
            The raw output of the third convolution; no activation is
            applied after the last layer.
        """
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        return self.conv3(x, edge_index)


# Notebook display originally fused onto the `return x` line:
# x.shape -> torch.Size([303, 13])
x.shape[0]303
x.shape[1]13
model = GCNencode(x.shape[1])modelGCNencode(
(conv1): GCNConv(13, 64)
(conv2): GCNConv(64, 32)
(conv3): GCNConv(32, 13)
)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)x[:,:13]tensor([[ 0.9487, 0.6862, -2.2518, ..., 2.2746, -0.7111, 0.1762],
[ 1.3920, 0.6862, 0.8780, ..., 0.6491, 2.5049, -0.8708],
[ 1.3920, 0.6862, 0.8780, ..., 0.6491, 1.4329, 1.2232],
...,
[ 0.2838, 0.6862, 0.8780, ..., 0.6491, 0.3609, 1.2232],
[ 0.2838, -1.4573, -1.2085, ..., 0.6491, 0.3609, -0.8708],
[-1.8217, 0.6862, -0.1653, ..., -0.9764, -0.7111, -0.8708]])
# Fit the model for 100 full-graph optimisation steps, minimising the
# mean-squared error between the model output and the first 13 feature
# columns of `x`.
model.train()  # training mode is loop-invariant, so it is set once up front
reconstruction_target = x[:, :13]
for epoch in range(100):
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    z = model(x, edge_index)
    loss = F.mse_loss(z, reconstruction_target)
    loss.backward()
    optimizer.step()

# Switch the model to evaluation mode once training is finished.
model.eval()
# Notebook output of the `model.eval()` cell:
# GCNencode(
#   (conv1): GCNConv(13, 64)
#   (conv2): GCNConv(64, 32)
#   (conv3): GCNConv(32, 13)
# )
embeddings = model(x,edge_index).detach().numpy()embeddingsarray([[ 0.93777424, 1.1018754 , -2.2913668 , ..., 1.904715 ,
-0.5608 , -0.23465104],
[ 1.4721875 , 0.82115763, -0.11787386, ..., 0.45280465,
2.554476 , -0.60368335],
[ 0.7943949 , 0.8147143 , 0.9416017 , ..., 0.30953175,
1.5317379 , 1.3029628 ],
...,
[ 0.28482667, 0.437107 , 0.8282606 , ..., 1.0481589 ,
-0.33025122, 0.8082806 ],
[ 0.0184263 , -1.4629475 , -1.0692531 , ..., 0.26454 ,
-0.32980496, -0.8803086 ],
[-1.9536707 , 0.6859256 , -0.09410449, ..., -1.2229915 ,
-0.93635803, -0.6885667 ]], dtype=float32)
embeddings.shapetorch.Size([303, 13])
kmeans = KMeans(n_clusters=2)kmeansKMeans(n_clusters=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KMeans(n_clusters=2)