import pandas as pd
import kan
import matplotlib.pyplot as plt
import numpy as np
import torch
from kan import KAN, create_dataset
from sklearn.datasets import make_classification, load_iris
from sklearn.model_selection import train_test_split
import torchvision
from fastai.vision.all import *
ref: 1
Import
Data
# Load the Iris dataset bundle from scikit-learn.
iris = load_iris()
KAN
Symbolic regression
특정 함수로 표현 가능 \(\rightarrow\) 아핀 변환으로 고정 \(\rightarrow\) 파라미터 수 줄이기 가능 \(\rightarrow\) 재학습
def f(x):
    """Return exp(sin(pi * x[:, 0]) + x[:, 1] ** 2) as a column.

    The column indices are wrapped in lists (``[0]``, ``[1]``) so each slice
    keeps its second dimension and the result has one column per row of *x*.
    """
    return torch.exp(torch.sin(torch.pi*x[:,[0]]) + x[:,[1]]**2)


torch.manual_seed(0)  # fix the RNG so the sampled points are reproducible
X = torch.rand(1000, 2) * 2 - 1 # Random points in [-1, 1]^2
y = f(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Bundle the splits into the dict that is later passed to model.train().
dataset = dict(train_input=X_train, train_label=y_train, test_input=X_test, test_label=y_test)

# 3-D scatter of the training labels over the two input coordinates.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dataset['train_input'][:, 0], dataset['train_input'][:, 1], dataset['train_label'], c='c', marker='o')
plt.show()
- \(2\)
- \(x_1, x_2\)
- \(5\)
- \(2 \times 2 + 1 = 5\)
- \(1\)
- \(2-1\)
# KAN with layer widths 2 -> 5 -> 1.
model = KAN(width=[2,5,1], grid=3, k=3, seed=0)

# Run one forward pass on the training inputs before plotting.
model(dataset['train_input'])
model.plot(beta=100, in_vars=['$x_1$', '$x_2$'], out_vars=['y']) # beta controls the transparency of the plot

# Train with LBFGS for 20 steps; lamb / lamb_entropy are the regularisation settings.
res = model.train(dataset, opt="LBFGS", steps=20, lamb=0.01, lamb_entropy=10.)
train loss: 1.09e-01 | test loss: 9.79e-02 | reg: 2.11e+01 : 100%|██| 20/20 [00:04<00:00, 4.61it/s]
# Plot the network again now that training has run.
model.plot()
가지치기
# Prune the trained model and keep the result as a separate model.
model2 = model.prune()

# Forward pass on the training inputs before plotting the pruned network.
model2(dataset['train_input'])
model2.plot()

# Ask for symbolic candidates for the activation indexed (0, 0, 0).
model2.suggest_symbolic(0,0,0)
function , r2
sin , 0.9966473579406738
gaussian , 0.9244089722633362
tanh , 0.8837200999259949
sigmoid , 0.8783647418022156
arctan , 0.8637073636054993
('sin',
(<function kan.utils.<lambda>(x)>, <function kan.utils.<lambda>(x)>),
0.9966473579406738)
Classification
iris data
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

# Iris dataset
iris = load_iris()
X = torch.tensor(iris.data, dtype=torch.float32)
# One-hot encode the integer class labels, then convert to a float tensor.
y = enc.fit_transform(iris.target.reshape(-1, 1)).toarray()
y = torch.tensor(y, dtype=torch.float32)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Bundle the splits into the dict that is later passed to model.train().
dataset = dict(train_input=X_train, train_label=y_train, test_input=X_test, test_label=y_test)
\(4 \times 2 + 1 =9\)
# Display the shapes of the feature tensor and the one-hot label tensor.
X.shape, y.shape
(torch.Size([150, 4]), torch.Size([150, 3]))
# Create KAN with layer widths 4 -> 9 -> 3.
model = KAN(width=[4,9,3], grid=3, k=3, seed=0)

# Run one forward pass on the training inputs before plotting.
model(X_train)
model.plot()
# Train KAN
from sklearn.metrics import accuracy_score
def train_acc():
    """Return the training accuracy of the module-level ``model``.

    The predicted class is the arg-max of the model output along dim 1, the
    true class is the arg-max of the corresponding row of
    ``dataset['train_label']``; the result is the mean of the matches as a
    0-dim float tensor.
    """
    predicted = torch.argmax(model(dataset['train_input']), dim=1)
    expected = torch.argmax(dataset['train_label'], dim=1)
    return torch.mean((predicted == expected).float())
def test_acc():
return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == torch.argmax(dataset['test_label'], dim=1)).float())
# Train with cross-entropy loss and record both accuracy metrics on each step.
res = model.train(dataset, opt="LBFGS", metrics=[train_acc, test_acc], steps=20, lamb=0.01, lamb_entropy=10., loss_fn=torch.nn.CrossEntropyLoss())

# Accuracy plot
fig, ax = plt.subplots()
ax.plot(res["train_acc"], label="train")
ax.plot(res["test_acc"], label="test")
ax.legend()
plt.show()
train loss: 2.88e-01 | test loss: 2.52e-01 | reg: 6.61e+01 : 100%|██| 20/20 [00:08<00:00, 2.35it/s]
ECG
- abnormal = 0, normal = 1
- ~139 열까지는 시간에 따른 심전도
# Download the ECG table; the file has no header row.
dataframe = pd.read_csv('http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv', header=None)
raw_data = dataframe.values
# dataframe.head()
Normalize the data
def normalize(df):
    """Min-max scale *df* using its own ``min()`` and ``max()``.

    Computes ``(df - df.min()) / (df.max() - df.min())``. What "min" and "max"
    cover depends on the type passed in: for a NumPy array they are the global
    extremes, so the whole array is mapped onto [0, 1] with one scale.
    """
    lowest = df.min()
    highest = df.max()
    return (df - lowest) / (highest - lowest)
# Scale every column except the last one (the last column is used as the label below).
raw_dat = np.array(normalize(raw_data[:, 0:-1]))

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

X = torch.tensor(raw_dat[:, :], dtype=torch.float32)
# One-hot encode the label column, then convert to a float tensor.
y = enc.fit_transform(raw_data[:, -1].reshape(-1, 1)).toarray()
y = torch.tensor(y, dtype=torch.float32)
from sklearn.decomposition import PCA

# Reduce the features to 4 principal components so they match the KAN input width of 4.
# NOTE(review): PCA is fitted on all rows before the train/test split — confirm that is intended.
pca = PCA(n_components=4)
X1 = torch.tensor(pca.fit_transform(X))
# Display the shapes of the PCA-reduced features and the one-hot labels.
X1.shape, y.shape
(torch.Size([4998, 4]), torch.Size([4998, 2]))
X_train, X_test, y_train, y_test = train_test_split(X1, y, test_size=0.3, random_state=0)
# Bundle the splits into the dict that is later passed to model.train().
dataset = dict(train_input=X_train, train_label=y_train, test_input=X_test, test_label=y_test)

# KAN with layer widths 4 -> 9 -> 2.
model = KAN(width=[4,9,2], grid=3, k=3, seed=0)

# Run one forward pass on the training inputs before plotting.
model(X_train)
model.plot()
from sklearn.metrics import accuracy_score
def train_acc():
    """Return the training accuracy of the module-level ``model``.

    The predicted class is the arg-max of the model output along dim 1, the
    true class is the arg-max of the corresponding row of
    ``dataset['train_label']``; the result is the mean of the matches as a
    0-dim float tensor.
    """
    predicted = torch.argmax(model(dataset['train_input']), dim=1)
    expected = torch.argmax(dataset['train_label'], dim=1)
    return torch.mean((predicted == expected).float())
def test_acc():
return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == torch.argmax(dataset['test_label'], dim=1)).float())
# Train with cross-entropy loss and record both accuracy metrics on each step.
res = model.train(dataset, opt="LBFGS", metrics=[train_acc, test_acc], steps=20, lamb=0.01, lamb_entropy=10., loss_fn=torch.nn.CrossEntropyLoss())

# Accuracy plot
fig, ax = plt.subplots()
ax.plot(res["train_acc"], label="train")
ax.plot(res["test_acc"], label="test")
ax.legend()
plt.show()
train loss: 2.69e-01 | test loss: 2.88e-01 | reg: 2.94e+01 : 100%|██| 20/20 [00:19<00:00, 1.05it/s]
# Download the ECG table; the file has no header row.
dataframe = pd.read_csv('http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv', header=None)
raw_data = dataframe.values
# dataframe.head()
Normalize the data
def normalize(df):
    """Min-max scale *df* using its own ``min()`` and ``max()``.

    Computes ``(df - df.min()) / (df.max() - df.min())``. What "min" and "max"
    cover depends on the type passed in: for a NumPy array they are the global
    extremes, so the whole array is mapped onto [0, 1] with one scale.
    """
    lowest = df.min()
    highest = df.max()
    return (df - lowest) / (highest - lowest)
# Scale every column except the last one (the last column is used as the label below).
raw_dat = np.array(normalize(raw_data[:, 0:-1]))

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

X = torch.tensor(raw_dat[:, :], dtype=torch.float32)
# One-hot encode the label column, then convert to a float tensor.
y = enc.fit_transform(raw_data[:, -1].reshape(-1, 1)).toarray()
y = torch.tensor(y, dtype=torch.float32)
from sklearn.decomposition import PCA

# Reduce the features to 4 principal components so they match the KAN input width of 4.
# NOTE(review): PCA is fitted on all rows before the train/test split — confirm that is intended.
pca = PCA(n_components=4)
X1 = torch.tensor(pca.fit_transform(X))
X_train, X_test, y_train, y_test = train_test_split(X1, y, test_size=0.3, random_state=0)
# Bundle the splits into the dict passed to model.train() below.
dataset = dict(train_input=X_train, train_label=y_train, test_input=X_test, test_label=y_test)

# KAN with layer widths 4 -> 9 -> 2.
model = KAN(width=[4,9,2], grid=3, k=3, seed=0)

# Run one forward pass on the training inputs before plotting.
model(dataset['train_input'])
model.plot(beta=100, in_vars=['$x_1$', '$x_2$', '$x_3$', '$x_4$'], out_vars=['$y_1$','$y_2$']) # beta controls the transparency of the plot

# No loss_fn is passed here, so the library's default loss is used on the one-hot labels.
res = model.train(dataset, opt="LBFGS", steps=20, lamb=0.01, lamb_entropy=10.)
train loss: 2.06e-01 | test loss: 2.10e-01 | reg: 3.68e+01 : 100%|██| 20/20 [00:17<00:00, 1.13it/s]
# Plot the network again now that training has run.
model.plot()
가지치기
# Prune the trained model and keep the result as a separate model.
model2 = model.prune()

# Forward pass on the training inputs before plotting the pruned network.
model2(dataset['train_input'])
model2.plot()

# Ask for symbolic candidates for the activation indexed (0, 0, 0).
model2.suggest_symbolic(0,0,0)
function , r2
sigmoid , 0.9998123908724215
tanh , 0.9998079275604033
sin , 0.9996524257432485
arctan , 0.9995603813134804
gaussian , 0.9994074762181313
('sigmoid', (<function kan.utils.<lambda>(x)>, sigmoid), 0.9998123908724215)
try:
    # scipy.misc.electrocardiogram is deprecated since SciPy 1.10 and removed
    # in 1.12; the dataset now lives in scipy.datasets (which needs the
    # optional `pooch` package to fetch the data on first use).
    from scipy.datasets import electrocardiogram
except ImportError:
    from scipy.misc import electrocardiogram  # fallback for SciPy < 1.10

ecg = electrocardiogram()
ecg
DeprecationWarning: scipy.misc.electrocardiogram has been deprecated in SciPy v1.10.0; and will be completely removed in SciPy v1.12.0. Dataset methods have moved into the scipy.datasets module. Use scipy.datasets.electrocardiogram instead.
ecg = electrocardiogram()
array([-0.245, -0.215, -0.185, ..., -0.405, -0.395, -0.385])
# Candidate function names offered to the automatic symbolic fit.
lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','tan','abs']
model.auto_symbolic(lib=lib)
# Keep the first entry of the first element returned by symbolic_formula().
formula = model.symbolic_formula()[0][0]
skipping (0,0,0) since already symbolic
skipping (0,0,1) since already symbolic
skipping (0,0,2) since already symbolic
skipping (0,0,3) since already symbolic
skipping (0,0,4) since already symbolic
skipping (0,0,5) since already symbolic
skipping (0,0,6) since already symbolic
skipping (0,0,7) since already symbolic
skipping (0,0,8) since already symbolic
skipping (0,1,0) since already symbolic
skipping (0,1,1) since already symbolic
skipping (0,1,2) since already symbolic
skipping (0,1,3) since already symbolic
skipping (0,1,4) since already symbolic
skipping (0,1,5) since already symbolic
skipping (0,1,6) since already symbolic
skipping (0,1,7) since already symbolic
skipping (0,1,8) since already symbolic
skipping (0,2,0) since already symbolic
skipping (0,2,1) since already symbolic
skipping (0,2,2) since already symbolic
skipping (0,2,3) since already symbolic
skipping (0,2,4) since already symbolic
skipping (0,2,5) since already symbolic
skipping (0,2,6) since already symbolic
skipping (0,2,7) since already symbolic
skipping (0,2,8) since already symbolic
skipping (0,3,0) since already symbolic
skipping (0,3,1) since already symbolic
skipping (0,3,2) since already symbolic
skipping (0,3,3) since already symbolic
skipping (0,3,4) since already symbolic
skipping (0,3,5) since already symbolic
skipping (0,3,6) since already symbolic
skipping (0,3,7) since already symbolic
skipping (0,3,8) since already symbolic
skipping (1,0,0) since already symbolic
skipping (1,0,1) since already symbolic
skipping (1,1,0) since already symbolic
skipping (1,1,1) since already symbolic
skipping (1,2,0) since already symbolic
skipping (1,2,1) since already symbolic
skipping (1,3,0) since already symbolic
skipping (1,3,1) since already symbolic
skipping (1,4,0) since already symbolic
skipping (1,4,1) since already symbolic
skipping (1,5,0) since already symbolic
skipping (1,5,1) since already symbolic
skipping (1,6,0) since already symbolic
skipping (1,6,1) since already symbolic
skipping (1,7,0) since already symbolic
skipping (1,7,1) since already symbolic
skipping (1,8,0) since already symbolic
skipping (1,8,1) since already symbolic
https://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.electrocardiogram.html
try:
    # scipy.misc.electrocardiogram is deprecated since SciPy 1.10 and removed
    # in 1.12; the dataset now lives in scipy.datasets (which needs the
    # optional `pooch` package to fetch the data on first use).
    from scipy.datasets import electrocardiogram
except ImportError:
    from scipy.misc import electrocardiogram  # fallback for SciPy < 1.10

ecg = electrocardiogram()
fs = 360  # divisor that turns sample indices into time stamps (SciPy documents a 360 Hz sampling rate)
# Column tensors: one time stamp and one signal value per row.
time = (torch.tensor(np.arange(ecg.size) / fs)).reshape(-1,1)
ecg = torch.tensor(ecg.reshape(-1,1))
DeprecationWarning: scipy.misc.electrocardiogram has been deprecated in SciPy v1.10.0; and will be completely removed in SciPy v1.12.0. Dataset methods have moved into the scipy.datasets module. Use scipy.datasets.electrocardiogram instead.
ecg = electrocardiogram()
X_train, X_test, y_train, y_test = train_test_split(time, ecg, test_size=0.3, random_state=0)
# Bundle the splits into the dict passed to model.train() below.
dataset = dict(train_input=X_train, train_label=y_train, test_input=X_test, test_label=y_test)

# NOTE(review): width=[1,2] gives the network two outputs, while the label
# tensor built from ecg.reshape(-1,1) has a single column — confirm this is intended.
model = KAN(width=[1,2], grid=200, k=3, noise_scale=0.1, sp_trainable=False, sb_trainable=False)
model.train(dataset, opt="LBFGS", steps=20, lamb=0.01, lamb_entropy=10.)
train loss: 4.09e-01 | test loss: 4.07e-01 | reg: 1.08e+01 : 100%|██| 20/20 [02:27<00:00, 7.39s/it]
{'train_loss': [array(2.30627008),
array(0.80433106),
array(0.5503011),
array(0.53079867),
array(0.52704084),
array(0.52385807),
array(0.51145847),
array(0.47409133),
array(0.42562252),
array(0.4092666),
array(0.40882232),
array(0.40879502),
array(0.40879192),
array(0.40878918),
array(0.40877626),
array(0.40873278),
array(0.40871899),
array(0.40871734),
array(0.40871611),
array(0.40871634)],
'test_loss': [array(2.34457763),
array(0.82191735),
array(0.55857336),
array(0.53869434),
array(0.53538231),
array(0.53201989),
array(0.51609869),
array(0.47529749),
array(0.42279748),
array(0.40744141),
array(0.40717845),
array(0.40715045),
array(0.40715205),
array(0.407151),
array(0.40709911),
array(0.40717846),
array(0.40709756),
array(0.40710753),
array(0.40710321),
array(0.40708267)],
'reg': [array(10.7843032),
array(10.78427902),
array(10.78425403),
array(10.7842325),
array(10.78421462),
array(10.7841614),
array(10.7837926),
array(10.78307848),
array(10.78178383),
array(10.78148019),
array(10.78148019),
array(10.78143652),
array(10.78139944),
array(10.78132401),
array(10.78064275),
array(10.7786567),
array(10.77790221),
array(10.77773408),
array(10.77766523),
array(10.77742246)]}
# Plot the network again now that training has run.
model.plot()
# Candidate function names offered to the automatic symbolic fit.
lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','tan','abs']
model.auto_symbolic(lib=lib)
# Keep the first entry of the first element returned by symbolic_formula().
formula = model.symbolic_formula()[0][0]
fixing (0,0,0) with sin, r2=0.057382220367117785
fixing (0,0,1) with sin, r2=0.057381458884463804