import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
from haversine import haversine
from IPython.display import HTMLImport
from matplotlib import cm
from pygsp import graphs, filters, plotting, utils
import plotly.graph_objects as go
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.vectors import FloatVector
from rpy2.robjects.packages import importrimport warnings
warnings.filterwarnings("ignore")Linear
np.random.seed(6)
n = 1000
eta_sparsity = 0.05
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-7, -5, round(n*eta_sparsity/2)).round(15), np.random.uniform(5, 7, round(n*eta_sparsity/2)).round(15), np.repeat(0, n - round(n*eta_sparsity)))), n)
eta = signal + epsilon
outlier_true_linear= signal.copy()
outlier_true_linear = list(map(lambda x: 1 if x!=0 else 0,outlier_true_linear))
x_1 = np.linspace(0,2,n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta # eta = signal + epsilon
_df=pd.DataFrame({'x':x_1, 'y':y_1})
w=np.zeros((n,n))
for i in range(n):
for j in range(n):
if i==j :
w[i,j] = 0
elif np.abs(i-j) <= 1 :
w[i,j] = 1
index_of_trueoutlier_bool = signal!=0class Linear:
def __init__(self,df):
self.df = df
self.y = df.y.to_numpy()
self.x = df.x.to_numpy()
self.n = len(self.y)
self.W = w
def _eigen(self):
d= self.W.sum(axis=1)
D= np.diag(d)
self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
self.lamb, self.Psi = np.linalg.eigh(self.L)
self.Lamb = np.diag(self.lamb)
def fit(self,sd=20): # fit with ebayesthresh
self._eigen()
self.ybar = self.Psi.T @ self.y # fbar := graph fourier transform of f
self.power = self.ybar**2
ebayesthresh = importr('EbayesThresh').ebayesthresh
self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
self.ybar_threshed = np.where(self.power_threshed>0,self.ybar,0)
self.yhat = self.Psi@self.ybar_threshed
self.df = self.df.assign(yHat = self.yhat)
self.df = self.df.assign(Residual = self.df.y- self.df.yHat)
def fig(self,ymin=-5,ymax=20,cuts=0,cutf=1495):
outlier_GODE_linear_old = (self.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_linear_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_linear = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_linear_old))
outlier_GODE_linear_index = [i for i, value in enumerate(outlier_GODE_linear_old) if value > five_percent]
fig,ax = plt.subplots(figsize=(10,10))
ax.scatter(self.x,self.y,color='gray',s=50)
# ax.scatter(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],color='red',s=50)
ax.scatter(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],color='red',s=100)
ax.plot(self.x[cuts:cutf],self.yhat[cuts:cutf], '--k',lw=3)
ax.scatter(self.df.x[outlier_GODE_linear_index],self.df.y[outlier_GODE_linear_index],color='red',s=550,facecolors='none', edgecolors='r')
fig.tight_layout()
# fig.savefig('fig1_231103.eps',format='eps')
# fig.savefig('linear_240623.pdf',format='pdf')_Linear = Linear(_df)
_Linear.fit(sd=20)_Linear.fig()
Orbit
n = 1000
eta_sparsity = 0.05
random_seed=77np.random.seed(777)
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
eta = signal + epsilon
pi=np.pi
ang=np.linspace(-pi,pi-2*pi/n,n)
r=5+np.cos(np.linspace(0,12*pi,n))
vx=r*np.cos(ang)
vy=r*np.sin(ang)
f1=10*np.sin(np.linspace(0,6*pi,n))
f = f1 + eta
_df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
outlier_true_orbit = signal.copy()
outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
index_of_trueoutlier_bool = signal!=0class Orbit:
def __init__(self,df):
self.df = df
self.f = df.f.to_numpy()
self.f1 = df.f1.to_numpy()
self.x = df.x.to_numpy()
self.y = df.y.to_numpy()
self.n = len(self.f)
self.theta= None
def get_distance(self):
self.D = np.zeros([self.n,self.n])
locations = np.stack([self.x, self.y],axis=1)
for i in tqdm.tqdm(range(self.n)):
for j in range(i,self.n):
self.D[i,j]=np.linalg.norm(locations[i]-locations[j])
self.D = self.D + self.D.T
def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
self.theta = theta
dist = np.where(self.D < kappa,self.D,0)
self.W = np.exp(-(dist/self.theta)**2)
def _eigen(self):
d= self.W.sum(axis=1)
D= np.diag(d)
self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
self.lamb, self.Psi = np.linalg.eigh(self.L)
self.Lamb = np.diag(self.lamb)
def fit(self,sd=5): # fit with ebayesthresh
self._eigen()
self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
self.power = self.fbar**2
ebayesthresh = importr('EbayesThresh').ebayesthresh
self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2),sd=sd))
self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
self.fhat = self.Psi@self.fbar_threshed
self.df = self.df.assign(fHat = self.fhat)
self.df = self.df.assign(Residual = self.df.f- self.df.fHat)
def fig(self):
outlier_GODE_one_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_one_old,reverse=True)
index = int(len(sorted_data) * 0.05)
five_percent = sorted_data[index]
outlier_GODE_one = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_one_old))
outlier_GODE_one_index = [i for i, value in enumerate(outlier_GODE_one_old) if value > five_percent]
# fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(30,15),subplot_kw={"projection":"3d"})
# ax1.grid(False)
# ax1.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
# ax1.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
# ax1.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
# ax1.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
# ax1.xaxis.pane.fill = False
# ax1.yaxis.pane.fill = False
# ax1.zaxis.pane.fill = False
# ax1.view_init(elev=30., azim=60)
# ax2.grid(False)
# ax2.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
# ax2.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
# ax2.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
# ax2.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
# ax2.xaxis.pane.fill = False
# ax2.yaxis.pane.fill = False
# ax2.zaxis.pane.fill = False
# ax2.view_init(elev=30., azim=40)
# ax3.grid(False)
# ax3.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
# ax3.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
# ax3.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
# ax3.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
# ax3.xaxis.pane.fill = False
# ax3.yaxis.pane.fill = False
# ax3.zaxis.pane.fill = False
# ax3.view_init(elev=30., azim=10)
# fig.savefig('fig2_231129.eps',format='eps')
# fig.savefig('orbit_231129_3.pdf',format='pdf')# _Orbit = Orbit(_df)# _Orbit.get_distance()# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500)
# _Orbit.fit(sd=15)# %%capture --no-display
# _Orbit.fig()from sklearn import metrics_Orbit = Orbit(_df)
_Orbit.get_distance()
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=10)
_Orbit.fit(sd=15)
outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
fpr, tpr, thresh = metrics.roc_curve(outlier_true_orbit, outlier_GODE_orbit)
AUC = auc(fpr, tpr)
AUC100%|██████████| 1000/1000 [00:01<00:00, 740.81it/s]
NameError: name 'auc' is not defined
kappa = 1.21
n_values = list([1000,5000,10000])
eta_sparsity_list = list([0.01,0.05,0.1])
random_seed=77
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination"])for n in n_values:
for eta_sparsity in eta_sparsity_list:
np.random.seed(777)
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
eta = signal + epsilon
pi=np.pi
ang=np.linspace(-pi,pi-2*pi/n,n)
r=5+np.cos(np.linspace(0,12*pi,n))
vx=r*np.cos(ang)
vy=r*np.sin(ang)
f1=10*np.sin(np.linspace(0,6*pi,n))
f = f1 + eta
_df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
outlier_true_orbit = signal.copy()
outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
index_of_trueoutlier_bool = signal!=0
_Orbit = Orbit(_df)
_Orbit.get_distance()
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa)
_Orbit.fit(sd=15)
outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
fpr, tpr, thresh = roc_curve(outlier_true_orbit, outlier_GODE_orbit)
fold_AUC = auc(fpr, tpr)
tab = pd.concat([tab,
pd.DataFrame({"n":[n],"kappa":[kappa],"eta_sparsity":[eta_sparsity],"AUC":[fold_AUC]})])100%|██████████| 1000/1000 [00:01<00:00, 694.62it/s]
NameError: name 'roc_curve' is not defined
tabplt.figure(figsize=(8, 6)) # 그림 크기 설정 (선택사항)
sns.heatmap(np.array(tab.AUC).reshape(10,10), annot=True, cmap='coolwarm', square=True)
plt.show()Bunny
with open("../../2_research/Bunny.pkl", "rb") as file:
loaded_obj = pickle.load(file)_df = pd.DataFrame({'x':loaded_obj['x'],'y':loaded_obj['y'],'z':loaded_obj['z'],'fnoise':loaded_obj['f']+loaded_obj['noise'],'f':loaded_obj['f'],'noise':loaded_obj['noise']})
outlier_true_bunny = loaded_obj['unif'].copy()
outlier_true_bunny = list(map(lambda x: 1 if x !=0 else 0,outlier_true_bunny))
index_of_trueoutlier_bool_bunny = loaded_obj['unif']!=0
_W = loaded_obj['W'].copy()class BUNNY:
def __init__(self,df):
self.df = df
self.f = df.f.to_numpy()
self.z = df.z.to_numpy()
self.x = df.x.to_numpy()
self.y = df.y.to_numpy()
self.noise = df.noise.to_numpy()
self.fnoise = self.f + self.noise
self.W = _W
self.n = len(self.f)
self.theta= None
def _eigen(self):
d= self.W.sum(axis=1)
D= np.diag(d)
self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
self.lamb, self.Psi = np.linalg.eigh(self.L)
self.Lamb = np.diag(self.lamb)
def fit(self,sd=5): # fit with ebayesthresh
self._eigen()
self.fbar = self.Psi.T @ self.fnoise # fbar := graph fourier transform of f
self.power = self.fbar**2
ebayesthresh = importr('EbayesThresh').ebayesthresh
self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
self.fhat = self.Psi@self.fbar_threshed
self.df = self.df.assign(fnoise = self.fnoise)
self.df = self.df.assign(fHat = self.fhat)
self.df = self.df.assign(Residual = self.df.f + self.df.noise - self.df.fHat)
def fig(self):
outlier_GODE_one_old = (self.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_one_old,reverse=True)
index = int(len(sorted_data) * 0.05)
five_percent = sorted_data[index]
outlier_GODE_one = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_one_old))
outlier_GODE_one_index = [i for i, value in enumerate(outlier_GODE_one_old) if value > five_percent]
fig = plt.figure(figsize=(30,12),dpi=400)
ax1 = fig.add_subplot(251, projection='3d')
ax1.grid(False)
ax1.scatter3D(self.x,self.y,self.z,c='gray',zdir='z',alpha=0.5,marker='.')
ax1.view_init(elev=60., azim=-90)
ax2= fig.add_subplot(252, projection='3d')
ax2.grid(False)
ax2.scatter3D(self.x,self.y,self.z,c=self.f,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
ax2.view_init(elev=60., azim=-90)
ax3= fig.add_subplot(253, projection='3d')
ax3.grid(False)
ax3.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
ax3.view_init(elev=60., azim=-90)
ax4= fig.add_subplot(254, projection='3d')
ax4.grid(False)
ax4.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
ax4.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
ax4.view_init(elev=60., azim=-90)
ax5= fig.add_subplot(255, projection='3d')
ax5.grid(False)
ax5.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
ax5.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
ax5.scatter3D(self.df.x[outlier_GODE_one_index],self.df.y[outlier_GODE_one_index],self.df.z[outlier_GODE_one_index],zdir='z',s=550,marker='.',edgecolors='red',facecolors='none')
ax5.view_init(elev=60., azim=-90)
ax6 = fig.add_subplot(256, projection='3d')
ax6.grid(False)
ax6.scatter3D(self.x,self.y,self.z,c='gray',zdir='z',alpha=0.5,marker='.')
ax6.view_init(elev=-60., azim=-90)
ax7= fig.add_subplot(257, projection='3d')
ax7.grid(False)
ax7.scatter3D(self.x,self.y,self.z,c=self.f,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
ax7.view_init(elev=-60., azim=-90)
ax8= fig.add_subplot(258, projection='3d')
ax8.grid(False)
ax8.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
ax8.view_init(elev=-60., azim=-90)
ax9= fig.add_subplot(259, projection='3d')
ax9.grid(False)
ax9.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
ax9.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
ax9.view_init(elev=-60., azim=-90)
ax10= fig.add_subplot(2,5,10, projection='3d')
ax10.grid(False)
ax10.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
ax10.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
ax10.scatter3D(self.df.x[outlier_GODE_one_index],self.df.y[outlier_GODE_one_index],self.df.z[outlier_GODE_one_index],zdir='z',s=550,marker='.',edgecolors='red',facecolors='none')
ax10.view_init(elev=-60., azim=-90)
# fig.savefig('fig_bunny.eps',format='eps')_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)_BUNNY.fig()
import plotly.express as px
import pandas as pdLinear appendix
data = pd.read_csv('../3_table/Example_1_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method', 'Comtamination':'Contamination'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)N = 1000, Sparsity = 1%
Number = 1000
Sparsity = 0.01fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 1000, Sparsity = 5%
Number = 1000
Sparsity = 0.05fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 1000, Sparsity = 10%
Number = 1000
Sparsity = 0.1fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 5000, Sparsity = 1%
Number = 5000
Sparsity = 0.01fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 5000, Sparsity = 5%
Number = 5000
Sparsity = 0.05fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 5000, Sparsity = 10%
Number = 5000
Sparsity = 0.1fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 10000, Sparsity = 1%
Number = 10000
Sparsity = 0.01fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 10000, Sparsity = 5%
Number = 10000
Sparsity = 0.05fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 10000, Sparsity = 10%
Number = 10000
Sparsity = 0.1fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()Orbit appendix
data = pd.read_csv('../3_table/Example_2_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)data| Method | Accuracy | Precision | Recall | F1 | AUC | N | Contamination | kappa | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | GODE | 0.9910 | 0.400 | 0.571429 | 0.470588 | 0.953532 | 1000.0 | 0.01 | NaN |
| 1 | GODE | 0.9862 | 0.460 | 0.353846 | 0.400000 | 0.875514 | 5000.0 | 0.01 | NaN |
| 2 | GODE | 0.9873 | 0.380 | 0.368932 | 0.374384 | 0.887724 | 10000.0 | 0.01 | NaN |
| 3 | GODE | 0.9570 | 0.560 | 0.571429 | 0.565657 | 0.893088 | 1000.0 | 0.05 | NaN |
| 4 | GODE | 0.9566 | 0.640 | 0.557491 | 0.595903 | 0.884623 | 5000.0 | 0.05 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 103 | LSCP | 0.9502 | 0.576 | 0.501742 | 0.536313 | 0.869403 | 5000.0 | 0.05 | NaN |
| 104 | LSCP | 0.9078 | 0.570 | 0.536723 | 0.552861 | 0.840433 | 5000.0 | 0.10 | NaN |
| 105 | LSCP | 0.9861 | 0.320 | 0.310680 | 0.315271 | 0.858496 | 10000.0 | 0.01 | NaN |
| 106 | LSCP | 0.9542 | 0.548 | 0.541502 | 0.544732 | 0.878705 | 10000.0 | 0.05 | NaN |
| 107 | LSCP | 0.9200 | 0.610 | 0.598039 | 0.603960 | 0.862852 | 10000.0 | 0.10 | NaN |
108 rows × 9 columns
N = 1000, Sparsity = 1%
Number = 1000
Sparsity = 0.01fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 1000, Sparsity = 5%
Number = 1000
Sparsity = 0.05fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 1000, Sparsity = 10%
Number = 1000
Sparsity = 0.1fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 5000, Sparsity = 1%
Number = 5000
Sparsity = 0.01fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 5000, Sparsity = 5%
Number = 5000
Sparsity = 0.05fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 5000, Sparsity = 10%
Number = 5000
Sparsity = 0.1fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 10000, Sparsity = 1%
Number = 10000
Sparsity = 0.01fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 10000, Sparsity = 5%
Number = 10000
Sparsity = 0.05fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()N = 10000, Sparsity = 10%
Number = 10000
Sparsity = 0.1fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()Bunny
data = pd.read_csv('../3_table/Example_3_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)Sparsity = 1%
Sparsity = 0.01fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()Sparsity = 5%
Sparsity = 0.05fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()Sparsity = 10%
Sparsity = 0.1fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
width=800, height=600)
fig.show()Earthquake
class Earthquake:
def __init__(self,df):
self.df = df
self.f = df.Magnitude.to_numpy()
self.year = df.Year.to_numpy()
self.lat = df.Latitude.to_numpy()
self.long = df.Longitude.to_numpy()
self.n = len(self.f)
self.theta= None
def get_distance(self):
self.D = np.zeros([self.n,self.n])
locations = np.stack([self.lat, self.long],axis=1)
for i in tqdm.tqdm(range(self.n)):
for j in range(i,self.n):
self.D[i,j]=haversine(locations[i],locations[j])
self.D1 = self.D+self.D.T
def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
self.theta = theta
dist = np.where(self.D<kappa,self.D,0)
self.W = np.exp(-(dist/self.theta)**2)
def _eigen(self):
d= self.W.sum(axis=1)
D= np.diag(d)
self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
self.lamb, self.Psi = np.linalg.eigh(self.L)
self.Lamb = np.diag(self.lamb)
def fit(self,m):
self._eigen()
self.fhat = self.Psi[:,0:m]@self.Psi[:,0:m].T@self.f
self.df = self.df.assign(MagnitudeHat = self.fhat)
self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
plt.plot(self.f,'.')
plt.plot(self.fhat,'x')class Earthquake2(Earthquake): # ebayesthresh 기능추가
def fit2(self): # fit with ebayesthresh
self._eigen()
self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
self.power = self.fbar**2
ebayesthresh = importr('EbayesThresh').ebayesthresh
self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2)))
self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
self.fhat = self.Psi@self.fbar_threshed
self.df = self.df.assign(MagnitudeHat = self.fhat)
self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
self.con = np.where(self.df.Residual>0.7,1,0)df= pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv')df_global= pd.concat([pd.read_csv('00_05.csv'),pd.read_csv('05_10.csv'),pd.read_csv('10_15.csv'),pd.read_csv('15_20.csv')]).iloc[:,[0,1,2,4]].rename(columns={'latitude':'Latitude','longitude':'Longitude','mag':'Magnitude'}).reset_index().iloc[:,1:]df_global = df_global.assign(Year=list(map(lambda x: x.split('-')[0], df_global.time))).iloc[:,1:]df_global.Year = df_global.Year.astype(np.float64)each_location=Earthquake2(df_global.query("2010 <= Year < 2015"))each_location.get_distance()100%|██████████| 12498/12498 [03:01<00:00, 68.84it/s]
Distance 분포
- 너무 데이터가 많아 그려지는데 한참 걸리거나 시각적으로 보기 좋지 않음..
plt.plot(np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0])
np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0].max()20013.30596923459
np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0].min()0.08979301632451746
np.save('earth_distance.npy', each_location.D1)map 에 distance가 2500이하인 구간 표시
from geopy.distance import distance
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import geopandas as gpd
from matplotlib.patches import Polygon as mpl_polygon
# 서울의 위도와 경도
seoul_lat, seoul_lon = 37.5665, 126.978
# 원형 경계 구간 생성 함수
def create_circle(lon, lat, radius_km, num_points=100):
"""Create a circle in lat/lon coordinates"""
circle = []
for i in range(num_points):
angle = 2 * np.pi * i / num_points
destination = distance(kilometers=radius_km).destination((lat, lon), angle)
circle.append((destination.longitude, destination.latitude))
return circle
# 서울을 중심으로 2,500 km 거리의 원형 경계 구간 생성
radius_km = 2500
circle_coords = create_circle(seoul_lon, seoul_lat, radius_km)
# 원형 경계 구간을 GeoDataFrame으로 변환
polygon = Polygon(circle_coords)
gdf = gpd.GeoDataFrame(index=[0], geometry=[polygon], crs='EPSG:4326')import folium
from folium import features
# 서울의 위치
seoul_location = [seoul_lat, seoul_lon]
# 기본 지도 생성
m = folium.Map(location=seoul_location, zoom_start=6)
# 원형 마커 추가
folium.Circle(
location=seoul_location,
radius=2500 * 1000, # 2,500 km를 미터로 변환
color='blue',
fill=True,
fill_opacity=0.5
).add_to(m)
# 서울 위치 표시
folium.Marker(
location=seoul_location,
popup='Seoul',
icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)<folium.map.Marker at 0x7f98de7f8bb0>
mMake this Notebook Trusted to load map: File -> Trust Notebook
- 위 그래프 안 보일까봐 캡쳐본 추가
from IPython.display import Image
Image('poly.png')