import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import warnings

# ComplexWarning lives in ``np.exceptions`` on recent NumPy and the top-level
# alias is gone in NumPy 2; fall back to the top-level name on older releases.
_numpy_exceptions = getattr(np, "exceptions", np)
warnings.simplefilter("ignore", _numpy_exceptions.ComplexWarning)
from haversine import haversine
from IPython.display import HTML
Import
from matplotlib import cm
from pygsp import graphs, filters, plotting, utils
import plotly.graph_objects as go
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.vectors import FloatVector
from rpy2.robjects.packages import importr
import warnings

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")
Linear
# --- Synthetic data for the linear example ---------------------------------
np.random.seed(6)
n = 1000
eta_sparsity = 0.05

# Dense Gaussian noise plus a sparse outlier signal: round(n * eta_sparsity)
# candidate values drawn from [-7, -5] and [5, 7], padded with zeros, then
# resampled with replacement to length n.
epsilon = np.around(np.random.normal(size=n), 15)
signal = np.random.choice(
    np.concatenate((
        np.random.uniform(-7, -5, round(n * eta_sparsity / 2)).round(15),
        np.random.uniform(5, 7, round(n * eta_sparsity / 2)).round(15),
        np.repeat(0, n - round(n * eta_sparsity)),
    )),
    n,
)
eta = signal + epsilon

# 1 where an outlier was injected, 0 elsewhere.
outlier_true_linear = [1 if value != 0 else 0 for value in signal]

x_1 = np.linspace(0, 2, n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta  # eta = signal + epsilon

_df = pd.DataFrame({'x': x_1, 'y': y_1})

# Adjacency matrix of a path graph: each node is linked to its immediate
# neighbours only. Filled by index arrays instead of an n x n Python loop.
w = np.zeros((n, n))
_left = np.arange(n - 1)
w[_left, _left + 1] = 1
w[_left + 1, _left] = 1

index_of_trueoutlier_bool = signal != 0
class Linear:
    """Outlier detection for the linear example via graph Fourier thresholding.

    The observations are transformed with the eigenvectors of the normalized
    graph Laplacian of the module-level adjacency matrix ``w``; the squared
    coefficients are thresholded with the R function
    ``EbayesThresh::ebayesthresh`` and the residual of the reconstruction is
    used to flag outliers.
    """

    def __init__(self, df):
        """Keep *df* (columns ``x`` and ``y``) and the module-level ``w``."""
        self.df = df
        self.y = df.y.to_numpy()
        self.x = df.x.to_numpy()
        self.n = len(self.y)
        self.W = w

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^-1/2 (D - W) D^-1/2.

        Sets ``self.L``, ``self.lamb`` (eigenvalues), ``self.Psi``
        (eigenvectors as columns) and ``self.Lamb`` (diagonal matrix of the
        eigenvalues).
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Scaling rows and columns is entry-for-entry what the two products
        # with np.diag(inv_sqrt_d) compute, without dense n x n matmuls.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, sd=20):  # fit with ebayesthresh
        """Fit the signal and add ``yHat`` / ``Residual`` columns to ``self.df``.

        ``sd`` is forwarded to ``ebayesthresh``. Coefficients whose
        thresholded power is not positive are zeroed before reconstruction.
        """
        self._eigen()
        self.ybar = self.Psi.T @ self.y  # ybar := graph Fourier transform of y
        self.power = self.ybar ** 2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power), sd=sd))
        self.ybar_threshed = np.where(self.power_threshed > 0, self.ybar, 0)
        self.yhat = self.Psi @ self.ybar_threshed
        self.df = self.df.assign(yHat=self.yhat)
        self.df = self.df.assign(Residual=self.df.y - self.df.yHat)

    def fig(self, ymin=-5, ymax=20, cuts=0, cutf=1495):
        """Plot the data, the fitted curve and the flagged points.

        Reads the module-level ``eta_sparsity`` (rank fraction that sets the
        squared-residual cut-off) and ``index_of_trueoutlier_bool`` (mask of
        the injected outliers). ``ymin`` and ``ymax`` are accepted but not
        used; ``cuts``/``cutf`` bound the slice of the fitted curve drawn.
        """
        squared_residuals = (self.df['Residual'] ** 2).tolist()
        ranked = sorted(squared_residuals, reverse=True)
        cutoff = ranked[int(len(ranked) * eta_sparsity)]
        outlier_GODE_linear_index = [
            i for i, value in enumerate(squared_residuals) if value > cutoff
        ]

        fig, ax = plt.subplots(figsize=(10, 10))
        ax.scatter(self.x, self.y, color='gray', s=50)
        ax.scatter(self.x[index_of_trueoutlier_bool], self.y[index_of_trueoutlier_bool], color='red', s=100)
        ax.plot(self.x[cuts:cutf], self.yhat[cuts:cutf], '--k', lw=3)
        ax.scatter(self.df.x[outlier_GODE_linear_index], self.df.y[outlier_GODE_linear_index], color='red', s=550, facecolors='none', edgecolors='r')
        fig.tight_layout()
        # fig.savefig('fig1_231103.eps',format='eps')
        # fig.savefig('linear_240623.pdf',format='pdf')
# Fit the linear example and draw its figure.
_Linear = Linear(_df)
_Linear.fit(sd=20)
_Linear.fig()
Orbit
# --- Synthetic data for the orbit example ----------------------------------
n = 1000
eta_sparsity = 0.05
# NOTE(review): random_seed is assigned but the generator below is seeded
# with the literal 777; left as is so the generated data does not change.
random_seed = 77
np.random.seed(777)
epsilon = np.around(np.random.normal(size=n), 15)
signal = np.random.choice(
    np.concatenate((
        np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
        np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
        np.repeat(0, n - round(n * eta_sparsity)),
    )),
    n,
)
eta = signal + epsilon

# Closed curve in the plane: radius 5 + cos(.) swept over one full turn.
pi = np.pi
ang = np.linspace(-pi, pi - 2 * pi / n, n)
r = 5 + np.cos(np.linspace(0, 12 * pi, n))
vx = r * np.cos(ang)
vy = r * np.sin(ang)
f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))  # noise-free signal
f = f1 + eta                                 # observed signal
_df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

# 1 where an outlier was injected, 0 elsewhere.
outlier_true_orbit = [1 if value != 0 else 0 for value in signal]
index_of_trueoutlier_bool = signal != 0
class Orbit:
    """Outlier detection for a signal observed at 2-D locations.

    Builds a Euclidean distance matrix between the ``(x, y)`` locations,
    turns it into a Gaussian-kernel weight matrix, and thresholds the graph
    Fourier coefficients of ``f`` with R's ``EbayesThresh::ebayesthresh``.
    """

    def __init__(self, df):
        """Keep *df*, which must have columns ``x``, ``y``, ``f`` and ``f1``."""
        self.df = df
        self.f = df.f.to_numpy()
        self.f1 = df.f1.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.n = len(self.f)
        self.theta = None

    def get_distance(self):
        """Fill ``self.D`` with the symmetric pairwise Euclidean distances."""
        self.D = np.zeros([self.n, self.n])
        locations = np.stack([self.x, self.y], axis=1)
        for i in tqdm.tqdm(range(self.n)):
            # One vectorised norm per row (upper triangle only) instead of a
            # Python-level call for every pair.
            self.D[i, i:] = np.linalg.norm(locations[i:] - locations[i], axis=1)
        # The diagonal is zero, so adding the transpose mirrors the triangle.
        self.D = self.D + self.D.T

    def get_weightmatrix(self, theta=1, beta=0.5, kappa=4000):
        """Set ``self.W = exp(-(dist / theta)**2)``.

        ``beta`` is accepted but not used.
        NOTE(review): distances >= ``kappa`` are replaced by 0 before the
        kernel is applied, so those pairs receive weight 1 rather than 0 —
        confirm this is intended.
        """
        self.theta = theta
        dist = np.where(self.D < kappa, self.D, 0)
        self.W = np.exp(-(dist / self.theta) ** 2)

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^-1/2 (D - W) D^-1/2.

        Sets ``self.L``, ``self.lamb``, ``self.Psi`` and ``self.Lamb``.
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Scaling rows and columns is entry-for-entry what the two products
        # with np.diag(inv_sqrt_d) compute, without dense n x n matmuls.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, sd=5):  # fit with ebayesthresh
        """Fit the signal and add ``fHat`` / ``Residual`` columns to ``self.df``.

        ``sd`` is forwarded to ``ebayesthresh``.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.f  # fbar := graph Fourier transform of f
        self.power = self.fbar ** 2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power), sd=sd))
        self.fbar_threshed = np.where(self.power_threshed > 0, self.fbar, 0)
        self.fhat = self.Psi @ self.fbar_threshed
        self.df = self.df.assign(fHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.f - self.df.fHat)

    def fig(self):
        """Compute the indices of the flagged points; returns ``None``.

        Uses this instance's residuals (the original read the global
        ``_Orbit``). The 3-D plotting code that consumed these indices is
        commented out in the notebook, so nothing is drawn.
        """
        squared_residuals = (self.df['Residual'] ** 2).tolist()
        ranked = sorted(squared_residuals, reverse=True)
        cutoff = ranked[int(len(ranked) * 0.05)]
        outlier_GODE_one = [1 if value > cutoff else 0 for value in squared_residuals]
        outlier_GODE_one_index = [
            i for i, value in enumerate(squared_residuals) if value > cutoff
        ]
# fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(30,15),subplot_kw={"projection":"3d"})
# ax1.grid(False)
# ax1.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
# ax1.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
# ax1.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
# ax1.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
# ax1.xaxis.pane.fill = False
# ax1.yaxis.pane.fill = False
# ax1.zaxis.pane.fill = False
# ax1.view_init(elev=30., azim=60)
# ax2.grid(False)
# ax2.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
# ax2.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
# ax2.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
# ax2.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
# ax2.xaxis.pane.fill = False
# ax2.yaxis.pane.fill = False
# ax2.zaxis.pane.fill = False
# ax2.view_init(elev=30., azim=40)
# ax3.grid(False)
# ax3.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
# ax3.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
# ax3.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
# ax3.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
# ax3.xaxis.pane.fill = False
# ax3.yaxis.pane.fill = False
# ax3.zaxis.pane.fill = False
# ax3.view_init(elev=30., azim=10)
# fig.savefig('fig2_231129.eps',format='eps')
# fig.savefig('orbit_231129_3.pdf',format='pdf')
# _Orbit = Orbit(_df)
# _Orbit.get_distance()
# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500)
# _Orbit.fit(sd=15)
# %%capture --no-display
# _Orbit.fig()
from sklearn import metrics
_Orbit = Orbit(_df)
_Orbit.get_distance()
# theta is the mean of the non-zero pairwise distances.
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D > 0].mean()), kappa=10)
_Orbit.fit(sd=15)

# Flag points whose squared residual exceeds the value ranked
# ``eta_sparsity`` of the way down the sorted list.
outlier_GODE_orbit_old = (_Orbit.df['Residual'] ** 2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old, reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_orbit = [1 if value > five_percent else 0 for value in outlier_GODE_orbit_old]

# ``auc`` was never imported on its own (the recorded run ended in a
# NameError); use it through the imported ``metrics`` module.
fpr, tpr, thresh = metrics.roc_curve(outlier_true_orbit, outlier_GODE_orbit)
AUC = metrics.auc(fpr, tpr)
AUC
100%|██████████| 1000/1000 [00:01<00:00, 740.81it/s]
NameError: name 'auc' is not defined
# --- AUC over a grid of sample sizes and contamination rates ---------------
kappa = 1.21
n_values = [1000, 5000, 10000]
eta_sparsity_list = [0.01, 0.05, 0.1]
# NOTE(review): random_seed is assigned but every run is seeded with 777.
random_seed = 77
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC", "N", "Contamination"])

# ``tab`` was concatenated onto before ever being created; collect one row
# per run and build the frame once at the end.
_rows = []
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = [1 if value != 0 else 0 for value in signal]
        index_of_trueoutlier_bool = signal != 0

        _Orbit = Orbit(_df)
        _Orbit.get_distance()
        _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D > 0].mean()), kappa=kappa)
        _Orbit.fit(sd=15)

        outlier_GODE_orbit_old = (_Orbit.df['Residual'] ** 2).tolist()
        sorted_data = sorted(outlier_GODE_orbit_old, reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        five_percent = sorted_data[index]
        outlier_GODE_orbit = [1 if value > five_percent else 0 for value in outlier_GODE_orbit_old]

        # ``roc_curve`` / ``auc`` were never imported on their own (the
        # recorded run ended in a NameError); go through ``metrics``.
        fpr, tpr, thresh = metrics.roc_curve(outlier_true_orbit, outlier_GODE_orbit)
        fold_AUC = metrics.auc(fpr, tpr)

        _rows.append({"n": n, "kappa": kappa, "eta_sparsity": eta_sparsity, "AUC": fold_AUC})

tab = pd.DataFrame(_rows, columns=["n", "kappa", "eta_sparsity", "AUC"])
100%|██████████| 1000/1000 [00:01<00:00, 694.62it/s]
NameError: name 'roc_curve' is not defined
tab
# The cell calls ``sns`` but the notebook never imports it.
import seaborn as sns

plt.figure(figsize=(8, 6))  # figure size (optional)
# One cell per (n, eta_sparsity) run. The original reshaped to a fixed
# 10 x 10 grid, which cannot hold the rows produced by the loop above.
# NOTE(review): assumes one row per (n, eta_sparsity) pair — confirm.
_auc_grid = tab.pivot(index="n", columns="eta_sparsity", values="AUC").astype(float)
sns.heatmap(_auc_grid, annot=True, cmap='coolwarm', square=True)
plt.show()
Bunny
# ``pickle`` is used below but never imported elsewhere in the notebook.
import pickle

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# Bunny.pkl from a trusted source.
with open("../../2_research/Bunny.pkl", "rb") as file:
    loaded_obj = pickle.load(file)

_df = pd.DataFrame({'x': loaded_obj['x'], 'y': loaded_obj['y'], 'z': loaded_obj['z'], 'fnoise': loaded_obj['f'] + loaded_obj['noise'], 'f': loaded_obj['f'], 'noise': loaded_obj['noise']})

# 1 where the stored 'unif' entry is non-zero, 0 elsewhere.
outlier_true_bunny = [1 if value != 0 else 0 for value in loaded_obj['unif'].copy()]
index_of_trueoutlier_bool_bunny = loaded_obj['unif'] != 0
_W = loaded_obj['W'].copy()
class BUNNY:
    """Outlier detection for a signal on a 3-D point cloud.

    Uses the module-level weight matrix ``_W`` as the graph and thresholds
    the graph Fourier coefficients of ``f + noise`` with R's
    ``EbayesThresh::ebayesthresh``.
    """

    def __init__(self, df):
        """Keep *df* (columns ``x``, ``y``, ``z``, ``f``, ``noise``) and ``_W``."""
        self.df = df
        self.f = df.f.to_numpy()
        self.z = df.z.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.noise = df.noise.to_numpy()
        self.fnoise = self.f + self.noise
        self.W = _W
        self.n = len(self.f)
        self.theta = None

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^-1/2 (D - W) D^-1/2.

        Sets ``self.L``, ``self.lamb``, ``self.Psi`` and ``self.Lamb``.
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Scaling rows and columns is entry-for-entry what the two products
        # with np.diag(inv_sqrt_d) compute, without dense n x n matmuls.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, sd=5):  # fit with ebayesthresh
        """Fit ``fnoise`` and add ``fnoise`` / ``fHat`` / ``Residual`` to ``self.df``.

        ``sd`` is forwarded to ``ebayesthresh``.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.fnoise  # fbar := graph Fourier transform of fnoise
        self.power = self.fbar ** 2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power), sd=sd))
        self.fbar_threshed = np.where(self.power_threshed > 0, self.fbar, 0)
        self.fhat = self.Psi @ self.fbar_threshed
        self.df = self.df.assign(fnoise=self.fnoise)
        self.df = self.df.assign(fHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.f + self.df.noise - self.df.fHat)

    def fig(self):
        """Draw a 2 x 5 grid of 3-D views of the point cloud.

        Each row shows the same five panels (shape, ``f``, ``fnoise``,
        injected outliers, injected + flagged outliers); the first row is
        viewed from elevation 60, the second from -60. Reads the module-level
        mask ``index_of_trueoutlier_bool_bunny``.
        """
        squared_residuals = (self.df['Residual'] ** 2).tolist()
        ranked = sorted(squared_residuals, reverse=True)
        cutoff = ranked[int(len(ranked) * 0.05)]
        outlier_GODE_one_index = [
            i for i, value in enumerate(squared_residuals) if value > cutoff
        ]
        true_mask = index_of_trueoutlier_bool_bunny

        def draw_shape(ax):
            ax.scatter3D(self.x, self.y, self.z, c='gray', zdir='z', alpha=0.5, marker='.')

        def draw_clean(ax):
            ax.scatter3D(self.x, self.y, self.z, c=self.f, cmap='hsv', zdir='z', marker='.', alpha=0.5, vmin=-12, vmax=10)

        def draw_noisy(ax):
            ax.scatter3D(self.x, self.y, self.z, c=self.fnoise, cmap='hsv', zdir='z', marker='.', alpha=0.5, vmin=-12, vmax=10)

        def draw_truth(ax):
            ax.scatter3D(self.x, self.y, self.z, c=self.fnoise, cmap='hsv', zdir='z', marker='.', vmin=-12, vmax=10, s=1)
            ax.scatter3D(self.x[true_mask], self.y[true_mask], self.z[true_mask], c=self.fnoise[true_mask], cmap='hsv', zdir='z', marker='.', s=50)

        def draw_detected(ax):
            draw_truth(ax)
            ax.scatter3D(self.df.x[outlier_GODE_one_index], self.df.y[outlier_GODE_one_index], self.df.z[outlier_GODE_one_index], zdir='z', s=550, marker='.', edgecolors='red', facecolors='none')

        panels = (draw_shape, draw_clean, draw_noisy, draw_truth, draw_detected)

        fig = plt.figure(figsize=(30, 12), dpi=400)
        position = 1
        for elev in (60., -60.):
            for draw in panels:
                ax = fig.add_subplot(2, 5, position, projection='3d')
                ax.grid(False)
                draw(ax)
                ax.view_init(elev=elev, azim=-90)
                position += 1
        # fig.savefig('fig_bunny.eps',format='eps')
# Fit the bunny example and draw its figure.
_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)
_BUNNY.fig()
import plotly.express as px
import pandas as pd
Linear appendix
data = pd.read_csv('../3_table/Example_1_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method', 'Comtamination': 'Contamination'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

# One bar chart of AUC per method for every (N, Sparsity) setting, in the
# order N = 1000, 5000, 10000 and Sparsity = 1%, 5%, 10%. The per-cell
# headings are kept as figure titles.
for Number in (1000, 5000, 10000):
    for Sparsity in (0.01, 0.05, 0.1):
        fig = px.bar(
            data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC', ascending=False),
            x='Method', y="AUC",
            width=800, height=600,
            title=f"N = {Number}, Sparsity = {Sparsity:.0%}",
        )
        fig.show()
Orbit appendix
data = pd.read_csv('../3_table/Example_2_Dataset.csv')
# The unnamed first CSV column holds the method name.
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)
data
Method | Accuracy | Precision | Recall | F1 | AUC | N | Contamination | kappa | |
---|---|---|---|---|---|---|---|---|---|
0 | GODE | 0.9910 | 0.400 | 0.571429 | 0.470588 | 0.953532 | 1000.0 | 0.01 | NaN |
1 | GODE | 0.9862 | 0.460 | 0.353846 | 0.400000 | 0.875514 | 5000.0 | 0.01 | NaN |
2 | GODE | 0.9873 | 0.380 | 0.368932 | 0.374384 | 0.887724 | 10000.0 | 0.01 | NaN |
3 | GODE | 0.9570 | 0.560 | 0.571429 | 0.565657 | 0.893088 | 1000.0 | 0.05 | NaN |
4 | GODE | 0.9566 | 0.640 | 0.557491 | 0.595903 | 0.884623 | 5000.0 | 0.05 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
103 | LSCP | 0.9502 | 0.576 | 0.501742 | 0.536313 | 0.869403 | 5000.0 | 0.05 | NaN |
104 | LSCP | 0.9078 | 0.570 | 0.536723 | 0.552861 | 0.840433 | 5000.0 | 0.10 | NaN |
105 | LSCP | 0.9861 | 0.320 | 0.310680 | 0.315271 | 0.858496 | 10000.0 | 0.01 | NaN |
106 | LSCP | 0.9542 | 0.548 | 0.541502 | 0.544732 | 0.878705 | 10000.0 | 0.05 | NaN |
107 | LSCP | 0.9200 | 0.610 | 0.598039 | 0.603960 | 0.862852 | 10000.0 | 0.10 | NaN |
108 rows × 9 columns
# One bar chart of AUC per method for every (N, Sparsity) setting, in the
# order N = 1000, 5000, 10000 and Sparsity = 1%, 5%, 10%. The per-cell
# headings are kept as figure titles.
for Number in (1000, 5000, 10000):
    for Sparsity in (0.01, 0.05, 0.1):
        fig = px.bar(
            data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC', ascending=False),
            x='Method', y="AUC",
            width=800, height=600,
            title=f"N = {Number}, Sparsity = {Sparsity:.0%}",
        )
        fig.show()
Bunny
data = pd.read_csv('../3_table/Example_3_Dataset.csv')
# The unnamed first CSV column holds the method name.
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

# One bar chart of AUC per method for Sparsity = 1%, 5%, 10%. The per-cell
# headings are kept as figure titles.
for Sparsity in (0.01, 0.05, 0.1):
    fig = px.bar(
        data.query(f"Contamination=={Sparsity}").sort_values('AUC', ascending=False),
        x='Method', y="AUC",
        width=800, height=600,
        title=f"Sparsity = {Sparsity:.0%}",
    )
    fig.show()
Earthquake
class Earthquake:
    """Graph-based smoothing of earthquake magnitudes.

    Nodes are the events in *df*; edges are weighted by a Gaussian kernel of
    the haversine distance between their (latitude, longitude) positions.
    """

    def __init__(self, df):
        """Keep *df* (columns ``Magnitude``, ``Year``, ``Latitude``, ``Longitude``)."""
        self.df = df
        self.f = df.Magnitude.to_numpy()
        self.year = df.Year.to_numpy()
        self.lat = df.Latitude.to_numpy()
        self.long = df.Longitude.to_numpy()
        self.n = len(self.f)
        self.theta = None

    def get_distance(self):
        """Compute pairwise haversine distances.

        ``self.D`` holds the upper triangle only; ``self.D1`` holds the
        symmetric matrix ``D + D.T``.
        """
        self.D = np.zeros([self.n, self.n])
        locations = np.stack([self.lat, self.long], axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i, self.n):
                self.D[i, j] = haversine(locations[i], locations[j])
        self.D1 = self.D + self.D.T

    def get_weightmatrix(self, theta=1, beta=0.5, kappa=4000):
        """Set ``self.W = exp(-(dist / theta)**2)``.

        Uses the symmetric ``self.D1`` when ``get_distance`` has produced it;
        the upper-triangular ``self.D`` would give an asymmetric weight
        matrix. Falls back to ``self.D`` when only that attribute is set.
        ``beta`` is accepted but not used.
        NOTE(review): distances >= ``kappa`` are replaced by 0 before the
        kernel is applied, so those pairs receive weight 1 rather than 0 —
        confirm this is intended.
        """
        self.theta = theta
        distances = self.D1 if hasattr(self, 'D1') else self.D
        dist = np.where(distances < kappa, distances, 0)
        self.W = np.exp(-(dist / self.theta) ** 2)

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^-1/2 (D - W) D^-1/2.

        Sets ``self.L``, ``self.lamb``, ``self.Psi`` and ``self.Lamb``.
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Scaling rows and columns is entry-for-entry what the two products
        # with np.diag(inv_sqrt_d) compute, without dense n x n matmuls.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, m):
        """Project ``f`` onto the first *m* eigenvectors and plot the result.

        Adds ``MagnitudeHat`` and ``Residual`` columns to ``self.df`` and
        plots the observed values ('.') and the fitted values ('x').
        """
        self._eigen()
        basis = self.Psi[:, 0:m]
        self.fhat = basis @ basis.T @ self.f
        self.df = self.df.assign(MagnitudeHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.Magnitude - self.df.MagnitudeHat)
        plt.plot(self.f, '.')
        plt.plot(self.fhat, 'x')
class Earthquake2(Earthquake):  # adds ebayesthresh-based fitting
    """``Earthquake`` with a fit that thresholds graph Fourier coefficients."""

    def fit2(self):  # fit with ebayesthresh
        """Fit ``f`` and add ``MagnitudeHat`` / ``Residual`` columns to ``self.df``.

        Coefficients whose thresholded power is not positive are zeroed
        before reconstruction. ``self.con`` is 1 where the residual exceeds
        0.7 and 0 elsewhere.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.f  # fbar := graph Fourier transform of f
        self.power = self.fbar ** 2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power)))
        self.fbar_threshed = np.where(self.power_threshed > 0, self.fbar, 0)
        self.fhat = self.Psi @ self.fbar_threshed
        self.df = self.df.assign(MagnitudeHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.Magnitude - self.df.MagnitudeHat)
        self.con = np.where(self.df.Residual > 0.7, 1, 0)
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv')

# Stack the four local CSV files, keep columns 0, 1, 2 and 4, and rename the
# coordinate / magnitude columns to the names the Earthquake class reads.
df_global = pd.concat([pd.read_csv('00_05.csv'), pd.read_csv('05_10.csv'), pd.read_csv('10_15.csv'), pd.read_csv('15_20.csv')]).iloc[:, [0, 1, 2, 4]].rename(columns={'latitude': 'Latitude', 'longitude': 'Longitude', 'mag': 'Magnitude'}).reset_index().iloc[:, 1:]
# Year is the text before the first '-' of the ``time`` column; the
# ``time`` column itself is then dropped by the positional slice.
df_global = df_global.assign(Year=[stamp.split('-')[0] for stamp in df_global.time]).iloc[:, 1:]
df_global.Year = df_global.Year.astype(np.float64)

each_location = Earthquake2(df_global.query("2010 <= Year < 2015"))
each_location.get_distance()
100%|██████████| 12498/12498 [03:01<00:00, 68.84it/s]
Distance 분포
- 너무 데이터가 많아 그려지는데 한참 걸리거나 시각적으로 보기 좋지 않음..
# Flatten the distance matrix once and keep only its non-zero entries; the
# original rebuilt the flattened copy for every expression.
_flat_distances = np.array(each_location.D).reshape(-1)
_nonzero_distances = _flat_distances[_flat_distances != 0]

plt.plot(_nonzero_distances)
_nonzero_distances.max()  # recorded notebook output: 20013.30596923459
_nonzero_distances.min()  # recorded notebook output: 0.08979301632451746
np.save('earth_distance.npy', each_location.D1)
map 에 distance가 2500이하인 구간 표시
from geopy.distance import distance
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import geopandas as gpd
from matplotlib.patches import Polygon as mpl_polygon
# Latitude and longitude of Seoul
seoul_lat, seoul_lon = 37.5665, 126.978
# Build the boundary of a circle around a point
def create_circle(lon, lat, radius_km, num_points=100):
    """Create a circle in lat/lon coordinates.

    Returns ``num_points`` ``(longitude, latitude)`` tuples lying
    ``radius_km`` kilometres from ``(lat, lon)``, at evenly spaced bearings.
    """
    circle = []
    for i in range(num_points):
        # geopy takes the bearing in degrees; the original passed radians,
        # which only swept the first ~6 degrees of the circle.
        bearing = 360 * i / num_points
        destination = distance(kilometers=radius_km).destination((lat, lon), bearing)
        circle.append((destination.longitude, destination.latitude))
    return circle
# Circle boundary 2,500 km around Seoul
radius_km = 2500
circle_coords = create_circle(seoul_lon, seoul_lat, radius_km)

# Convert the circle boundary into a GeoDataFrame
polygon = Polygon(circle_coords)
gdf = gpd.GeoDataFrame(index=[0], geometry=[polygon], crs='EPSG:4326')
import folium
from folium import features
# Location of Seoul
seoul_location = [seoul_lat, seoul_lon]

# Base map
m = folium.Map(location=seoul_location, zoom_start=6)

# Add the circle overlay
folium.Circle(
    location=seoul_location,
    radius=2500 * 1000,  # 2,500 km converted to metres
    color='blue',
    fill=True,
    fill_opacity=0.5
).add_to(m)

# Mark Seoul itself
folium.Marker(
    location=seoul_location,
    popup='Seoul',
    icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)
<folium.map.Marker at 0x7f98de7f8bb0>
m
Make this Notebook Trusted to load map: File -> Trust Notebook
- 위 그래프 안 보일까봐 캡쳐본 추가
from IPython.display import Image
# Static capture of the map, in case the interactive map does not render.
Image('poly.png')