[GODE]Final Graph code for GODE Paper

Author

SEOYEON CHOI

Published

June 22, 2024

Import

import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import plotly.express as px
import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
from haversine import haversine
from IPython.display import HTML
from matplotlib import cm
from pygsp import graphs, filters, plotting, utils
import plotly.graph_objects as go

import rpy2
import rpy2.robjects as ro 
from rpy2.robjects.vectors import FloatVector 
from rpy2.robjects.packages import importr
import warnings
warnings.filterwarnings("ignore")

Linear

np.random.seed(6)

n = 1000
eta_sparsity = 0.05

epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-7, -5, round(n*eta_sparsity/2)).round(15), np.random.uniform(5, 7, round(n*eta_sparsity/2)).round(15), np.repeat(0, n - round(n*eta_sparsity)))), n)
eta = signal + epsilon

outlier_true_linear= signal.copy()
outlier_true_linear = list(map(lambda x: 1 if x!=0 else 0,outlier_true_linear))

x_1 = np.linspace(0,2,n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta # eta = signal + epsilon

_df=pd.DataFrame({'x':x_1, 'y':y_1})

w=np.zeros((n,n))

for i in range(n):
    for j in range(n):
        if i==j :
            w[i,j] = 0
        elif np.abs(i-j) <= 1 : 
            w[i,j] = 1

index_of_trueoutlier_bool = signal!=0
class Linear:
    def __init__(self,df):
        self.df = df
        self.y = df.y.to_numpy()
        self.x = df.x.to_numpy()
        self.n = len(self.y)
        self.W = w
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)      
    def fit(self,sd=20): # fit with ebayesthresh
        self._eigen()
        self.ybar = self.Psi.T @ self.y # fbar := graph fourier transform of f
        self.power = self.ybar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.ybar_threshed = np.where(self.power_threshed>0,self.ybar,0)
        self.yhat = self.Psi@self.ybar_threshed
        self.df = self.df.assign(yHat = self.yhat)
        self.df = self.df.assign(Residual = self.df.y- self.df.yHat)
    def fig(self,ymin=-5,ymax=20,cuts=0,cutf=1495):
        outlier_GODE_linear_old = (self.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_linear_old,reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        five_percent = sorted_data[index]
        outlier_GODE_linear = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_linear_old))
        outlier_GODE_linear_index = [i for i, value in enumerate(outlier_GODE_linear_old) if value > five_percent]

        fig,ax = plt.subplots(figsize=(10,10))
        ax.scatter(self.x,self.y,color='gray',s=50)
        # ax.scatter(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],color='red',s=50)        
        ax.scatter(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],color='red',s=100)
        ax.plot(self.x[cuts:cutf],self.yhat[cuts:cutf], '--k',lw=3)
        ax.scatter(self.df.x[outlier_GODE_linear_index],self.df.y[outlier_GODE_linear_index],color='red',s=550,facecolors='none', edgecolors='r')
        fig.tight_layout()
        # fig.savefig('fig1_231103.eps',format='eps')
        # fig.savefig('linear_240623.pdf',format='pdf')
_Linear = Linear(_df)
_Linear.fit(sd=20)
_Linear.fig()

Orbit

n = 1000
eta_sparsity = 0.05
random_seed=77
np.random.seed(777)
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
eta = signal + epsilon
pi=np.pi
ang=np.linspace(-pi,pi-2*pi/n,n)
r=5+np.cos(np.linspace(0,12*pi,n))
vx=r*np.cos(ang)
vy=r*np.sin(ang)
f1=10*np.sin(np.linspace(0,6*pi,n))
f = f1 + eta
_df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
outlier_true_orbit = signal.copy()
outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
index_of_trueoutlier_bool = signal!=0
class Orbit:
    def __init__(self,df):
        self.df = df 
        self.f = df.f.to_numpy()
        self.f1 = df.f1.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.n = len(self.f)
        self.theta= None
    def get_distance(self):
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.x, self.y],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i,self.n):
                self.D[i,j]=np.linalg.norm(locations[i]-locations[j])
        self.D = self.D + self.D.T
    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        self.theta = theta
        dist = np.where(self.D < kappa,self.D,0)
        self.W = np.exp(-(dist/self.theta)**2)
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)       
    def fit(self,sd=5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f- self.df.fHat)
        
    def fig(self):
        outlier_GODE_one_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_one_old,reverse=True)
        index = int(len(sorted_data) * 0.05)
        five_percent = sorted_data[index]
        outlier_GODE_one = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_one_old))
        outlier_GODE_one_index = [i for i, value in enumerate(outlier_GODE_one_old) if value > five_percent]

        # fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(30,15),subplot_kw={"projection":"3d"})
        # ax1.grid(False)
        # ax1.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
        # ax1.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
        # ax1.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
        # ax1.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
        # ax1.xaxis.pane.fill = False
        # ax1.yaxis.pane.fill = False
        # ax1.zaxis.pane.fill = False
        # ax1.view_init(elev=30., azim=60)
        
        # ax2.grid(False)
        # ax2.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
        # ax2.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
        # ax2.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)      
        # ax2.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
        # ax2.xaxis.pane.fill = False
        # ax2.yaxis.pane.fill = False
        # ax2.zaxis.pane.fill = False
        # ax2.view_init(elev=30., azim=40)
        
        # ax3.grid(False)
        # ax3.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
        # ax3.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
        # ax3.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
        # ax3.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
        # ax3.xaxis.pane.fill = False
        # ax3.yaxis.pane.fill = False
        # ax3.zaxis.pane.fill = False
        # ax3.view_init(elev=30., azim=10)
        
        # fig.savefig('fig2_231129.eps',format='eps')
        # fig.savefig('orbit_231129_3.pdf',format='pdf')
# _Orbit = Orbit(_df)
# _Orbit.get_distance()
# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500) 
# _Orbit.fit(sd=15)
# %%capture --no-display
# _Orbit.fig()
from sklearn import metrics
_Orbit = Orbit(_df)
_Orbit.get_distance()
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=10) 
_Orbit.fit(sd=15)

outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
fpr, tpr, thresh = metrics.roc_curve(outlier_true_orbit, outlier_GODE_orbit)      
AUC = auc(fpr, tpr)
AUC
100%|██████████| 1000/1000 [00:01<00:00, 740.81it/s]
NameError: name 'auc' is not defined
kappa = 1.21
n_values = list([1000,5000,10000])
eta_sparsity_list = list([0.01,0.05,0.1])
random_seed=77
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0

        _Orbit = Orbit(_df)
        _Orbit.get_distance()
        _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa) 
        _Orbit.fit(sd=15)

        outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        five_percent = sorted_data[index]
        outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
        fpr, tpr, thresh = roc_curve(outlier_true_orbit, outlier_GODE_orbit)      
        fold_AUC = auc(fpr, tpr)

        tab = pd.concat([tab,
                   pd.DataFrame({"n":[n],"kappa":[kappa],"eta_sparsity":[eta_sparsity],"AUC":[fold_AUC]})])
100%|██████████| 1000/1000 [00:01<00:00, 694.62it/s]
NameError: name 'roc_curve' is not defined
tab
plt.figure(figsize=(8, 6))  # 그림 크기 설정 (선택사항)
sns.heatmap(np.array(tab.AUC).reshape(10,10), annot=True, cmap='coolwarm', square=True)
plt.show()

Bunny

with open("../../2_research/Bunny.pkl", "rb") as file:
    loaded_obj = pickle.load(file)
_df = pd.DataFrame({'x':loaded_obj['x'],'y':loaded_obj['y'],'z':loaded_obj['z'],'fnoise':loaded_obj['f']+loaded_obj['noise'],'f':loaded_obj['f'],'noise':loaded_obj['noise']})
outlier_true_bunny = loaded_obj['unif'].copy()
outlier_true_bunny = list(map(lambda x: 1 if x !=0  else 0,outlier_true_bunny))
index_of_trueoutlier_bool_bunny = loaded_obj['unif']!=0
_W = loaded_obj['W'].copy()
class BUNNY:
    def __init__(self,df):
        self.df = df 
        self.f = df.f.to_numpy()
        self.z = df.z.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.noise = df.noise.to_numpy()
        self.fnoise = self.f + self.noise
        self.W = _W
        self.n = len(self.f)
        self.theta= None
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)       
    def fit(self,sd=5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.fnoise # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fnoise = self.fnoise)
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f + self.df.noise - self.df.fHat)

    def fig(self):

        outlier_GODE_one_old = (self.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_one_old,reverse=True)
        index = int(len(sorted_data) * 0.05)
        five_percent = sorted_data[index]
        outlier_GODE_one = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_one_old))
        outlier_GODE_one_index = [i for i, value in enumerate(outlier_GODE_one_old) if value > five_percent]

        fig = plt.figure(figsize=(30,12),dpi=400)
        ax1 = fig.add_subplot(251, projection='3d')
        ax1.grid(False)
        ax1.scatter3D(self.x,self.y,self.z,c='gray',zdir='z',alpha=0.5,marker='.')
        ax1.view_init(elev=60., azim=-90)

        ax2= fig.add_subplot(252, projection='3d')
        ax2.grid(False)
        ax2.scatter3D(self.x,self.y,self.z,c=self.f,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax2.view_init(elev=60., azim=-90)

        ax3= fig.add_subplot(253, projection='3d')
        ax3.grid(False)
        ax3.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax3.view_init(elev=60., azim=-90)
        
        ax4= fig.add_subplot(254, projection='3d')
        ax4.grid(False)
        ax4.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax4.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax4.view_init(elev=60., azim=-90)

        ax5= fig.add_subplot(255, projection='3d')
        ax5.grid(False)
        ax5.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax5.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax5.scatter3D(self.df.x[outlier_GODE_one_index],self.df.y[outlier_GODE_one_index],self.df.z[outlier_GODE_one_index],zdir='z',s=550,marker='.',edgecolors='red',facecolors='none')
        ax5.view_init(elev=60., azim=-90)
        
        ax6 = fig.add_subplot(256, projection='3d')
        ax6.grid(False)
        ax6.scatter3D(self.x,self.y,self.z,c='gray',zdir='z',alpha=0.5,marker='.')
        ax6.view_init(elev=-60., azim=-90)

        ax7= fig.add_subplot(257, projection='3d')
        ax7.grid(False)
        ax7.scatter3D(self.x,self.y,self.z,c=self.f,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax7.view_init(elev=-60., azim=-90)

        ax8= fig.add_subplot(258, projection='3d')
        ax8.grid(False)
        ax8.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax8.view_init(elev=-60., azim=-90)
        
        ax9= fig.add_subplot(259, projection='3d')
        ax9.grid(False)
        ax9.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax9.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax9.view_init(elev=-60., azim=-90)

        ax10= fig.add_subplot(2,5,10, projection='3d')
        ax10.grid(False)
        ax10.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax10.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax10.scatter3D(self.df.x[outlier_GODE_one_index],self.df.y[outlier_GODE_one_index],self.df.z[outlier_GODE_one_index],zdir='z',s=550,marker='.',edgecolors='red',facecolors='none')
        ax10.view_init(elev=-60., azim=-90)        
        # fig.savefig('fig_bunny.eps',format='eps')
_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)
_BUNNY.fig()


import plotly.express as px
import pandas as pd

Linear appendix

data = pd.read_csv('../3_table/Example_1_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method', 'Comtamination':'Contamination'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

N = 1000, Sparsity = 1%

Number = 1000
Sparsity = 0.01
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 5%

Number = 1000
Sparsity = 0.05
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 10%

Number = 1000
Sparsity = 0.1
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 1%

Number = 5000
Sparsity = 0.01
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 5%

Number = 5000
Sparsity = 0.05
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 10%

Number = 5000
Sparsity = 0.1
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 1%

Number = 10000
Sparsity = 0.01
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 5%

Number = 10000
Sparsity = 0.05
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 10%

Number = 10000
Sparsity = 0.1
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Orbit appendix

data = pd.read_csv('../3_table/Example_2_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)
data
Method Accuracy Precision Recall F1 AUC N Contamination kappa
0 GODE 0.9910 0.400 0.571429 0.470588 0.953532 1000.0 0.01 NaN
1 GODE 0.9862 0.460 0.353846 0.400000 0.875514 5000.0 0.01 NaN
2 GODE 0.9873 0.380 0.368932 0.374384 0.887724 10000.0 0.01 NaN
3 GODE 0.9570 0.560 0.571429 0.565657 0.893088 1000.0 0.05 NaN
4 GODE 0.9566 0.640 0.557491 0.595903 0.884623 5000.0 0.05 NaN
... ... ... ... ... ... ... ... ... ...
103 LSCP 0.9502 0.576 0.501742 0.536313 0.869403 5000.0 0.05 NaN
104 LSCP 0.9078 0.570 0.536723 0.552861 0.840433 5000.0 0.10 NaN
105 LSCP 0.9861 0.320 0.310680 0.315271 0.858496 10000.0 0.01 NaN
106 LSCP 0.9542 0.548 0.541502 0.544732 0.878705 10000.0 0.05 NaN
107 LSCP 0.9200 0.610 0.598039 0.603960 0.862852 10000.0 0.10 NaN

108 rows × 9 columns

N = 1000, Sparsity = 1%

Number = 1000
Sparsity = 0.01
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 5%

Number = 1000
Sparsity = 0.05
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 10%

Number = 1000
Sparsity = 0.1
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 1%

Number = 5000
Sparsity = 0.01
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 5%

Number = 5000
Sparsity = 0.05
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 10%

Number = 5000
Sparsity = 0.1
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 1%

Number = 10000
Sparsity = 0.01
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 5%

Number = 10000
Sparsity = 0.05
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 10%

Number = 10000
Sparsity = 0.1
fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Bunny

data = pd.read_csv('../3_table/Example_3_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

Sparsity = 1%

Sparsity = 0.01
fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Sparsity = 5%

Sparsity = 0.05
fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Sparsity = 10%

Sparsity = 0.1
fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Earthquake

class Earthquake:
    def __init__(self,df):
        self.df = df 
        self.f = df.Magnitude.to_numpy()
        self.year = df.Year.to_numpy()
        self.lat = df.Latitude.to_numpy()
        self.long = df.Longitude.to_numpy()
        self.n = len(self.f)
        
        self.theta= None
    def get_distance(self):
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.lat, self.long],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i,self.n): 
                self.D[i,j]=haversine(locations[i],locations[j])
        self.D1 = self.D+self.D.T
    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        self.theta = theta
        dist = np.where(self.D<kappa,self.D,0)
        self.W = np.exp(-(dist/self.theta)**2)

    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)        
    def fit(self,m):
        self._eigen()
        self.fhat = self.Psi[:,0:m]@self.Psi[:,0:m].T@self.f
        self.df = self.df.assign(MagnitudeHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
        plt.plot(self.f,'.')
        plt.plot(self.fhat,'x')
class Earthquake2(Earthquake): # ebayesthresh 기능추가
    def fit2(self): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2)))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(MagnitudeHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
        self.con = np.where(self.df.Residual>0.7,1,0)
df= pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv')
df_global= pd.concat([pd.read_csv('00_05.csv'),pd.read_csv('05_10.csv'),pd.read_csv('10_15.csv'),pd.read_csv('15_20.csv')]).iloc[:,[0,1,2,4]].rename(columns={'latitude':'Latitude','longitude':'Longitude','mag':'Magnitude'}).reset_index().iloc[:,1:]
df_global = df_global.assign(Year=list(map(lambda x: x.split('-')[0], df_global.time))).iloc[:,1:]
df_global.Year = df_global.Year.astype(np.float64)
each_location=Earthquake2(df_global.query("2010 <= Year < 2015"))
each_location.get_distance()
100%|██████████| 12498/12498 [03:01<00:00, 68.84it/s] 

Distance 분포

  • 너무 데이터가 많아 그려지는데 한참 걸리거나 시각적으로 보기 좋지 않음..
plt.plot(np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0])

np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0].max()
20013.30596923459
np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0].min()
0.08979301632451746
np.save('earth_distance.npy', each_location.D1)

map 에 distance가 2500이하인 구간 표시

from geopy.distance import distance
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import geopandas as gpd
from matplotlib.patches import Polygon as mpl_polygon

# 서울의 위도와 경도
seoul_lat, seoul_lon = 37.5665, 126.978

# 원형 경계 구간 생성 함수
def create_circle(lon, lat, radius_km, num_points=100):
    """Create a circle in lat/lon coordinates"""
    circle = []
    for i in range(num_points):
        angle = 2 * np.pi * i / num_points
        destination = distance(kilometers=radius_km).destination((lat, lon), angle)
        circle.append((destination.longitude, destination.latitude))
    return circle

# 서울을 중심으로 2,500 km 거리의 원형 경계 구간 생성
radius_km = 2500
circle_coords = create_circle(seoul_lon, seoul_lat, radius_km)

# 원형 경계 구간을 GeoDataFrame으로 변환
polygon = Polygon(circle_coords)
gdf = gpd.GeoDataFrame(index=[0], geometry=[polygon], crs='EPSG:4326')
import folium
from folium import features

# 서울의 위치
seoul_location = [seoul_lat, seoul_lon]

# 기본 지도 생성
m = folium.Map(location=seoul_location, zoom_start=6)

# 원형 마커 추가
folium.Circle(
    location=seoul_location,
    radius=2500 * 1000,  # 2,500 km를 미터로 변환
    color='blue',
    fill=True,
    fill_opacity=0.5
).add_to(m)

# 서울 위치 표시
folium.Marker(
    location=seoul_location,
    popup='Seoul',
    icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)
<folium.map.Marker at 0x7f98de7f8bb0>
m
Make this Notebook Trusted to load map: File -> Trust Notebook
  • 위 그래프 안 보일까봐 캡쳐본 추가
from IPython.display import Image

Image('poly.png')