Import

import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import plotly.express as px
import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
from haversine import haversine
from IPython.display import HTML

from matplotlib import cm
from pygsp import graphs, filters, plotting, utils
import plotly.graph_objects as go

import rpy2
import rpy2.robjects as ro 
from rpy2.robjects.vectors import FloatVector 
from rpy2.robjects.packages import importr

import warnings
warnings.filterwarnings("ignore")

Linear

np.random.seed(6)

n = 1000
eta_sparsity = 0.05

epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-7, -5, round(n*eta_sparsity/2)).round(15), np.random.uniform(5, 7, round(n*eta_sparsity/2)).round(15), np.repeat(0, n - round(n*eta_sparsity)))), n)
eta = signal + epsilon

outlier_true_linear= signal.copy()
outlier_true_linear = list(map(lambda x: 1 if x!=0 else 0,outlier_true_linear))

x_1 = np.linspace(0,2,n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta # eta = signal + epsilon

_df=pd.DataFrame({'x':x_1, 'y':y_1})

w=np.zeros((n,n))

for i in range(n):
    for j in range(n):
        if i==j :
            w[i,j] = 0
        elif np.abs(i-j) <= 1 : 
            w[i,j] = 1

index_of_trueoutlier_bool = signal!=0

class Linear:
    def __init__(self,df):
        self.df = df
        self.y = df.y.to_numpy()
        self.x = df.x.to_numpy()
        self.n = len(self.y)
        self.W = w
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)      
    def fit(self,sd=20): # fit with ebayesthresh
        self._eigen()
        self.ybar = self.Psi.T @ self.y # fbar := graph fourier transform of f
        self.power = self.ybar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.ybar_threshed = np.where(self.power_threshed>0,self.ybar,0)
        self.yhat = self.Psi@self.ybar_threshed
        self.df = self.df.assign(yHat = self.yhat)
        self.df = self.df.assign(Residual = self.df.y- self.df.yHat)
    def fig(self,ymin=-5,ymax=20,cuts=0,cutf=1495):
        outlier_GODE_linear_old = (self.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_linear_old,reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        five_percent = sorted_data[index]
        outlier_GODE_linear = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_linear_old))
        outlier_GODE_linear_index = [i for i, value in enumerate(outlier_GODE_linear_old) if value > five_percent]

        fig,ax = plt.subplots(figsize=(10,10))
        ax.scatter(self.x,self.y,color='gray',s=50)
        # ax.scatter(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],color='red',s=50)        
        ax.scatter(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],color='red',s=100)
        ax.plot(self.x[cuts:cutf],self.yhat[cuts:cutf], '--k',lw=3)
        ax.scatter(self.df.x[outlier_GODE_linear_index],self.df.y[outlier_GODE_linear_index],color='red',s=550,facecolors='none', edgecolors='r')
        fig.tight_layout()
        # fig.savefig('fig1_231103.eps',format='eps')
        # fig.savefig('linear_240623.pdf',format='pdf')

_Linear = Linear(_df)
_Linear.fit(sd=20)

_Linear.fig()

Orbit

n = 1000
eta_sparsity = 0.05
random_seed=77

np.random.seed(777)
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
eta = signal + epsilon
pi=np.pi
ang=np.linspace(-pi,pi-2*pi/n,n)
r=5+np.cos(np.linspace(0,12*pi,n))
vx=r*np.cos(ang)
vy=r*np.sin(ang)
f1=10*np.sin(np.linspace(0,6*pi,n))
f = f1 + eta
_df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
outlier_true_orbit = signal.copy()
outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
index_of_trueoutlier_bool = signal!=0

class Orbit:
    def __init__(self,df):
        self.df = df 
        self.f = df.f.to_numpy()
        self.f1 = df.f1.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.n = len(self.f)
        self.theta= None
    def get_distance(self):
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.x, self.y],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i,self.n):
                self.D[i,j]=np.linalg.norm(locations[i]-locations[j])
        self.D = self.D + self.D.T
    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        self.theta = theta
        dist = np.where(self.D < kappa,self.D,0)
        self.W = np.exp(-(dist/self.theta)**2)
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)       
    def fit(self,sd=5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f- self.df.fHat)
        
    def fig(self):
        outlier_GODE_one_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_one_old,reverse=True)
        index = int(len(sorted_data) * 0.05)
        five_percent = sorted_data[index]
        outlier_GODE_one = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_one_old))
        outlier_GODE_one_index = [i for i, value in enumerate(outlier_GODE_one_old) if value > five_percent]

        # fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(30,15),subplot_kw={"projection":"3d"})
        # ax1.grid(False)
        # ax1.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
        # ax1.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
        # ax1.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
        # ax1.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
        # ax1.xaxis.pane.fill = False
        # ax1.yaxis.pane.fill = False
        # ax1.zaxis.pane.fill = False
        # ax1.view_init(elev=30., azim=60)
        
        # ax2.grid(False)
        # ax2.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
        # ax2.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
        # ax2.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)      
        # ax2.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
        # ax2.xaxis.pane.fill = False
        # ax2.yaxis.pane.fill = False
        # ax2.zaxis.pane.fill = False
        # ax2.view_init(elev=30., azim=40)
        
        # ax3.grid(False)
        # ax3.scatter3D(self.x[~index_of_trueoutlier_bool],self.y[~index_of_trueoutlier_bool],self.f[~index_of_trueoutlier_bool],zdir='z',color='gray',alpha=0.99,zorder=1)
        # ax3.scatter3D(self.x[index_of_trueoutlier_bool],self.y[index_of_trueoutlier_bool],self.f[index_of_trueoutlier_bool],zdir='z',s=75,color='red',alpha=0.99,zorder=2)
        # ax3.scatter3D(self.x[outlier_GODE_one_index],self.y[outlier_GODE_one_index],self.f[outlier_GODE_one_index],edgecolors='red',zdir='z',s=300,facecolors='none',alpha=0.99,zorder=3)
        # ax3.plot3D(self.x,self.y,self.f1,'--k',lw=3,zorder=10)
        # ax3.xaxis.pane.fill = False
        # ax3.yaxis.pane.fill = False
        # ax3.zaxis.pane.fill = False
        # ax3.view_init(elev=30., azim=10)
        
        # fig.savefig('fig2_231129.eps',format='eps')
        # fig.savefig('orbit_231129_3.pdf',format='pdf')

# _Orbit = Orbit(_df)

# _Orbit.get_distance()

# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500) 
# _Orbit.fit(sd=15)

# %%capture --no-display
# _Orbit.fig()

from sklearn import metrics

_Orbit = Orbit(_df)
_Orbit.get_distance()
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=10) 
_Orbit.fit(sd=15)

outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
fpr, tpr, thresh = metrics.roc_curve(outlier_true_orbit, outlier_GODE_orbit)      
AUC = auc(fpr, tpr)
AUC

100%|██████████| 1000/1000 [00:01<00:00, 740.81it/s]

NameError: name 'auc' is not defined

kappa = 1.21
n_values = list([1000,5000,10000])
eta_sparsity_list = list([0.01,0.05,0.1])
random_seed=77
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination"])

for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0

        _Orbit = Orbit(_df)
        _Orbit.get_distance()
        _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa) 
        _Orbit.fit(sd=15)

        outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        five_percent = sorted_data[index]
        outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
        fpr, tpr, thresh = roc_curve(outlier_true_orbit, outlier_GODE_orbit)      
        fold_AUC = auc(fpr, tpr)

        tab = pd.concat([tab,
                   pd.DataFrame({"n":[n],"kappa":[kappa],"eta_sparsity":[eta_sparsity],"AUC":[fold_AUC]})])

100%|██████████| 1000/1000 [00:01<00:00, 694.62it/s]

NameError: name 'roc_curve' is not defined

tab

plt.figure(figsize=(8, 6))  # 그림 크기 설정 (선택사항)
sns.heatmap(np.array(tab.AUC).reshape(10,10), annot=True, cmap='coolwarm', square=True)
plt.show()

Bunny

with open("../../2_research/Bunny.pkl", "rb") as file:
    loaded_obj = pickle.load(file)

_df = pd.DataFrame({'x':loaded_obj['x'],'y':loaded_obj['y'],'z':loaded_obj['z'],'fnoise':loaded_obj['f']+loaded_obj['noise'],'f':loaded_obj['f'],'noise':loaded_obj['noise']})
outlier_true_bunny = loaded_obj['unif'].copy()
outlier_true_bunny = list(map(lambda x: 1 if x !=0  else 0,outlier_true_bunny))
index_of_trueoutlier_bool_bunny = loaded_obj['unif']!=0
_W = loaded_obj['W'].copy()

class BUNNY:
    def __init__(self,df):
        self.df = df 
        self.f = df.f.to_numpy()
        self.z = df.z.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.noise = df.noise.to_numpy()
        self.fnoise = self.f + self.noise
        self.W = _W
        self.n = len(self.f)
        self.theta= None
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)       
    def fit(self,sd=5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.fnoise # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fnoise = self.fnoise)
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f + self.df.noise - self.df.fHat)

    def fig(self):

        outlier_GODE_one_old = (self.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_one_old,reverse=True)
        index = int(len(sorted_data) * 0.05)
        five_percent = sorted_data[index]
        outlier_GODE_one = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_one_old))
        outlier_GODE_one_index = [i for i, value in enumerate(outlier_GODE_one_old) if value > five_percent]

        fig = plt.figure(figsize=(30,12),dpi=400)
        ax1 = fig.add_subplot(251, projection='3d')
        ax1.grid(False)
        ax1.scatter3D(self.x,self.y,self.z,c='gray',zdir='z',alpha=0.5,marker='.')
        ax1.view_init(elev=60., azim=-90)

        ax2= fig.add_subplot(252, projection='3d')
        ax2.grid(False)
        ax2.scatter3D(self.x,self.y,self.z,c=self.f,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax2.view_init(elev=60., azim=-90)

        ax3= fig.add_subplot(253, projection='3d')
        ax3.grid(False)
        ax3.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax3.view_init(elev=60., azim=-90)
        
        ax4= fig.add_subplot(254, projection='3d')
        ax4.grid(False)
        ax4.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax4.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax4.view_init(elev=60., azim=-90)

        ax5= fig.add_subplot(255, projection='3d')
        ax5.grid(False)
        ax5.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax5.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax5.scatter3D(self.df.x[outlier_GODE_one_index],self.df.y[outlier_GODE_one_index],self.df.z[outlier_GODE_one_index],zdir='z',s=550,marker='.',edgecolors='red',facecolors='none')
        ax5.view_init(elev=60., azim=-90)
        
        ax6 = fig.add_subplot(256, projection='3d')
        ax6.grid(False)
        ax6.scatter3D(self.x,self.y,self.z,c='gray',zdir='z',alpha=0.5,marker='.')
        ax6.view_init(elev=-60., azim=-90)

        ax7= fig.add_subplot(257, projection='3d')
        ax7.grid(False)
        ax7.scatter3D(self.x,self.y,self.z,c=self.f,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax7.view_init(elev=-60., azim=-90)

        ax8= fig.add_subplot(258, projection='3d')
        ax8.grid(False)
        ax8.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',alpha=0.5,vmin=-12,vmax=10)
        ax8.view_init(elev=-60., azim=-90)
        
        ax9= fig.add_subplot(259, projection='3d')
        ax9.grid(False)
        ax9.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax9.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax9.view_init(elev=-60., azim=-90)

        ax10= fig.add_subplot(2,5,10, projection='3d')
        ax10.grid(False)
        ax10.scatter3D(self.x,self.y,self.z,c=self.fnoise,cmap='hsv',zdir='z',marker='.',vmin=-12,vmax=10,s=1)
        ax10.scatter3D(self.x[index_of_trueoutlier_bool_bunny],self.y[index_of_trueoutlier_bool_bunny],self.z[index_of_trueoutlier_bool_bunny],c=self.fnoise[index_of_trueoutlier_bool_bunny],cmap='hsv',zdir='z',marker='.',s=50)
        ax10.scatter3D(self.df.x[outlier_GODE_one_index],self.df.y[outlier_GODE_one_index],self.df.z[outlier_GODE_one_index],zdir='z',s=550,marker='.',edgecolors='red',facecolors='none')
        ax10.view_init(elev=-60., azim=-90)        
        # fig.savefig('fig_bunny.eps',format='eps')

_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)

_BUNNY.fig()

import plotly.express as px
import pandas as pd

Linear appendix

data = pd.read_csv('../3_table/Example_1_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method', 'Comtamination':'Contamination'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

N = 1000, Sparsity = 1%

Number = 1000
Sparsity = 0.01

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 5%

Number = 1000
Sparsity = 0.05

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 10%

Number = 1000
Sparsity = 0.1

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 1%

Number = 5000
Sparsity = 0.01

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 5%

Number = 5000
Sparsity = 0.05

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 10%

Number = 5000
Sparsity = 0.1

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 1%

Number = 10000
Sparsity = 0.01

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 5%

Number = 10000
Sparsity = 0.05

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 10%

Number = 10000
Sparsity = 0.1

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Orbit appendix

data = pd.read_csv('../3_table/Example_2_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

data

	Method	Accuracy	Precision	Recall	F1	AUC	N	Contamination	kappa
0	GODE	0.9910	0.400	0.571429	0.470588	0.953532	1000.0	0.01	NaN
1	GODE	0.9862	0.460	0.353846	0.400000	0.875514	5000.0	0.01	NaN
2	GODE	0.9873	0.380	0.368932	0.374384	0.887724	10000.0	0.01	NaN
3	GODE	0.9570	0.560	0.571429	0.565657	0.893088	1000.0	0.05	NaN
4	GODE	0.9566	0.640	0.557491	0.595903	0.884623	5000.0	0.05	NaN
...	...	...	...	...	...	...	...	...	...
103	LSCP	0.9502	0.576	0.501742	0.536313	0.869403	5000.0	0.05	NaN
104	LSCP	0.9078	0.570	0.536723	0.552861	0.840433	5000.0	0.10	NaN
105	LSCP	0.9861	0.320	0.310680	0.315271	0.858496	10000.0	0.01	NaN
106	LSCP	0.9542	0.548	0.541502	0.544732	0.878705	10000.0	0.05	NaN
107	LSCP	0.9200	0.610	0.598039	0.603960	0.862852	10000.0	0.10	NaN

108 rows × 9 columns

N = 1000, Sparsity = 1%

Number = 1000
Sparsity = 0.01

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 5%

Number = 1000
Sparsity = 0.05

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 1000, Sparsity = 10%

Number = 1000
Sparsity = 0.1

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 1%

Number = 5000
Sparsity = 0.01

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 5%

Number = 5000
Sparsity = 0.05

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 5000, Sparsity = 10%

Number = 5000
Sparsity = 0.1

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 1%

Number = 10000
Sparsity = 0.01

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 5%

Number = 10000
Sparsity = 0.05

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

N = 10000, Sparsity = 10%

Number = 10000
Sparsity = 0.1

fig = px.bar(data.query(f"N=={Number} and Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Bunny

data = pd.read_csv('../3_table/Example_3_Dataset.csv')
data.rename(columns={'Unnamed: 0': 'Method'}, inplace=True)
# data['Contamination'] = data['Contamination'].astype(str)

Sparsity = 1%

Sparsity = 0.01

fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Sparsity = 5%

Sparsity = 0.05

fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Sparsity = 10%

Sparsity = 0.1

fig = px.bar(data.query(f"Contamination=={Sparsity}").sort_values('AUC',ascending=False), x='Method', y="AUC",
            width=800, height=600)
fig.show()

Earthquake

class Earthquake:
    def __init__(self,df):
        self.df = df 
        self.f = df.Magnitude.to_numpy()
        self.year = df.Year.to_numpy()
        self.lat = df.Latitude.to_numpy()
        self.long = df.Longitude.to_numpy()
        self.n = len(self.f)
        
        self.theta= None
    def get_distance(self):
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.lat, self.long],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i,self.n): 
                self.D[i,j]=haversine(locations[i],locations[j])
        self.D1 = self.D+self.D.T
    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        self.theta = theta
        dist = np.where(self.D<kappa,self.D,0)
        self.W = np.exp(-(dist/self.theta)**2)

    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)        
    def fit(self,m):
        self._eigen()
        self.fhat = self.Psi[:,0:m]@self.Psi[:,0:m].T@self.f
        self.df = self.df.assign(MagnitudeHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
        plt.plot(self.f,'.')
        plt.plot(self.fhat,'x')

class Earthquake2(Earthquake): # ebayesthresh 기능추가
    def fit2(self): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2)))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(MagnitudeHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
        self.con = np.where(self.df.Residual>0.7,1,0)

df= pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/earthquakes-23k.csv')

df_global= pd.concat([pd.read_csv('00_05.csv'),pd.read_csv('05_10.csv'),pd.read_csv('10_15.csv'),pd.read_csv('15_20.csv')]).iloc[:,[0,1,2,4]].rename(columns={'latitude':'Latitude','longitude':'Longitude','mag':'Magnitude'}).reset_index().iloc[:,1:]

df_global = df_global.assign(Year=list(map(lambda x: x.split('-')[0], df_global.time))).iloc[:,1:]

df_global.Year = df_global.Year.astype(np.float64)

each_location=Earthquake2(df_global.query("2010 <= Year < 2015"))

each_location.get_distance()

100%|██████████| 12498/12498 [03:01<00:00, 68.84it/s]

Distance 분포

너무 데이터가 많아 그려지는데 한참 걸리거나 시각적으로 보기 좋지 않음..

plt.plot(np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0])

np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0].max()

20013.30596923459

np.array(each_location.D).reshape(-1)[np.array(each_location.D).reshape(-1) != 0].min()

0.08979301632451746

np.save('earth_distance.npy', each_location.D1)

map 에 distance가 2500이하인 구간 표시

from geopy.distance import distance
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import geopandas as gpd
from matplotlib.patches import Polygon as mpl_polygon

# 서울의 위도와 경도
seoul_lat, seoul_lon = 37.5665, 126.978

# 원형 경계 구간 생성 함수
def create_circle(lon, lat, radius_km, num_points=100):
    """Create a circle in lat/lon coordinates"""
    circle = []
    for i in range(num_points):
        angle = 2 * np.pi * i / num_points
        destination = distance(kilometers=radius_km).destination((lat, lon), angle)
        circle.append((destination.longitude, destination.latitude))
    return circle

# 서울을 중심으로 2,500 km 거리의 원형 경계 구간 생성
radius_km = 2500
circle_coords = create_circle(seoul_lon, seoul_lat, radius_km)

# 원형 경계 구간을 GeoDataFrame으로 변환
polygon = Polygon(circle_coords)
gdf = gpd.GeoDataFrame(index=[0], geometry=[polygon], crs='EPSG:4326')

import folium
from folium import features

# 서울의 위치
seoul_location = [seoul_lat, seoul_lon]

# 기본 지도 생성
m = folium.Map(location=seoul_location, zoom_start=6)

# 원형 마커 추가
folium.Circle(
    location=seoul_location,
    radius=2500 * 1000,  # 2,500 km를 미터로 변환
    color='blue',
    fill=True,
    fill_opacity=0.5
).add_to(m)

# 서울 위치 표시
folium.Marker(
    location=seoul_location,
    popup='Seoul',
    icon=folium.Icon(color='red', icon='info-sign')
).add_to(m)

<folium.map.Marker at 0x7f98de7f8bb0>

Make this Notebook Trusted to load map: File -> Trust Notebook

위 그래프 안 보일까봐 캡쳐본 추가

from IPython.display import Image

Image('poly.png')

Import

Linear

Orbit

Bunny

Linear appendix

N = 1000, Sparsity = 1%

N = 1000, Sparsity = 5%

N = 1000, Sparsity = 10%

N = 5000, Sparsity = 1%

N = 5000, Sparsity = 5%

N = 5000, Sparsity = 10%

N = 10000, Sparsity = 1%

N = 10000, Sparsity = 5%

N = 10000, Sparsity = 10%

Orbit appendix

N = 1000, Sparsity = 1%

N = 1000, Sparsity = 5%

N = 1000, Sparsity = 10%

N = 5000, Sparsity = 1%

N = 5000, Sparsity = 5%

N = 5000, Sparsity = 10%

N = 10000, Sparsity = 1%

N = 10000, Sparsity = 5%

N = 10000, Sparsity = 10%

Bunny

Sparsity = 1%

Sparsity = 5%

Sparsity = 10%

Earthquake

Distance 분포

map 에 distance가 2500이하인 구간 표시

map 에 distance가 2500이하인 구간 표시