k fold 교차검증 orbit

Import

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import random
import pickle

import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
from haversine import haversine
from IPython.display import HTML
import plotly.graph_objects as go
import copy 

import tqdm
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector 

from pygsp import graphs, filters, plotting, utils

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, roc_curve, auc

from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.cblof import CBLOF
from pyod.models.ocsvm import OCSVM
from pyod.models.mcd import MCD
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.hbos import HBOS
from pyod.models.sos import SOS
from pyod.models.so_gaal import SO_GAAL
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lscp import LSCP

Class

class Conf_matrx:
    """Collect binary-classification scores for one outlier detector.

    ``original`` holds the reference labels and ``compare`` the labels being
    evaluated; both are passed unchanged to the scikit-learn metric functions.
    """

    def __init__(self,original,compare):
        self.original, self.compare = original, compare

    def conf(self,name):
        """Score ``compare`` against ``original`` and store the results.

        Sets ``name``, ``conf_matrix``, ``acc``, ``pre``, ``rec`` and ``f1`` on
        the instance. Nothing is returned, plotted or printed.
        """
        truth, predicted = self.original, self.compare

        self.name = name
        self.conf_matrix = confusion_matrix(truth, predicted)

        # Scalar summaries, computed in the same order as before.
        self.acc = accuracy_score(truth, predicted)
        self.pre = precision_score(truth, predicted)
        self.rec = recall_score(truth, predicted)
        self.f1 = f1_score(truth, predicted)

class Linear:
    """Graph-spectral smoother for a signal ``y`` observed at positions ``x``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide columns ``x`` and ``y``.
    W : numpy.ndarray, optional
        Square weight (adjacency) matrix of the graph. When omitted, the
        module-level matrix ``w`` is used, which is what the original
        one-argument call ``Linear(df)`` relied on.
    """

    def __init__(self,df,W=None):
        self.df = df
        self.y = df.y.to_numpy()
        self.x = df.x.to_numpy()
        self.n = len(self.y)
        # ``w`` is only looked up when no matrix is passed, so the class can be
        # used without that notebook global being defined.
        self.W = w if W is None else W

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^{-1/2} (D - W) D^{-1/2}.

        Stores the Laplacian in ``L``, eigenvalues in ``lamb`` (and as a
        diagonal matrix in ``Lamb``) and eigenvectors as columns of ``Psi``.
        """
        d = self.W.sum(axis=1)
        inv_sqrt_d = 1/np.sqrt(d)
        # Row/column scaling by broadcasting gives the same matrix as
        # diag(d^-1/2) @ (D - W) @ diag(d^-1/2) without two dense matmuls.
        self.L = inv_sqrt_d[:, None] * (np.diag(d) - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self,sd=20): # fit with ebayesthresh
        """Smooth ``y`` by thresholding its graph Fourier coefficients.

        The squared coefficients are passed to R's ``ebayesthresh`` (package
        EbayesThresh, called through rpy2) with standard deviation ``sd``;
        coefficients whose thresholded power is not positive are set to zero.
        Adds columns ``yHat`` (fitted values) and ``Residual`` (``y - yHat``)
        to ``self.df``.
        """
        self._eigen()
        self.ybar = self.Psi.T @ self.y # ybar := graph Fourier transform of y
        self.power = self.ybar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.ybar_threshed = np.where(self.power_threshed>0,self.ybar,0)
        self.yhat = self.Psi@self.ybar_threshed
        self.df = self.df.assign(yHat = self.yhat)
        self.df = self.df.assign(Residual = self.df.y- self.df.yHat)

class Orbit:
    """Graph-spectral smoother for a signal ``f`` observed at 2-D points.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide columns ``x``, ``y`` (coordinates) and ``f`` (signal).

    Typical use: ``get_distance()`` -> ``get_weightmatrix(...)`` -> ``fit()``.
    """

    def __init__(self,df):
        self.df = df
        self.f = df.f.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.n = len(self.f)
        self.theta= None

    def get_distance(self):
        """Fill ``self.D`` with the pairwise Euclidean distances of (x, y)."""
        locations = np.stack([self.x, self.y],axis=1)
        self.D = np.zeros([self.n,self.n])
        for i in tqdm.tqdm(range(self.n)):
            # One vectorized row of the upper triangle per iteration instead
            # of a Python-level inner loop over j.
            self.D[i,i:] = np.linalg.norm(locations[i:]-locations[i],axis=1)
        # Mirror the upper triangle; the diagonal is zero so it is unaffected.
        self.D = self.D + self.D.T

    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        """Build the Gaussian-kernel weight matrix ``self.W`` from ``self.D``.

        Parameters
        ----------
        theta : float
            Kernel bandwidth; weights are ``exp(-(D/theta)**2)``.
        beta : float
            Unused; kept so existing calls keep working.
        kappa : float
            Distance cut-off. Pairs with ``D >= kappa`` get weight 0.

        The diagonal (distance 0) keeps weight 1, as before.
        """
        self.theta = theta
        # The cut-off is applied to the weights, not to the distances:
        # zeroing the distance first would turn every far pair into
        # exp(0) = 1, i.e. the strongest possible edge.
        self.W = np.where(self.D < kappa, np.exp(-(self.D/self.theta)**2), 0.0)

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^{-1/2} (D - W) D^{-1/2}.

        Stores the Laplacian in ``L``, eigenvalues in ``lamb`` (and as a
        diagonal matrix in ``Lamb``) and eigenvectors as columns of ``Psi``.
        """
        d = self.W.sum(axis=1)
        inv_sqrt_d = 1/np.sqrt(d)
        # Same matrix as diag(d^-1/2) @ (D - W) @ diag(d^-1/2), computed by
        # broadcasting instead of two dense matrix products.
        self.L = inv_sqrt_d[:, None] * (np.diag(d) - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self,sd=5): # fit with ebayesthresh
        """Smooth ``f`` by thresholding its graph Fourier coefficients.

        The squared coefficients are passed to R's ``ebayesthresh`` (package
        EbayesThresh, called through rpy2) with standard deviation ``sd``;
        coefficients whose thresholded power is not positive are set to zero.
        Adds columns ``fHat`` (fitted values) and ``Residual`` (``f - fHat``)
        to ``self.df``. Requires ``self.W`` to have been built first.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f- self.df.fHat)

class BUNNY:
    """Graph-spectral smoother for a noisy signal on a pre-built graph.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide columns ``x``, ``y``, ``z``, ``f`` and ``noise``.
        The fitted signal is ``f + noise``.
    W : numpy.ndarray, optional
        Square weight matrix of the graph. When omitted, the module-level
        matrix ``_W`` is used, which is what the original one-argument call
        ``BUNNY(df)`` relied on.
    """

    def __init__(self,df,W=None):
        self.df = df
        self.f = df.f.to_numpy()
        self.z = df.z.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.noise = df.noise.to_numpy()
        self.fnoise = self.f + self.noise
        # ``_W`` is only looked up when no matrix is passed, so the class can
        # be used without that notebook global being defined.
        self.W = _W if W is None else W
        self.n = len(self.f)
        self.theta= None

    def _eigen(self):
        """Eigendecompose the normalized Laplacian D^{-1/2} (D - W) D^{-1/2}.

        Stores the Laplacian in ``L``, eigenvalues in ``lamb`` (and as a
        diagonal matrix in ``Lamb``) and eigenvectors as columns of ``Psi``.
        """
        d = self.W.sum(axis=1)
        inv_sqrt_d = 1/np.sqrt(d)
        # Same matrix as diag(d^-1/2) @ (D - W) @ diag(d^-1/2), computed by
        # broadcasting instead of two dense matrix products.
        self.L = inv_sqrt_d[:, None] * (np.diag(d) - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self,sd=5): # fit with ebayesthresh
        """Smooth ``f + noise`` by thresholding its graph Fourier coefficients.

        The squared coefficients are passed to R's ``ebayesthresh`` (package
        EbayesThresh, called through rpy2) with standard deviation ``sd``;
        coefficients whose thresholded power is not positive are set to zero.
        Adds columns ``fnoise``, ``fHat`` and ``Residual``
        (``f + noise - fHat``) to ``self.df``.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.fnoise # fbar := graph fourier transform of fnoise
        self.power = self.fbar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fnoise = self.fnoise)
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f + self.df.noise - self.df.fHat)

class fortable:
    """Evaluate one detector and append its metrics to a results table.

    NOTE: a second class with the same name is defined further down in this
    file and replaces this one once that definition has been executed.
    """

    def __init__(self, df, clf, tab, outlier_true, conf_name = "Method"):
        self.df, self.clf = df, clf
        self.conf_name = conf_name
        self.tabb = tab
        self.outlier_true = outlier_true

    def _forfit(self):
        """Fit ``clf`` on the coordinate columns plus the available signal."""
        cols = self.df.columns
        if 'fnoise' in cols:
            self.clf.fit(self.df[['x', 'y','fnoise']])
        elif 'f' in cols:
            self.clf.fit(self.df[['x', 'y', 'f']])
        # Separate check (not part of the chain above): without an 'f' column
        # the detector is (re)fitted on the coordinates alone.
        if 'f' not in cols:
            self.clf.fit(self.df[['x', 'y']])

    def _forlabels(self):
        """Copy the fitted detector's ``labels_`` into a plain list."""
        self.labels = list(self.clf.labels_)

    def _forpredict(self):
        """Store the result of ``clf.fit_predict`` on the full frame."""
        self.predict = self.clf.fit_predict(self.df)

    def comparison(self, compare_outlier = None, conf_outlier = None, gode = False):
        """Return ``tab`` with one extra row of metrics for this detector.

        With ``gode == False`` the detector is fitted here and supplies both
        the scores (for the ROC curve) and the labels (for the confusion
        metrics). With ``gode == True`` the caller-provided ``compare_outlier``
        (scores) and ``conf_outlier`` (labels) are used as given.
        The row also records the module-level ``n`` and ``eta_sparsity``.
        """
        if gode == False:
            self._forfit()
            self._forlabels()
            if 'fnoise' in self.df.columns:
                noisy_features = np.array(self.df[['x', 'y','fnoise']])
                compare_outlier = self.clf.decision_function(noisy_features)
            else:
                compare_outlier = self.clf.decision_function(self.df)
            if self.conf_name == "LOF":
                self._forpredict()
                conf_outlier = self.predict
            else:
                conf_outlier = self.labels

        fpr, tpr, thresh = roc_curve(self.outlier_true, compare_outlier)

        _conf = Conf_matrx(self.outlier_true, conf_outlier)
        _conf.conf(self.conf_name)

        new_row = pd.DataFrame(
            {"Accuracy":[_conf.acc],"Precision":[_conf.pre],"Recall":[_conf.rec],"F1":[_conf.f1],"AUC":[auc(fpr, tpr)],"N":n, "Contamination": eta_sparsity},
            index = [_conf.name],
        )
        return pd.concat([self.tabb, new_row])

class fortable:
    """Evaluate one outlier detector and append its metrics to a table.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with columns ``x``, ``y`` and optionally ``f`` or ``fnoise``.
    clf : object or None
        Detector exposing ``fit``, ``labels_``, ``decision_function`` and
        (for the ``"LOF"`` case) ``fit_predict``. May be ``None`` when
        ``comparison`` is called with ``gode=True``.
    tab : pandas.DataFrame
        Running results table; it is not modified in place.
    outlier_true : sequence
        Reference 0/1 labels.
    conf_name : str
        Row label for the new entry.
    """

    def __init__(self, df, clf, tab, outlier_true, conf_name = "Method"):
        self.df = df
        self.clf = clf
        self.conf_name = conf_name
        self.tabb = tab
        self.outlier_true = outlier_true

    @staticmethod
    def _notebook_value(name, default):
        """Return the module-level variable ``name``, or ``default`` if unset."""
        return globals().get(name, default)

    def _features(self):
        """Select the detector input: coordinates plus the signal, if any."""
        columns = self.df.columns
        if 'fnoise' in columns:
            return self.df[['x', 'y','fnoise']]
        if 'f' in columns:
            return self.df[['x', 'y', 'f']]
        return self.df[['x', 'y']]

    def _forfit(self):
        """Fit the detector exactly once on the selected feature columns.

        Previously a frame with ``fnoise`` but no ``f`` was fitted twice and
        ended up fitted on ``['x', 'y']`` only.
        """
        self.clf.fit(self._features())

    def _forlabels(self):
        """Copy the fitted detector's ``labels_`` into a plain list."""
        self.labels = list(self.clf.labels_)

    def _forpredict(self):
        """Store ``clf.fit_predict`` labels for the selected feature columns.

        Uses the same columns as ``_forfit`` so that labels and scores are
        produced from the same detector input.
        """
        self.predict = self.clf.fit_predict(self._features())

    def comparison(self, compare_outlier = None, conf_outlier = None, gode = False):
        """Return ``tab`` extended by one row of metrics for this detector.

        Parameters
        ----------
        compare_outlier : sequence, optional
            Outlier scores for the ROC curve; only used when ``gode`` is True.
        conf_outlier : sequence, optional
            0/1 labels for the confusion metrics; only used when ``gode`` is
            True.
        gode : bool
            ``False``: fit ``clf`` here and take scores and labels from it.
            ``True``: use the two arguments above as given.

        Returns
        -------
        pandas.DataFrame
            Columns Accuracy, Precision, Recall, F1, AUC, N, Contamination and
            kappa (plus any other columns already present in ``tab``). N,
            Contamination and kappa come from the module-level ``n``,
            ``eta_sparsity`` and ``kappa``; a missing one is recorded as the
            number of labels (N) or NaN instead of raising NameError.
        """
        if gode == False:
            self._forfit()
            self._forlabels()
            features = self._features()
            if 'fnoise' in self.df.columns:
                compare_outlier = self.clf.decision_function(np.array(features))
            else:
                # Score the columns the detector was fitted on, not the whole
                # frame, so extra columns in ``df`` cannot break scoring.
                compare_outlier = self.clf.decision_function(features)
            if self.conf_name == "LOF":
                self._forpredict()
                conf_outlier = self.predict
            else:
                conf_outlier = self.labels

        fpr, tpr, thresh = roc_curve(self.outlier_true, compare_outlier)

        _conf = Conf_matrx(self.outlier_true, conf_outlier)
        _conf.conf(self.conf_name)

        row = pd.DataFrame(
            {
                "Accuracy":[_conf.acc],
                "Precision":[_conf.pre],
                "Recall":[_conf.rec],
                "F1":[_conf.f1],
                "AUC":[auc(fpr, tpr)],
                "N":self._notebook_value("n", len(self.outlier_true)),
                "Contamination":self._notebook_value("eta_sparsity", np.nan),
                "kappa":self._notebook_value("kappa", np.nan),
            },
            index = [_conf.name],
        )

        if len(self.tabb) == 0:
            # Concatenating onto an empty frame is deprecated in pandas; keep
            # the table's declared columns (in order) and add the new ones.
            declared = list(self.tabb.columns)
            extra = [c for c in row.columns if c not in declared]
            return row.reindex(columns=declared + extra)

        return pd.concat([self.tabb, row])

Linear

# Results table for the linear example. It must exist before the first
# detector cell, because every cell passes it to fortable and rebinds it to
# the extended table that comes back.
tab_linear = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])

n = 10000            # number of observations
eta_sparsity = 0.2   # share of non-zero entries in the outlier pool below
random_seed = 77     # seed reused by the detector cells

np.random.seed(6)

# Noise, then the sparse outlier signal: n draws (with replacement) from a pool
# holding round(n*eta_sparsity) non-zero shifts in [-7,-5] U [5,7] and zeros
# otherwise. The order of the RNG calls is kept exactly as it was.
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-7, -5, round(n*eta_sparsity/2)).round(15), np.random.uniform(5, 7, round(n*eta_sparsity/2)).round(15), np.repeat(0, n - round(n*eta_sparsity)))), n)
eta = signal + epsilon

# Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
outlier_true_linear = [1 if shift != 0 else 0 for shift in signal]

x_1 = np.linspace(0,2,n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta # eta = signal + epsilon

_df=pd.DataFrame({'x':x_1, 'y':y_1})

# Weight matrix of the chain graph on the n ordered observations:
# w[i, j] = 1 exactly when |i - j| == 1, and 0 otherwise (including i == j).
# The two off-diagonals are written directly instead of visiting all n*n
# index pairs in a Python loop.
w=np.zeros((n,n))
band = np.arange(n - 1)
w[band, band + 1] = 1
w[band + 1, band] = 1

# Boolean mask of the observations that received an outlier shift.
index_of_trueoutlier_bool = signal!=0

GODE_Linear

# Fit the graph-spectral smoother on the linear example.
_Linear = Linear(_df)
_Linear.fit(sd=20)

# Squared residuals serve as outlier scores.
outlier_GODE_linear_old = (_Linear.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_linear_old,reverse=True)
# Threshold = the score at rank int(len * eta_sparsity) in descending order;
# despite its name, `five_percent` is that eta_sparsity cut-off.
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
# Label 1 for scores strictly above the threshold, 0 otherwise.
outlier_GODE_linear = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_linear_old))

# No detector object is needed here: scores and labels are passed in directly.
tab_lin = fortable(_df, clf = None, tab =tab_linear, outlier_true=outlier_true_linear, conf_name ="GODE")

tab_linear = tab_lin.comparison(compare_outlier = outlier_GODE_linear_old, conf_outlier = outlier_GODE_linear, gode = True)

LOF_Linear

# LOF baseline on the linear example, with contamination set to eta_sparsity.
# The global NumPy RNG is re-seeded before the detector is built.
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "LOF")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

KNN_Linear

# kNN baseline on the linear example, with contamination set to eta_sparsity.
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "KNN")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

CBLOF_Linear

# CBLOF baseline on the linear example; the seed is passed via random_state.
clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)
tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "CBLOF")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

OCSVM_Linear

# One-class SVM baseline on the linear example; here eta_sparsity is passed
# as `nu` rather than as `contamination`.
np.random.seed(random_seed)
clf = OCSVM(nu=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "OCSVM")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

MCD_Linear

# MCD baseline on the linear example; the seed is passed via random_state.
clf = MCD(contamination=eta_sparsity, random_state = random_seed)
tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "MCD")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

Feature Bagging_Linear

# Feature Bagging baseline on the linear example; seeded via random_state.
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "Feature Bagging")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

ABOD_Linear

# ABOD baseline on the linear example, with contamination set to eta_sparsity.
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "ABOD")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

IForest_Linear

# Isolation Forest baseline on the linear example; seeded via random_state.
clf = IForest(contamination=eta_sparsity, random_state=random_seed)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "Isolation Forest")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

HBOS_Linear

# HBOS baseline on the linear example, with contamination set to eta_sparsity.
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "HBOS")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

SOS_Linear

# SOS baseline on the linear example, with contamination set to eta_sparsity.
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "SOS")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

LSCP_Linear

# LSCP ensemble on the linear example. The base detectors are created with
# their default arguments; only the ensemble receives eta_sparsity and the seed.
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors,contamination=eta_sparsity, random_state=random_seed)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "LSCP")

# Fits the detector and appends its metrics row to the running table.
tab_linear = tab_lin.comparison()

# Display the accumulated results for the linear example.
tab_linear

# Same table rounded to three decimals; the CSV export is left disabled.
round(tab_linear,3)#.to_csv('./Example_1_2.csv')

# tab_linear.to_csv('./Example_1_Dataset.csv')

Orbit

# Settings for a single orbit run.
n = 1000
eta_sparsity = 0.05
random_seed = 77

# Grid of sample sizes and sparsity levels used by the loops further down.
n_values = [1000, 5000, 10000]
eta_sparsity_list = [0.01, 0.05, 0.1]

# Empty results table. The last column is spelled "kappa" so that it matches
# the column name used for the result rows (it previously read "kapapa").
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

# Simulate one orbit data set of size n. The order of the RNG calls after the
# seed determines the data, so the statements must stay in this order.
np.random.seed(777)
epsilon = np.around(np.random.normal(size=n),15)
# Sparse outlier signal: n draws (with replacement) from a pool holding
# round(n*eta_sparsity) non-zero shifts in [-4,-1] U [1,4] and zeros otherwise.
signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
eta = signal + epsilon
pi=np.pi
# Points on a closed curve whose radius oscillates between 4 and 6.
ang=np.linspace(-pi,pi-2*pi/n,n)
r=5+np.cos(np.linspace(0,12*pi,n))
vx=r*np.cos(ang)
vy=r*np.sin(ang)
# Smooth underlying signal f1 plus noise and outlier shifts.
f1=10*np.sin(np.linspace(0,6*pi,n))
f = f1 + eta
_df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f})
# Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
outlier_true_orbit = signal.copy()
outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))

GODE_Orbit

# Grid settings and distance cut-off for the orbit experiments.
n_values = list([1000,5000,10000])  
eta_sparsity_list = list([0.01,0.05,0.1])
random_seed=77
kappa=1.21

# Fresh, empty results table (the statement is repeated; the second
# assignment simply rebinds the name to another empty frame).
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

# Build the graph on the current _df and fit the smoother.
_Orbit = Orbit(_df)
_Orbit.get_distance()
kappa=1.21

# theta is the mean of the strictly positive pairwise distances.
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa) 
_Orbit.fit(sd=15)

# Squared residuals serve as outlier scores.
outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
# Threshold = the score at rank int(len * eta_sparsity) in descending order;
# `five_percent` holds that eta_sparsity cut-off.
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
# Label 1 for scores strictly above the threshold, 0 otherwise.
outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))

# No detector object is needed here: scores and labels are passed in directly.
tab_orb = fortable(_df, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")

tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)

100%|██████████| 1000/1000 [00:01<00:00, 796.60it/s]
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

# Display the current orbit results table.
tab_orbit

	Accuracy	Precision	Recall	F1	AUC	N	Contamination	kappa
GODE	0.957	0.56	0.571429	0.565657	0.893088	1000	0.05	NaN

# Display the orbit results table again (output of a separate run).
tab_orbit

	Accuracy	Precision	Recall	F1	AUC	N	Contamination	kappa
GODE	0.961	0.6	0.612245	0.606061	0.893023	1000	0.05	NaN

# GODE on the full grid: one results row per (eta_sparsity, n) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])
kappa=1.21
for eta_sparsity in eta_sparsity_list:
    
    for n in n_values:

        # Re-seed for every combination, so each data set is generated from
        # the same RNG state; the order of the RNG calls must not change.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
    
        # The noise-free column f1 is dropped before fitting.
        _df_orbit = _df[['x','y','f']]

        _Orbit = Orbit(_df_orbit)
        _Orbit.get_distance()
        
        # theta is the mean of the strictly positive pairwise distances.
        _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa) 
        _Orbit.fit(sd=15)
        
        # Squared residuals are the scores; the threshold is the score at
        # rank int(len * eta_sparsity) in descending order.
        outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        percent = sorted_data[index]
        outlier_GODE_orbit = list(map(lambda x: 1 if x > percent else 0,outlier_GODE_orbit_old))
        
        tab_orb = fortable(_df_orbit, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")
        
        # Append this combination's row to the running table.
        tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)

100%|██████████| 1000/1000 [00:01<00:00, 762.07it/s]
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,
100%|██████████| 5000/5000 [00:31<00:00, 159.73it/s] 
100%|██████████| 10000/10000 [02:04<00:00, 80.49it/s] 
100%|██████████| 1000/1000 [00:01<00:00, 774.51it/s]
100%|██████████| 5000/5000 [00:31<00:00, 158.99it/s]
100%|██████████| 10000/10000 [02:06<00:00, 78.91it/s] 
100%|██████████| 1000/1000 [00:01<00:00, 797.18it/s]
100%|██████████| 5000/5000 [00:31<00:00, 159.37it/s]
100%|██████████| 10000/10000 [02:06<00:00, 78.86it/s]

# Keep an independent copy of the GODE grid results before tab_orbit is reused.
tab_orbit_gode = tab_orbit.copy()

# Display the copy rounded to three decimals.
tab_orbit_gode.round(3)

	Accuracy	Precision	Recall	F1	AUC	N	Contamination	kappa
GODE	0.991	0.400	0.571	0.471	0.954	1000	0.01	NaN
GODE	0.986	0.460	0.354	0.400	0.876	5000	0.01	NaN
GODE	0.987	0.380	0.369	0.374	0.888	10000	0.01	NaN
GODE	0.957	0.560	0.571	0.566	0.893	1000	0.05	NaN
GODE	0.957	0.640	0.557	0.596	0.885	5000	0.05	NaN
GODE	0.959	0.592	0.585	0.588	0.891	10000	0.05	NaN
GODE	0.918	0.670	0.578	0.620	0.858	1000	0.10	NaN
GODE	0.917	0.614	0.578	0.596	0.863	5000	0.10	NaN
GODE	0.924	0.628	0.616	0.622	0.880	10000	0.10	NaN

# _Orbit = Orbit(_df)
# _Orbit.get_distance()

# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500) 
# _Orbit.fit(sd=15)

# outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
# sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
# index = int(len(sorted_data) * eta_sparsity)
# five_percent = sorted_data[index]
# outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))

# tab_orb = fortable(_df, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")

# tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)

LOF_Orbit

# Single LOF run on whatever _df, eta_sparsity and outlier_true_orbit are
# currently bound to (when run top to bottom: the last combination of the
# preceding grid loop). The row is appended to the current tab_orbit.
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LOF")

tab_orbit = tab_orb.comparison()

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)

# LOF on the full grid: one results row per (n, eta_sparsity) combination.
# Note the loop nesting is n-outer here, unlike the GODE grid loop.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed for every combination, so each data set is generated from
        # the same RNG state; the order of the RNG calls must not change.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = LOF(contamination=eta_sparsity)
        
        # Only x, y and the observed signal f are handed to the detector.
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LOF")
        
        tab_orbit = tab_orb.comparison()

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
  warnings.warn(msg, category=FutureWarning)

# Keep a handle on the LOF grid results. This binds a second name to the same
# DataFrame object (no copy is made).
tab_orbit_LOF = tab_orbit

KNN_Orbit

# Single kNN run on whatever _df, eta_sparsity and outlier_true_orbit are
# currently bound to (when run top to bottom: the last combination of the
# preceding grid loop). The row is appended to the current tab_orbit.
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "kNN")

tab_orbit = tab_orb.comparison()

# kNN on the full grid: one results row per (n, eta_sparsity) combination.
# The table starts with the five metric columns only; any further columns are
# contributed by the rows that fortable.comparison appends.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed for every combination, so each data set is generated from
        # the same RNG state; the order of the RNG calls must not change.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = KNN(contamination=eta_sparsity)
        
        # Only x, y and the observed signal f are handed to the detector.
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "kNN")
        
        tab_orbit = tab_orb.comparison()
# Second name for the kNN grid results (same object, no copy).
tab_orbit_kNN = tab_orbit

/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

CBLOF_Orbit

# Single CBLOF run on whatever _df, eta_sparsity and outlier_true_orbit are
# currently bound to; the row is appended to the current tab_orbit.
clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "CBLOF")

tab_orbit = tab_orb.comparison()

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)

# CBLOF on the full grid: one results row per (n, eta_sparsity) combination.
# The table starts with the five metric columns only; any further columns are
# contributed by the rows that fortable.comparison appends.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed for every combination, so each data set is generated from
        # the same RNG state; the order of the RNG calls must not change.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        # Seeded through random_state rather than the global NumPy RNG.
        clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)

        # Only x, y and the observed signal f are handed to the detector.
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "CBLOF")
        
        tab_orbit = tab_orb.comparison()
# Second name for the CBLOF grid results (same object, no copy).
tab_orbit_CBLOF = tab_orbit

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)

OCSVM_Orbit

# Single one-class SVM run on whatever _df, eta_sparsity and
# outlier_true_orbit are currently bound to; eta_sparsity is passed as `nu`.
np.random.seed(random_seed)
clf = OCSVM(nu=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "OCSVM")

tab_orbit = tab_orb.comparison()

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(

# One-class SVM on the full grid: one results row per (n, eta_sparsity)
# combination. The table starts with the five metric columns only; any further
# columns are contributed by the rows that fortable.comparison appends.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed for every combination, so each data set is generated from
        # the same RNG state; the order of the RNG calls must not change.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground truth: 1 where an outlier shift was injected, 0 elsewhere.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        # eta_sparsity is passed as `nu` rather than as `contamination`.
        clf = OCSVM(nu=eta_sparsity)
        
        # Only x, y and the observed signal f are handed to the detector.
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "OCSVM")
        
        tab_orbit = tab_orb.comparison()
# Second name for the OCSVM grid results (same object, no copy).
tab_orbit_OCSVM = tab_orbit

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
  warnings.warn(

MCD_Orbit

# Single MCD run on the `_df`, `eta_sparsity` and `outlier_true_orbit`
# currently in scope; the result row is appended to `tab_orbit`.
clf = MCD(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="MCD",
)
tab_orbit = tab_orb.comparison()

# MCD benchmark on the simulated orbit data: one row per (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        clf = MCD(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "MCD")
        tab_orbit = tab_orb.comparison()

# Keep the MCD results under their own name.
tab_orbit_MCD = tab_orbit

/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

Feature Bagging_Orbit

# Single Feature Bagging run on the `_df`, `eta_sparsity` and
# `outlier_true_orbit` currently in scope; the row is appended to `tab_orbit`.
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="Feature Bagging",
)
tab_orbit = tab_orb.comparison()

# Feature Bagging benchmark on the simulated orbit data: one row per
# (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "Feature Bagging")
        tab_orbit = tab_orb.comparison()

# Keep the Feature Bagging results under their own name.
tab_orbit_Feature = tab_orbit

/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

ABOD_Orbit

# Single ABOD run on the `_df`, `eta_sparsity` and `outlier_true_orbit`
# currently in scope; the result row is appended to `tab_orbit`.
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="ABOD",
)
tab_orbit = tab_orb.comparison()

# ABOD benchmark on the simulated orbit data: one row per (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        # ABOD is constructed without a random_state, so the global seed is
        # reset right before building it.
        np.random.seed(random_seed)
        clf = ABOD(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "ABOD")
        tab_orbit = tab_orb.comparison()

# Keep the ABOD results under their own name.
tab_orbit_ABOD = tab_orbit

/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

IForest_Orbit

# Single Isolation Forest run on the `_df`, `eta_sparsity` and
# `outlier_true_orbit` currently in scope; the row is appended to `tab_orbit`.
clf = IForest(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="Isolation Forest",
)
tab_orbit = tab_orb.comparison()

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(

# Isolation Forest benchmark on the simulated orbit data: one row per
# (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        clf = IForest(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "Isolation Forest")
        tab_orbit = tab_orb.comparison()

# Keep the Isolation Forest results under their own name.
tab_orbit_Isolation = tab_orbit

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
  warnings.warn(

HBOS_Orbit

# Single HBOS run on the `_df`, `eta_sparsity` and `outlier_true_orbit`
# currently in scope; the result row is appended to `tab_orbit`.
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="HBOS",
)
tab_orbit = tab_orb.comparison()

# HBOS benchmark on the simulated orbit data: one row per (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        # HBOS is constructed without a random_state, so the global seed is
        # reset right before building it.
        np.random.seed(random_seed)
        clf = HBOS(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "HBOS")
        tab_orbit = tab_orb.comparison()

# Keep the HBOS results under their own name.
tab_orbit_HBOS = tab_orbit

/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

SOS_Orbit

# Single SOS run on the `_df`, `eta_sparsity` and `outlier_true_orbit`
# currently in scope; the result row is appended to `tab_orbit`.
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="SOS",
)
tab_orbit = tab_orb.comparison()

# SOS benchmark on the simulated orbit data: one row per (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        # SOS is constructed without a random_state, so the global seed is
        # reset right before building it.
        np.random.seed(random_seed)
        clf = SOS(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "SOS")
        tab_orbit = tab_orb.comparison()

# Keep the SOS results under their own name.
tab_orbit_SOS = tab_orbit

/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,

LSCP_Orbit

# Single LSCP run (base detectors: kNN, LOF, OCSVM with default settings) on
# the `_df`, `eta_sparsity` and `outlier_true_orbit` currently in scope.
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(
    _df[['x', 'y', 'f']],
    clf=clf,
    tab=tab_orbit,
    outlier_true=outlier_true_orbit,
    conf_name="LSCP",
)
tab_orbit = tab_orb.comparison()

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(

# LSCP benchmark on the simulated orbit data: one row per (n, eta_sparsity).
tab_orbit = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed so the simulated data depend only on (n, eta_sparsity).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)

        # Pool of candidate values: negative spikes, positive spikes, zeros.
        # np.random.choice then samples n entries from it (with replacement).
        half_count = round(n * eta_sparsity / 2)
        neg_spikes = np.random.uniform(-4, -1, half_count).round(15)
        pos_spikes = np.random.uniform(1, 4, half_count).round(15)
        zero_pad = np.repeat(0, n - round(n * eta_sparsity))
        signal = np.random.choice(
            np.concatenate((neg_spikes, pos_spikes, zero_pad)), n
        )
        eta = signal + epsilon

        # Node coordinates (vx, vy) and the smooth component f1.
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx, vy = r * np.cos(ang), r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})

        # Ground truth: a point is an outlier iff its sampled signal is non-zero.
        index_of_trueoutlier_bool = signal != 0
        outlier_true_orbit = [0 if s == 0 else 1 for s in signal]

        # Fresh base detectors for every configuration.
        detectors = [KNN(), LOF(), OCSVM()]
        clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "LSCP")
        tab_orbit = tab_orb.comparison()

# Keep the LSCP results under their own name.
tab_orbit_LSCP = tab_orbit

/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  _table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
  warnings.warn(

# Display the most recent orbit result table at full precision ...
tab_orbit

# ... and rounded to three decimals.
tab_orbit.round(3)

# Stack the per-method orbit tables into one comparison table. The GODE
# table is included without its last column.
_orbit_tables = [
    tab_orbit_gode.iloc[:, :-1],
    tab_orbit_LOF,
    tab_orbit_kNN,
    tab_orbit_CBLOF,
    tab_orbit_OCSVM,
    tab_orbit_MCD,
    tab_orbit_Feature,
    tab_orbit_ABOD,
    tab_orbit_Isolation,
    tab_orbit_HBOS,
    tab_orbit_SOS,
    tab_orbit_LSCP,
]
pd.concat(_orbit_tables)#.to_csv('./Example_2_Dataset.csv')

	Accuracy	Precision	Recall	F1	AUC	N	Contamination	kappa
GODE	0.9910	0.400	0.571429	0.470588	0.953532	1000	0.01	NaN
GODE	0.9862	0.460	0.353846	0.400000	0.875514	5000	0.01	NaN
GODE	0.9873	0.380	0.368932	0.374384	0.887724	10000	0.01	NaN
GODE	0.9570	0.560	0.571429	0.565657	0.893088	1000	0.05	NaN
GODE	0.9566	0.640	0.557491	0.595903	0.884623	5000	0.05	NaN
...	...	...	...	...	...	...	...	...
LSCP	0.9502	0.576	0.501742	0.536313	0.869403	5000.0	0.05	NaN
LSCP	0.9078	0.570	0.536723	0.552861	0.840433	5000.0	0.10	NaN
LSCP	0.9861	0.320	0.310680	0.315271	0.858496	10000.0	0.01	NaN
LSCP	0.9542	0.548	0.541502	0.544732	0.878705	10000.0	0.05	NaN
LSCP	0.9200	0.610	0.598039	0.603960	0.862852	10000.0	0.10	NaN

108 rows × 8 columns

Bunny

# tab_bunny = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
# NOTE(review): the initialisation above is commented out, yet the GODE cell
# below passes `tab_bunny` to fortable -- confirm it is defined earlier in
# the notebook, otherwise a fresh run raises NameError.

# Settings for the Bunny example.
eta_sparsity = 0.2
random_seed = 77
n = 2503

# NOTE: pickle.load can execute arbitrary code; only load a trusted file.
with open("../../2_research/Bunny.pkl", "rb") as file:
    loaded_obj = pickle.load(file)

# Coordinates, noisy signal (f + noise), clean signal and noise as columns.
_df = pd.DataFrame({
    'x': loaded_obj['x'],
    'y': loaded_obj['y'],
    'z': loaded_obj['z'],
    'fnoise': loaded_obj['f'] + loaded_obj['noise'],
    'f': loaded_obj['f'],
    'noise': loaded_obj['noise'],
})

# Ground truth: an entry is an outlier iff its 'unif' value is non-zero.
index_of_trueoutlier_bool_bunny = loaded_obj['unif'] != 0
outlier_true_bunny = [0 if u == 0 else 1 for u in loaded_obj['unif']]
_W = loaded_obj['W'].copy()

GODE_Bunny

# Fit GODE on the Bunny data; the squared residuals are handed to
# `comparison` as `compare_outlier`.
_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)
outlier_GODE_bunny_old = _BUNNY.df['Residual'].pow(2).tolist()

# Threshold: the value at position int(len * eta_sparsity) of the
# descending sort; values strictly above it are labelled 1.
sorted_data = sorted(outlier_GODE_bunny_old, reverse=True)
index = int(len(sorted_data) * eta_sparsity)
n_percent = sorted_data[index]
outlier_GODE_bunny = [int(score > n_percent) for score in outlier_GODE_bunny_old]

# No PyOD classifier is involved, hence clf is None and gode=True.
tab_bun = fortable(_df, None, tab_bunny, outlier_true_bunny, "GODE")
tab_bunny = tab_bun.comparison(
    compare_outlier=outlier_GODE_bunny_old,
    conf_outlier=outlier_GODE_bunny,
    gode=True,
)

LOF_Bunny

# LOF on the Bunny data; the result row is appended to `tab_bunny`.
# NOTE(review): the whole `_df` (including the `f` and `noise` columns) is
# passed to fortable here -- confirm that is the intended feature set.
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="LOF",
)
tab_bunny = tab_bun.comparison()

KNN_Bunny

# kNN on the Bunny data; the result row is appended to `tab_bunny`.
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="kNN",
)
tab_bunny = tab_bun.comparison()

CBLOF_Bunny

# CBLOF on the Bunny data; the result row is appended to `tab_bunny`.
clf = CBLOF(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="CBLOF",
)
tab_bunny = tab_bun.comparison()

OCSVM_Bunny

# OCSVM on the Bunny data; the result row is appended to `tab_bunny`.
np.random.seed(random_seed)
# PyOD's OCSVM derives its outlier labels from `contamination` (default 0.1),
# independently of `nu`. The other detectors in this notebook receive
# `contamination=eta_sparsity`, so pass it here too; otherwise OCSVM would
# flag 10% of the points instead of the configured fraction.
clf = OCSVM(nu=eta_sparsity, contamination=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "OCSVM")

tab_bunny = tab_bun.comparison()

MCD_Bunny

# MCD on the Bunny data; the result row is appended to `tab_bunny`.
clf = MCD(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="MCD",
)
tab_bunny = tab_bun.comparison()

Feature Bagging_Bunny

# Feature Bagging on the Bunny data; the row is appended to `tab_bunny`.
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="Feature Bagging",
)
tab_bunny = tab_bun.comparison()

ABOD_Bunny

# ABOD on the Bunny data; the result row is appended to `tab_bunny`.
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="ABOD",
)
tab_bunny = tab_bun.comparison()

IForest_Bunny

# Isolation Forest on the Bunny data; the row is appended to `tab_bunny`.
clf = IForest(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="Isolation Forest",
)
tab_bunny = tab_bun.comparison()

HBOS_Bunny

# HBOS on the Bunny data; the result row is appended to `tab_bunny`.
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="HBOS",
)
tab_bunny = tab_bun.comparison()

SOS_Bunny

# SOS on the Bunny data; the result row is appended to `tab_bunny`.
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="SOS",
)
tab_bunny = tab_bun.comparison()

LSCP_Bunny

# LSCP (base detectors: kNN, LOF, OCSVM with default settings) on the Bunny
# data; the result row is appended to `tab_bunny`.
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(
    _df,
    clf=clf,
    tab=tab_bunny,
    outlier_true=outlier_true_bunny,
    conf_name="LSCP",
)
tab_bunny = tab_bun.comparison()

# Display the Bunny result table at full precision ...
tab_bunny

# ... and rounded to three decimals.
tab_bunny.round(3)

# tab_bunny.to_csv('./Example_3_Dataset.csv')