import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import random
import pickle
import warnings

# Silence noisy numerical warnings raised during the experiments.
# ``ComplexWarning`` lives in ``np.exceptions`` since NumPy 1.25 and the
# top-level ``np.ComplexWarning`` alias was removed in NumPy 2.0, so look it
# up in the new location first and fall back for older NumPy versions.
try:
    _ComplexWarning = np.exceptions.ComplexWarning
except AttributeError:
    _ComplexWarning = np.ComplexWarning
warnings.simplefilter("ignore", _ComplexWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
from haversine import haversine
from IPython.display import HTML
import plotly.graph_objects as go
import copy
import tqdm
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
from pygsp import graphs, filters, plotting, utils
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, roc_curve, auc
# k-fold cross-validation: orbit
# Import
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.cblof import CBLOF
from pyod.models.ocsvm import OCSVM
from pyod.models.mcd import MCD
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.hbos import HBOS
from pyod.models.sos import SOS
from pyod.models.so_gaal import SO_GAAL
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lscp import LSCP
# Class
class Conf_matrx:
    """Confusion matrix and summary scores for one set of predicted labels.

    Parameters
    ----------
    original : array-like
        Reference (true) labels.
    compare : array-like
        Predicted labels to score against ``original``.
        (Callers in this file pass 0/1 outlier indicators.)
    """

    def __init__(self, original, compare):
        self.original = original
        self.compare = compare

    def conf(self, name):
        """Compute the confusion matrix and scores and store them on ``self``.

        Sets ``name``, ``conf_matrix``, ``acc``, ``pre``, ``rec`` and ``f1``.

        Parameters
        ----------
        name : str
            Label of the method being scored; stored as ``self.name``.
        """
        self.name = name
        self.conf_matrix = confusion_matrix(self.original, self.compare)

        # Optional plot of the confusion matrix (kept disabled):
        # fig, ax = plt.subplots(figsize=(5, 5))
        # ax.matshow(self.conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
        # for i in range(self.conf_matrix.shape[0]):
        #     for j in range(self.conf_matrix.shape[1]):
        #         ax.text(x=j, y=i,s=self.conf_matrix[i, j], va='center', ha='center', size='xx-large')
        # plt.xlabel('Predictions', fontsize=18)
        # plt.ylabel('Actuals', fontsize=18)
        # plt.title('Confusion Matrix of ' + str(name), fontsize=18)
        # plt.show()

        self.acc = accuracy_score(self.original, self.compare)
        self.pre = precision_score(self.original, self.compare)
        self.rec = recall_score(self.original, self.compare)
        self.f1 = f1_score(self.original, self.compare)

        # print('Accuracy: %.3f' % self.acc)
        # print('Precision: %.3f' % self.pre)
        # print('Recall: %.3f' % self.rec)
        # print('F1 Score: %.3f' % self.f1)
class Linear:
    """Graph-spectral smoother for a 1-D signal ``y`` observed at ``x``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``x`` and ``y``.
    W : numpy.ndarray, optional
        Square weight (adjacency) matrix of the graph.  When omitted, the
        module-level array ``w`` is used, which is the original behaviour.
    """

    def __init__(self, df, W=None):
        self.df = df
        self.y = df.y.to_numpy()
        self.x = df.x.to_numpy()
        self.n = len(self.y)
        self.W = w if W is None else W

    def _eigen(self):
        """Eigendecompose L = D^{-1/2} (D - W) D^{-1/2}.

        Stores the matrix in ``self.L``, eigenvalues in ``self.lamb``,
        eigenvectors (columns) in ``self.Psi`` and ``diag(lamb)`` in
        ``self.Lamb``.
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Row/column scaling by broadcasting gives the same entries as
        # diag(d^-1/2) @ (D - W) @ diag(d^-1/2) without two dense O(n^3)
        # matrix products.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, sd=20):  # fit with ebayesthresh
        """Threshold the spectrum of ``y`` and reconstruct a fitted signal.

        Adds the columns ``yHat`` and ``Residual`` (``y - yHat``) to
        ``self.df``.

        Parameters
        ----------
        sd : float
            Forwarded as the ``sd`` argument of R's
            ``EbayesThresh::ebayesthresh``.
        """
        self._eigen()
        self.ybar = self.Psi.T @ self.y  # ybar := graph Fourier transform of y
        self.power = self.ybar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power), sd=sd))
        # Keep only coefficients whose thresholded power is positive.
        self.ybar_threshed = np.where(self.power_threshed > 0, self.ybar, 0)
        self.yhat = self.Psi @ self.ybar_threshed
        self.df = self.df.assign(yHat=self.yhat)
        self.df = self.df.assign(Residual=self.df.y - self.df.yHat)
class Orbit:
    """Graph-spectral smoother for a signal ``f`` observed at 2-D points.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``x``, ``y`` (coordinates) and ``f``
        (observed signal).
    """

    def __init__(self, df):
        self.df = df
        self.f = df.f.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.n = len(self.f)
        self.theta = None

    def get_distance(self):
        """Fill ``self.D`` with pairwise Euclidean distances of (x, y)."""
        locations = np.stack([self.x, self.y], axis=1)
        self.D = np.zeros([self.n, self.n])
        for i in tqdm.tqdm(range(self.n)):
            # One vectorised row of the upper triangle per step instead of
            # an inner Python loop over j (same values up to rounding).
            self.D[i, i:] = np.linalg.norm(locations[i:] - locations[i], axis=1)
        # Mirror the upper triangle; the diagonal is zero so it is unaffected.
        self.D = self.D + self.D.T

    def get_weightmatrix(self, theta=1, beta=0.5, kappa=4000):
        """Build ``self.W = exp(-(dist / theta)**2)`` from ``self.D``.

        Parameters
        ----------
        theta : float
            Scale of the Gaussian kernel; stored in ``self.theta``.
        beta : float
            Accepted for interface compatibility; not used.
        kappa : float
            Distances that are not below ``kappa`` are replaced by 0 before
            the kernel is applied.
        """
        self.theta = theta
        # NOTE(review): entries with D >= kappa become distance 0 and hence
        # weight exp(0) = 1 (as does the diagonal).  If the intent was to cut
        # far-apart pairs to weight 0 this needs changing - confirm before
        # touching, since reported results depend on it.
        dist = np.where(self.D < kappa, self.D, 0)
        self.W = np.exp(-(dist / self.theta)**2)

    def _eigen(self):
        """Eigendecompose L = D^{-1/2} (D - W) D^{-1/2}.

        Stores the matrix in ``self.L``, eigenvalues in ``self.lamb``,
        eigenvectors (columns) in ``self.Psi`` and ``diag(lamb)`` in
        ``self.Lamb``.
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Broadcasting replaces two dense O(n^3) products with diagonal
        # matrices and yields the same entries.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, sd=5):  # fit with ebayesthresh
        """Threshold the spectrum of ``f`` and reconstruct a fitted signal.

        Requires ``self.W`` (see ``get_weightmatrix``).  Adds the columns
        ``fHat`` and ``Residual`` (``f - fHat``) to ``self.df``.

        Parameters
        ----------
        sd : float
            Forwarded as the ``sd`` argument of R's
            ``EbayesThresh::ebayesthresh``.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.f  # fbar := graph Fourier transform of f
        self.power = self.fbar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power), sd=sd))
        # Keep only coefficients whose thresholded power is positive.
        self.fbar_threshed = np.where(self.power_threshed > 0, self.fbar, 0)
        self.fhat = self.Psi @ self.fbar_threshed
        self.df = self.df.assign(fHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.f - self.df.fHat)
class BUNNY:
    """Graph-spectral smoother for a noisy signal on a given graph.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``x``, ``y``, ``z``, ``f`` and ``noise``.
        The signal that gets smoothed is ``f + noise``.
    W : numpy.ndarray, optional
        Square weight matrix of the graph.  When omitted, the module-level
        array ``_W`` is used, which is the original behaviour.
    """

    def __init__(self, df, W=None):
        self.df = df
        self.f = df.f.to_numpy()
        self.z = df.z.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.noise = df.noise.to_numpy()
        self.fnoise = self.f + self.noise
        self.W = _W if W is None else W
        self.n = len(self.f)
        self.theta = None

    def _eigen(self):
        """Eigendecompose L = D^{-1/2} (D - W) D^{-1/2}.

        Stores the matrix in ``self.L``, eigenvalues in ``self.lamb``,
        eigenvectors (columns) in ``self.Psi`` and ``diag(lamb)`` in
        ``self.Lamb``.
        """
        d = self.W.sum(axis=1)
        D = np.diag(d)
        inv_sqrt_d = 1 / np.sqrt(d)
        # Broadcasting replaces two dense O(n^3) products with diagonal
        # matrices and yields the same entries.
        self.L = inv_sqrt_d[:, None] * (D - self.W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, sd=5):  # fit with ebayesthresh
        """Threshold the spectrum of ``f + noise`` and reconstruct a fit.

        Adds the columns ``fnoise``, ``fHat`` and ``Residual``
        (``f + noise - fHat``) to ``self.df``.

        Parameters
        ----------
        sd : float
            Forwarded as the ``sd`` argument of R's
            ``EbayesThresh::ebayesthresh``.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.fnoise  # fbar := graph Fourier transform of fnoise
        self.power = self.fbar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power), sd=sd))
        # Keep only coefficients whose thresholded power is positive.
        self.fbar_threshed = np.where(self.power_threshed > 0, self.fbar, 0)
        self.fhat = self.Psi @ self.fbar_threshed
        self.df = self.df.assign(fnoise=self.fnoise)
        self.df = self.df.assign(fHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.f + self.df.noise - self.df.fHat)
class fortable:
    """Score one outlier detector and append the result to a results table.

    NOTE: a later ``class fortable`` in this file (which additionally records
    ``kappa``) rebinds this name when the file is executed top to bottom.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to the detector (columns ``x``, ``y`` and optionally
        ``f`` or ``fnoise``).
    clf : object or None
        Detector exposing ``fit``, ``labels_``, ``decision_function`` and
        ``fit_predict``; may be ``None`` when ``comparison(gode=True)``.
    tab : pandas.DataFrame
        Results table the new row is appended to.
    outlier_true : array-like
        True outlier labels.
    conf_name : str
        Row label of the method; ``"LOF"`` selects ``fit_predict`` labels.
    """

    def __init__(self, df, clf, tab, outlier_true, conf_name="Method"):
        self.df = df
        self.clf = clf
        self.conf_name = conf_name
        self.tabb = tab
        self.outlier_true = outlier_true

    def _forfit(self):
        """Fit ``self.clf`` once on the most specific feature set present."""
        # A single if/elif/else chain: the previous separate ``if 'f' not in``
        # re-fitted on ['x', 'y'] whenever only 'fnoise' was present, leaving
        # the model trained on the wrong columns.
        if 'fnoise' in self.df.columns:
            self.clf.fit(self.df[['x', 'y', 'fnoise']])
        elif 'f' in self.df.columns:
            self.clf.fit(self.df[['x', 'y', 'f']])
        else:
            self.clf.fit(self.df[['x', 'y']])

    def _forlabels(self):
        """Copy the fitted detector's training labels into ``self.labels``."""
        self.labels = list(self.clf.labels_)

    def _forpredict(self):
        """Store ``clf.fit_predict(self.df)`` in ``self.predict``."""
        self.predict = self.clf.fit_predict(self.df)

    def comparison(self, compare_outlier=None, conf_outlier=None, gode=False):
        """Return ``self.tabb`` with one appended row of scores.

        Parameters
        ----------
        compare_outlier : array-like, optional
            Outlier scores for the ROC curve; used as given when ``gode``.
        conf_outlier : array-like, optional
            Predicted labels for the confusion matrix; used as given when
            ``gode``.
        gode : bool
            When false, scores and labels are obtained from ``self.clf``;
            when true, the two arguments above are used unchanged.

        Notes
        -----
        Reads the module-level globals ``n`` and ``eta_sparsity`` at call
        time to fill the ``N`` and ``Contamination`` columns.
        """
        if not gode:
            self._forfit()
            self._forlabels()
            if 'fnoise' in self.df.columns:
                compare_outlier = self.clf.decision_function(np.array(self.df[['x', 'y', 'fnoise']]))
            else:
                compare_outlier = self.clf.decision_function(self.df)
            if self.conf_name == "LOF":
                self._forpredict()
                conf_outlier = self.predict
            else:
                conf_outlier = self.labels

        fpr, tpr, thresh = roc_curve(self.outlier_true, compare_outlier)

        _conf = Conf_matrx(self.outlier_true, conf_outlier)
        _conf.conf(self.conf_name)

        _table = pd.concat([self.tabb,
                   pd.DataFrame({"Accuracy":[_conf.acc],"Precision":[_conf.pre],"Recall":[_conf.rec],"F1":[_conf.f1],"AUC":[auc(fpr, tpr)],"N":n, "Contamination": eta_sparsity},index = [_conf.name])])

        return _table
class fortable:
    """Score one outlier detector and append the result to a results table.

    This variant also records the global ``kappa`` in each row.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to the detector (columns ``x``, ``y`` and optionally
        ``f`` or ``fnoise``).
    clf : object or None
        Detector exposing ``fit``, ``labels_``, ``decision_function`` and
        ``fit_predict``; may be ``None`` when ``comparison(gode=True)``.
    tab : pandas.DataFrame
        Results table the new row is appended to.
    outlier_true : array-like
        True outlier labels.
    conf_name : str
        Row label of the method; ``"LOF"`` selects ``fit_predict`` labels.
    """

    def __init__(self, df, clf, tab, outlier_true, conf_name="Method"):
        self.df = df
        self.clf = clf
        self.conf_name = conf_name
        self.tabb = tab
        self.outlier_true = outlier_true

    def _forfit(self):
        """Fit ``self.clf`` once on the most specific feature set present."""
        # A single if/elif/else chain: the previous separate ``if 'f' not in``
        # re-fitted on ['x', 'y'] whenever only 'fnoise' was present, leaving
        # the model trained on the wrong columns.
        if 'fnoise' in self.df.columns:
            self.clf.fit(self.df[['x', 'y', 'fnoise']])
        elif 'f' in self.df.columns:
            self.clf.fit(self.df[['x', 'y', 'f']])
        else:
            self.clf.fit(self.df[['x', 'y']])

    def _forlabels(self):
        """Copy the fitted detector's training labels into ``self.labels``."""
        self.labels = list(self.clf.labels_)

    def _forpredict(self):
        """Store ``clf.fit_predict(self.df)`` in ``self.predict``."""
        self.predict = self.clf.fit_predict(self.df)

    def comparison(self, compare_outlier=None, conf_outlier=None, gode=False):
        """Return ``self.tabb`` with one appended row of scores.

        Parameters
        ----------
        compare_outlier : array-like, optional
            Outlier scores for the ROC curve; used as given when ``gode``.
        conf_outlier : array-like, optional
            Predicted labels for the confusion matrix; used as given when
            ``gode``.
        gode : bool
            When false, scores and labels are obtained from ``self.clf``;
            when true, the two arguments above are used unchanged.

        Notes
        -----
        Reads the module-level globals ``n``, ``eta_sparsity`` and ``kappa``
        at call time to fill the ``N``, ``Contamination`` and ``kappa``
        columns.
        """
        if not gode:
            self._forfit()
            self._forlabels()
            if 'fnoise' in self.df.columns:
                compare_outlier = self.clf.decision_function(np.array(self.df[['x', 'y', 'fnoise']]))
            else:
                compare_outlier = self.clf.decision_function(self.df)
            if self.conf_name == "LOF":
                self._forpredict()
                conf_outlier = self.predict
            else:
                conf_outlier = self.labels

        fpr, tpr, thresh = roc_curve(self.outlier_true, compare_outlier)

        _conf = Conf_matrx(self.outlier_true, conf_outlier)
        _conf.conf(self.conf_name)

        _table = pd.concat([self.tabb,
                   pd.DataFrame({"Accuracy":[_conf.acc],"Precision":[_conf.pre],"Recall":[_conf.rec],"F1":[_conf.f1],"AUC":[auc(fpr, tpr)],"N":n, "Contamination": eta_sparsity,"kappa":kappa},index = [_conf.name])])

        return _table
# --- Linear example: data generation ---------------------------------------
# Results table for this example.  It was only present as a comment although
# the cells below read ``tab_linear``, so a top-to-bottom run would fail.
tab_linear = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])

n = 10000
eta_sparsity = 0.2
random_seed = 77

np.random.seed(6)
epsilon = np.around(np.random.normal(size=n), 15)
# Sparse outlier signal: values in [-7, -5] or [5, 7], or exactly 0,
# drawn with replacement from a pool of size n.
signal = np.random.choice(
    np.concatenate((
        np.random.uniform(-7, -5, round(n * eta_sparsity / 2)).round(15),
        np.random.uniform(5, 7, round(n * eta_sparsity / 2)).round(15),
        np.repeat(0, n - round(n * eta_sparsity)),
    )),
    n,
)
eta = signal + epsilon

# True label is 1 wherever the sparse signal is non-zero.
outlier_true_linear = (signal != 0).astype(int).tolist()

x_1 = np.linspace(0, 2, n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta  # eta = signal + epsilon

_df = pd.DataFrame({'x': x_1, 'y': y_1})
# Weight matrix linking each index to its immediate neighbours
# (|i - j| == 1 -> 1, everything else including the diagonal -> 0).
# Index assignment replaces an n*n Python double loop and gives the same matrix.
w = np.zeros((n, n))
_idx = np.arange(n - 1)
w[_idx, _idx + 1] = 1
w[_idx + 1, _idx] = 1

index_of_trueoutlier_bool = signal != 0
# --- GODE_Linear ------------------------------------------------------------
_Linear = Linear(_df)
_Linear.fit(sd=20)

# Outlier score = squared residual of the spectral fit.
outlier_GODE_linear_old = (_Linear.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_linear_old, reverse=True)
# Threshold at the score ranked int(N * eta_sparsity) from the top
# (the name ``five_percent`` is historical).
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_linear = list(map(lambda x: 1 if x > five_percent else 0, outlier_GODE_linear_old))

tab_lin = fortable(_df, clf=None, tab=tab_linear, outlier_true=outlier_true_linear, conf_name="GODE")
tab_linear = tab_lin.comparison(compare_outlier=outlier_GODE_linear_old, conf_outlier=outlier_GODE_linear, gode=True)
# --- Baseline detectors on the linear example -------------------------------
# Each entry: (row label, reseed the global NumPy RNG first?, factory).
# Detectors that accept ``random_state`` are seeded through it; the others
# get ``np.random.seed(random_seed)`` immediately before construction,
# exactly as in the original per-detector cells.
# NOTE(review): OCSVM is given ``nu`` only, so its ``contamination`` stays at
# the library default - confirm this is intended.
_linear_baselines = [
    ("LOF", True, lambda: LOF(contamination=eta_sparsity)),
    ("KNN", True, lambda: KNN(contamination=eta_sparsity)),
    ("CBLOF", False, lambda: CBLOF(contamination=eta_sparsity, random_state=random_seed)),
    ("OCSVM", True, lambda: OCSVM(nu=eta_sparsity)),
    ("MCD", False, lambda: MCD(contamination=eta_sparsity, random_state=random_seed)),
    ("Feature Bagging", False, lambda: FeatureBagging(contamination=eta_sparsity, random_state=random_seed)),
    ("ABOD", True, lambda: ABOD(contamination=eta_sparsity)),
    ("Isolation Forest", False, lambda: IForest(contamination=eta_sparsity, random_state=random_seed)),
    ("HBOS", True, lambda: HBOS(contamination=eta_sparsity)),
    ("SOS", True, lambda: SOS(contamination=eta_sparsity)),
]

for _label, _reseed, _make_clf in _linear_baselines:
    if _reseed:
        np.random.seed(random_seed)
    clf = _make_clf()
    tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, _label)
    tab_linear = tab_lin.comparison()

# LSCP_Linear: ensemble over three default base detectors.
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "LSCP")
tab_linear = tab_lin.comparison()

# tab_linear
tab_linear
round(tab_linear, 3)  # .to_csv('./Example_1_2.csv')
# tab_linear.to_csv('./Example_1_Dataset.csv')
# --- Orbit example: parameters and data generation --------------------------
# tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
n = 1000
eta_sparsity = 0.05
random_seed = 77

n_values = list([1000, 5000, 10000])
eta_sparsity_list = list([0.01, 0.05, 0.1])
# Last column was misspelled "kapapa", which produced a stray all-NaN column
# next to the "kappa" column written by ``fortable``.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

np.random.seed(777)
epsilon = np.around(np.random.normal(size=n), 15)
# Sparse outlier signal: values in [-4, -1] or [1, 4], or exactly 0,
# drawn with replacement from a pool of size n.
signal = np.random.choice(
    np.concatenate((
        np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
        np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
        np.repeat(0, n - round(n * eta_sparsity)),
    )),
    n,
)
eta = signal + epsilon

# Closed curve in polar form: radius r at angle ang.
pi = np.pi
ang = np.linspace(-pi, pi - 2 * pi / n, n)
r = 5 + np.cos(np.linspace(0, 12 * pi, n))
vx = r * np.cos(ang)
vy = r * np.sin(ang)
f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
f = f1 + eta
_df = pd.DataFrame({'x': vx, 'y': vy, 'f': f})

# True label is 1 wherever the sparse signal is non-zero.
outlier_true_orbit = (signal != 0).astype(int).tolist()
# --- GODE_Orbit -------------------------------------------------------------
n_values = list([1000, 5000, 10000])
eta_sparsity_list = list([0.01, 0.05, 0.1])
random_seed = 77
kappa = 1.21

tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

_Orbit = Orbit(_df)
_Orbit.get_distance()
# Kernel scale = mean of the positive pairwise distances.
_Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D > 0].mean()), kappa=kappa)
_Orbit.fit(sd=15)

# Outlier score = squared residual of the spectral fit.
outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_orbit_old, reverse=True)
# Threshold at the score ranked int(N * eta_sparsity) from the top
# (the name ``five_percent`` is historical).
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0, outlier_GODE_orbit_old))

tab_orb = fortable(_df, clf=None, tab=tab_orbit, outlier_true=outlier_true_orbit, conf_name="GODE")
tab_orbit = tab_orb.comparison(compare_outlier=outlier_GODE_orbit_old, conf_outlier=outlier_GODE_orbit, gode=True)
100%|██████████| 1000/1000 [00:01<00:00, 796.60it/s]
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
tab_orbit
Accuracy | Precision | Recall | F1 | AUC | N | Contamination | kappa | |
---|---|---|---|---|---|---|---|---|
GODE | 0.957 | 0.56 | 0.571429 | 0.565657 | 0.893088 | 1000 | 0.05 | NaN |
tab_orbit
Accuracy | Precision | Recall | F1 | AUC | N | Contamination | kappa | |
---|---|---|---|---|---|---|---|---|
GODE | 0.961 | 0.6 | 0.612245 | 0.606061 | 0.893023 | 1000 | 0.05 | NaN |
# GODE on the orbit example for every (eta_sparsity, n) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])
kappa = 1.21
for eta_sparsity in eta_sparsity_list:
    for n in n_values:
        # Regenerate the data set with a fixed seed for this (eta, n).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = (signal != 0).astype(int).tolist()
        index_of_trueoutlier_bool = signal != 0

        _df_orbit = _df[['x', 'y', 'f']]

        _Orbit = Orbit(_df_orbit)
        _Orbit.get_distance()
        _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D > 0].mean()), kappa=kappa)
        _Orbit.fit(sd=15)

        # Score = squared residual; threshold at rank int(N * eta_sparsity).
        outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_orbit_old, reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        percent = sorted_data[index]
        outlier_GODE_orbit = list(map(lambda x: 1 if x > percent else 0, outlier_GODE_orbit_old))

        tab_orb = fortable(_df_orbit, clf=None, tab=tab_orbit, outlier_true=outlier_true_orbit, conf_name="GODE")
        tab_orbit = tab_orb.comparison(compare_outlier=outlier_GODE_orbit_old, conf_outlier=outlier_GODE_orbit, gode=True)
100%|██████████| 1000/1000 [00:01<00:00, 762.07it/s]
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
100%|██████████| 5000/5000 [00:31<00:00, 159.73it/s]
100%|██████████| 10000/10000 [02:04<00:00, 80.49it/s]
100%|██████████| 1000/1000 [00:01<00:00, 774.51it/s]
100%|██████████| 5000/5000 [00:31<00:00, 158.99it/s]
100%|██████████| 10000/10000 [02:06<00:00, 78.91it/s]
100%|██████████| 1000/1000 [00:01<00:00, 797.18it/s]
100%|██████████| 5000/5000 [00:31<00:00, 159.37it/s]
100%|██████████| 10000/10000 [02:06<00:00, 78.86it/s]
# Keep a copy of the GODE results before ``tab_orbit`` is reused below.
tab_orbit_gode = tab_orbit.copy()
tab_orbit_gode.round(3)
Accuracy | Precision | Recall | F1 | AUC | N | Contamination | kappa | |
---|---|---|---|---|---|---|---|---|
GODE | 0.991 | 0.400 | 0.571 | 0.471 | 0.954 | 1000 | 0.01 | NaN |
GODE | 0.986 | 0.460 | 0.354 | 0.400 | 0.876 | 5000 | 0.01 | NaN |
GODE | 0.987 | 0.380 | 0.369 | 0.374 | 0.888 | 10000 | 0.01 | NaN |
GODE | 0.957 | 0.560 | 0.571 | 0.566 | 0.893 | 1000 | 0.05 | NaN |
GODE | 0.957 | 0.640 | 0.557 | 0.596 | 0.885 | 5000 | 0.05 | NaN |
GODE | 0.959 | 0.592 | 0.585 | 0.588 | 0.891 | 10000 | 0.05 | NaN |
GODE | 0.918 | 0.670 | 0.578 | 0.620 | 0.858 | 1000 | 0.10 | NaN |
GODE | 0.917 | 0.614 | 0.578 | 0.596 | 0.863 | 5000 | 0.10 | NaN |
GODE | 0.924 | 0.628 | 0.616 | 0.622 | 0.880 | 10000 | 0.10 | NaN |
# _Orbit = Orbit(_df)
# _Orbit.get_distance()
# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500)
# _Orbit.fit(sd=15)
# outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
# sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
# index = int(len(sorted_data) * eta_sparsity)
# five_percent = sorted_data[index]
# outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))
# tab_orb = fortable(_df, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")
# tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)
# --- LOF_Orbit --------------------------------------------------------------
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)
tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "LOF")
tab_orbit = tab_orb.comparison()
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
# LOF on the orbit example for every (n, eta_sparsity) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Regenerate the data set with a fixed seed for this (n, eta).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = (signal != 0).astype(int).tolist()
        index_of_trueoutlier_bool = signal != 0

        np.random.seed(random_seed)
        clf = LOF(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "LOF")
        tab_orbit = tab_orb.comparison()
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/utils/deprecation.py:86: FutureWarning: Function fit_predict is deprecated
warnings.warn(msg, category=FutureWarning)
# Keep a reference to the LOF results before ``tab_orbit`` is rebound.
tab_orbit_LOF = tab_orbit
# --- KNN_Orbit --------------------------------------------------------------
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)
tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "kNN")
tab_orbit = tab_orb.comparison()

# KNN on the orbit example for every (n, eta_sparsity) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Regenerate the data set with a fixed seed for this (n, eta).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = (signal != 0).astype(int).tolist()
        index_of_trueoutlier_bool = signal != 0

        np.random.seed(random_seed)
        clf = KNN(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "kNN")
        tab_orbit = tab_orb.comparison()

tab_orbit_kNN = tab_orbit
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
# --- CBLOF_Orbit ------------------------------------------------------------
clf = CBLOF(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "CBLOF")
tab_orbit = tab_orb.comparison()
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
# CBLOF on the orbit example for every (n, eta_sparsity) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Regenerate the data set with a fixed seed for this (n, eta).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = (signal != 0).astype(int).tolist()
        index_of_trueoutlier_bool = signal != 0

        clf = CBLOF(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "CBLOF")
        tab_orbit = tab_orb.comparison()

tab_orbit_CBLOF = tab_orbit
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
# --- OCSVM_Orbit ------------------------------------------------------------
# NOTE(review): only ``nu`` is set; ``contamination`` keeps the library
# default - confirm this is intended.
np.random.seed(random_seed)
clf = OCSVM(nu=eta_sparsity)
tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "OCSVM")
tab_orbit = tab_orb.comparison()
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
# OCSVM on the orbit example for every (n, eta_sparsity) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Regenerate the data set with a fixed seed for this (n, eta).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = (signal != 0).astype(int).tolist()
        index_of_trueoutlier_bool = signal != 0

        np.random.seed(random_seed)
        clf = OCSVM(nu=eta_sparsity)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "OCSVM")
        tab_orbit = tab_orb.comparison()

tab_orbit_OCSVM = tab_orbit
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but OneClassSVM was fitted without feature names
warnings.warn(
# --- MCD_Orbit --------------------------------------------------------------
clf = MCD(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "MCD")
tab_orbit = tab_orb.comparison()

# MCD on the orbit example for every (n, eta_sparsity) combination.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Regenerate the data set with a fixed seed for this (n, eta).
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        signal = np.random.choice(
            np.concatenate((
                np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15),
                np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15),
                np.repeat(0, n - round(n * eta_sparsity)),
            )),
            n,
        )
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2 * pi / n, n)
        r = 5 + np.cos(np.linspace(0, 12 * pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6 * pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x': vx, 'y': vy, 'f': f, 'f1': f1})
        outlier_true_orbit = (signal != 0).astype(int).tolist()
        index_of_trueoutlier_bool = signal != 0

        clf = MCD(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x', 'y', 'f']], clf, tab_orbit, outlier_true_orbit, "MCD")
        tab_orbit = tab_orb.comparison()

tab_orbit_MCD = tab_orbit
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
Feature Bagging_Orbit
# Fit Feature Bagging on the current Orbit data and append its scores to tab_orbit.
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Feature Bagging")
tab_orbit = tab_orb.comparison()
# Evaluate Feature Bagging on the simulated Orbit data for every (n, eta_sparsity) setting.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed on every iteration so the simulated data is reproducible.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        # Sparse outlier signal: n draws (with replacement) from a pool of
        # uniform values in [-4, -1) and [1, 4) plus zeros for the remainder.
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2*pi/n, n)
        r = 5 + np.cos(np.linspace(0, 12*pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6*pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground-truth labels: 1 where the injected signal is non-zero.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
        clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Feature Bagging")
        tab_orbit = tab_orb.comparison()
# Keep the accumulated Feature Bagging results under a method-specific name.
tab_orbit_Feature = tab_orbit
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
ABOD_Orbit
# Fit ABOD on the current Orbit data and append its scores to tab_orbit.
# The constructor call below takes no random_state, so the global NumPy seed is set instead.
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)
tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "ABOD")
tab_orbit = tab_orb.comparison()
# Evaluate ABOD on the simulated Orbit data for every (n, eta_sparsity) setting.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed on every iteration so the simulated data is reproducible.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        # Sparse outlier signal: n draws (with replacement) from a pool of
        # uniform values in [-4, -1) and [1, 4) plus zeros for the remainder.
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2*pi/n, n)
        r = 5 + np.cos(np.linspace(0, 12*pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6*pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground-truth labels: 1 where the injected signal is non-zero.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
        # Seed the global RNG before building the detector (no random_state is passed to it).
        np.random.seed(random_seed)
        clf = ABOD(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "ABOD")
        tab_orbit = tab_orb.comparison()
# Keep the accumulated ABOD results under a method-specific name.
tab_orbit_ABOD = tab_orbit
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
IForest_Orbit
# Fit Isolation Forest on the current Orbit data and append its scores to tab_orbit.
clf = IForest(contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Isolation Forest")
tab_orbit = tab_orb.comparison()
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
# Evaluate Isolation Forest on the simulated Orbit data for every (n, eta_sparsity) setting.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed on every iteration so the simulated data is reproducible.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        # Sparse outlier signal: n draws (with replacement) from a pool of
        # uniform values in [-4, -1) and [1, 4) plus zeros for the remainder.
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2*pi/n, n)
        r = 5 + np.cos(np.linspace(0, 12*pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6*pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground-truth labels: 1 where the injected signal is non-zero.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
        clf = IForest(contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Isolation Forest")
        tab_orbit = tab_orb.comparison()
# Keep the accumulated Isolation Forest results under a method-specific name.
tab_orbit_Isolation = tab_orbit
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but IsolationForest was fitted without feature names
warnings.warn(
HBOS_Orbit
# Fit HBOS on the current Orbit data and append its scores to tab_orbit.
# The constructor call below takes no random_state, so the global NumPy seed is set instead.
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)
tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "HBOS")
tab_orbit = tab_orb.comparison()
# Evaluate HBOS on the simulated Orbit data for every (n, eta_sparsity) setting.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed on every iteration so the simulated data is reproducible.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        # Sparse outlier signal: n draws (with replacement) from a pool of
        # uniform values in [-4, -1) and [1, 4) plus zeros for the remainder.
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2*pi/n, n)
        r = 5 + np.cos(np.linspace(0, 12*pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6*pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground-truth labels: 1 where the injected signal is non-zero.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
        # Seed the global RNG before building the detector (no random_state is passed to it).
        np.random.seed(random_seed)
        clf = HBOS(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "HBOS")
        tab_orbit = tab_orb.comparison()
# Keep the accumulated HBOS results under a method-specific name.
tab_orbit_HBOS = tab_orbit
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
SOS_Orbit
# Fit SOS on the current Orbit data and append its scores to tab_orbit.
# The constructor call below takes no random_state, so the global NumPy seed is set instead.
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)
tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "SOS")
tab_orbit = tab_orb.comparison()
# Evaluate SOS on the simulated Orbit data for every (n, eta_sparsity) setting.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed on every iteration so the simulated data is reproducible.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        # Sparse outlier signal: n draws (with replacement) from a pool of
        # uniform values in [-4, -1) and [1, 4) plus zeros for the remainder.
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2*pi/n, n)
        r = 5 + np.cos(np.linspace(0, 12*pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6*pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground-truth labels: 1 where the injected signal is non-zero.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
        # Seed the global RNG before building the detector (no random_state is passed to it).
        np.random.seed(random_seed)
        clf = SOS(contamination=eta_sparsity)
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "SOS")
        tab_orbit = tab_orb.comparison()
# Keep the accumulated SOS results under a method-specific name.
tab_orbit_SOS = tab_orbit
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
LSCP_Orbit
# Fit LSCP (built from default KNN, LOF and OCSVM base detectors) on the
# current Orbit data and append its scores to tab_orbit.
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LSCP")
tab_orbit = tab_orb.comparison()
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
# Evaluate LSCP on the simulated Orbit data for every (n, eta_sparsity) setting.
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        # Re-seed on every iteration so the simulated data is reproducible.
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n), 15)
        # Sparse outlier signal: n draws (with replacement) from a pool of
        # uniform values in [-4, -1) and [1, 4) plus zeros for the remainder.
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi = np.pi
        ang = np.linspace(-pi, pi - 2*pi/n, n)
        r = 5 + np.cos(np.linspace(0, 12*pi, n))
        vx = r * np.cos(ang)
        vy = r * np.sin(ang)
        f1 = 10 * np.sin(np.linspace(0, 6*pi, n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        # Ground-truth labels: 1 where the injected signal is non-zero.
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
        # Fresh base detectors are created for every setting.
        detectors = [KNN(), LOF(), OCSVM()]
        clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LSCP")
        tab_orbit = tab_orb.comparison()
# Keep the accumulated LSCP results under a method-specific name.
tab_orbit_LSCP = tab_orbit
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/tmp/ipykernel_255578/3780776973.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
_table = pd.concat([self.tabb,
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
/home/csy/anaconda3/envs/pygsp/lib/python3.10/site-packages/pyod/models/lscp.py:382: UserWarning: The number of histogram bins is greater than the number of classifiers, reducing n_bins to n_clf.
warnings.warn(
tab_orbit
# Round the Orbit results table to three decimal places (the result is not assigned).
round(tab_orbit,3)
# Stack the per-method Orbit result tables into one summary table.
# Only the GODE table is sliced: all of its columns except the last are kept.
pd.concat([tab_orbit_gode.iloc[:, :-1],
           tab_orbit_LOF,
           tab_orbit_kNN,
           tab_orbit_CBLOF,
           tab_orbit_OCSVM,
           tab_orbit_MCD,
           tab_orbit_Feature,
           tab_orbit_ABOD,
           tab_orbit_Isolation,
           tab_orbit_HBOS,
           tab_orbit_SOS,
           tab_orbit_LSCP])#.to_csv('./Example_2_Dataset.csv')
| | Accuracy | Precision | Recall | F1 | AUC | N | Contamination | kappa |
---|---|---|---|---|---|---|---|---|
GODE | 0.9910 | 0.400 | 0.571429 | 0.470588 | 0.953532 | 1000 | 0.01 | NaN |
GODE | 0.9862 | 0.460 | 0.353846 | 0.400000 | 0.875514 | 5000 | 0.01 | NaN |
GODE | 0.9873 | 0.380 | 0.368932 | 0.374384 | 0.887724 | 10000 | 0.01 | NaN |
GODE | 0.9570 | 0.560 | 0.571429 | 0.565657 | 0.893088 | 1000 | 0.05 | NaN |
GODE | 0.9566 | 0.640 | 0.557491 | 0.595903 | 0.884623 | 5000 | 0.05 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... |
LSCP | 0.9502 | 0.576 | 0.501742 | 0.536313 | 0.869403 | 5000.0 | 0.05 | NaN |
LSCP | 0.9078 | 0.570 | 0.536723 | 0.552861 | 0.840433 | 5000.0 | 0.10 | NaN |
LSCP | 0.9861 | 0.320 | 0.310680 | 0.315271 | 0.858496 | 10000.0 | 0.01 | NaN |
LSCP | 0.9542 | 0.548 | 0.541502 | 0.544732 | 0.878705 | 10000.0 | 0.05 | NaN |
LSCP | 0.9200 | 0.610 | 0.598039 | 0.603960 | 0.862852 | 10000.0 | 0.10 | NaN |
108 rows × 8 columns
Bunny
# tab_bunny = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
# Settings for the Bunny experiment.
eta_sparsity = 0.2
random_seed = 77
n = 2503
# NOTE: pickle.load can execute arbitrary code; only load this file from a trusted source.
with open("../../2_research/Bunny.pkl", "rb") as file:
    loaded_obj = pickle.load(file)
# Assemble the working frame from the loaded object; 'fnoise' is f + noise.
_df = pd.DataFrame({'x':loaded_obj['x'],'y':loaded_obj['y'],'z':loaded_obj['z'],'fnoise':loaded_obj['f']+loaded_obj['noise'],'f':loaded_obj['f'],'noise':loaded_obj['noise']})
# Ground-truth labels: 1 where the 'unif' entry is non-zero.
outlier_true_bunny = loaded_obj['unif'].copy()
outlier_true_bunny = list(map(lambda x: 1 if x !=0 else 0,outlier_true_bunny))
index_of_trueoutlier_bool_bunny = loaded_obj['unif']!=0
_W = loaded_obj['W'].copy()
GODE_Bunny
# GODE on the Bunny data: fit, then flag the points with the largest squared residuals.
_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)
outlier_GODE_bunny_old = (_BUNNY.df['Residual']**2).tolist()
# Threshold = the score at position int(len * eta_sparsity) in descending order;
# anything strictly above it is labelled an outlier.
sorted_data = sorted(outlier_GODE_bunny_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
n_percent = sorted_data[index]
outlier_GODE_bunny = list(map(lambda x: 1 if x > n_percent else 0,outlier_GODE_bunny_old))
# No pyod classifier here (clf=None): raw scores and labels are passed to comparison() directly.
tab_bun = fortable(_df, clf = None, tab =tab_bunny, outlier_true=outlier_true_bunny, conf_name ="GODE")
tab_bunny = tab_bun.comparison(compare_outlier = outlier_GODE_bunny_old, conf_outlier = outlier_GODE_bunny, gode = True)
LOF_Bunny
# Fit LOF on the Bunny data and append its scores to tab_bunny.
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "LOF")
tab_bunny = tab_bun.comparison()
KNN_Bunny
# Fit kNN on the Bunny data and append its scores to tab_bunny.
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "kNN")
tab_bunny = tab_bun.comparison()
CBLOF_Bunny
# Fit CBLOF on the Bunny data and append its scores to tab_bunny.
clf = CBLOF(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "CBLOF")
tab_bunny = tab_bun.comparison()
OCSVM_Bunny
# Fit OCSVM on the Bunny data and append its scores to tab_bunny.
np.random.seed(random_seed)
# NOTE(review): unlike the other detectors, this passes `nu` rather than
# `contamination`, so `contamination` is not set here — confirm this is intended.
clf = OCSVM(nu=eta_sparsity)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "OCSVM")
tab_bunny = tab_bun.comparison()
MCD_Bunny
# Fit MCD on the Bunny data and append its scores to tab_bunny.
clf = MCD(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "MCD")
tab_bunny = tab_bun.comparison()
Feature Bagging_Bunny
# Fit Feature Bagging on the Bunny data and append its scores to tab_bunny.
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "Feature Bagging")
tab_bunny = tab_bun.comparison()
ABOD_Bunny
# Fit ABOD on the Bunny data and append its scores to tab_bunny.
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "ABOD")
tab_bunny = tab_bun.comparison()
IForest_Bunny
# Fit Isolation Forest on the Bunny data and append its scores to tab_bunny.
clf = IForest(contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "Isolation Forest")
tab_bunny = tab_bun.comparison()
HBOS_Bunny
# Fit HBOS on the Bunny data and append its scores to tab_bunny.
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "HBOS")
tab_bunny = tab_bun.comparison()
SOS_Bunny
# Fit SOS on the Bunny data and append its scores to tab_bunny.
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "SOS")
tab_bunny = tab_bun.comparison()
LSCP_Bunny
# Fit LSCP (built from default KNN, LOF and OCSVM base detectors) on the
# Bunny data and append its scores to tab_bunny.
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors, contamination=eta_sparsity, random_state=random_seed)
tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "LSCP")
tab_bunny = tab_bun.comparison()
tab_bunny
# Round the Bunny results table to three decimal places (the result is not assigned).
round(tab_bunny,3)
# Optional CSV export of the Bunny results, left disabled:
# tab_bunny.to_csv('./Example_3_Dataset.csv')