# Comparison Results on Real Data

SEOYEON CHOI  
2023-06-22

> Comparison on real data

# Import

In [516]:
import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import plotly.express as px
import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
from haversine import haversine
from IPython.display import HTML
import plotly.graph_objects as go

import copy 

import rpy2
import rpy2.robjects as ro 
from rpy2.robjects.vectors import FloatVector 
from rpy2.robjects.packages import importr

import matplotlib
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

from sklearn.neighbors import LocalOutlierFactor

from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
from sklearn.preprocessing import LabelBinarizer

import tqdm

from pygsp import graphs, filters, plotting, utils

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import plotly.express as px

from sklearn.covariance import EmpiricalCovariance, MinCovDet

from alibi_detect.od import IForest
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
import seaborn as sns
from PyNomaly import loop
from sklearn import svm
from pyod.models.lscp import LSCP
from pyod.models.hbos import HBOS
from pyod.models.so_gaal import SO_GAAL
from pyod.models.mcd import MCD
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.ocsvm import OCSVM
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.sos import SOS

In [5]:
class earthquake_func:
    """Graph-Fourier smoother for earthquake magnitudes.

    Builds a distance-based graph over the events in ``df``, thresholds the
    graph Fourier coefficients of the magnitude signal with R's
    ``EbayesThresh`` and stores the smoothed signal and residuals back in
    ``self.df`` (columns ``MagnitudeHat`` and ``Residual``).

    Typical call order: ``get_distance()`` -> ``get_weightmatrix(...)`` ->
    ``fit()`` -> one of the ``vis*`` methods.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``Magnitude``, ``Year``, ``Latitude`` and ``Longitude``
        columns.
    """

    def __init__(self,df):
        self.df = df
        self.f = df.Magnitude.to_numpy()
        self.year = df.Year.to_numpy()
        self.lat = df.Latitude.to_numpy()
        self.long = df.Longitude.to_numpy()
        self.n = len(self.f)

        self.theta= None

    def get_distance(self):
        """Fill ``self.D`` with the symmetric pairwise haversine distances."""
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.lat, self.long],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            # Only the strict upper triangle is computed; the diagonal is the
            # distance of a point to itself and stays 0.
            for j in range(i+1,self.n):
                self.D[i,j]=haversine(locations[i],locations[j])
        self.D = self.D+self.D.T

    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        """Build the Gaussian-kernel weight matrix ``self.W`` from ``self.D``.

        Parameters
        ----------
        theta : float
            Kernel bandwidth: ``W[i, j] = exp(-(D[i, j] / theta) ** 2)``.
        beta : float
            Unused; kept so existing calls keep working.
        kappa : float
            Cut-off distance. Pairs with ``D[i, j] >= kappa`` get weight 0.
        """
        self.theta = theta
        # Truncate AFTER applying the kernel. Zeroing the distance first would
        # turn every far-apart pair into exp(0) == 1, i.e. the strongest
        # possible edge instead of no edge.
        self.W = np.where(self.D<kappa, np.exp(-(self.D/self.theta)**2), 0.0)

    def _eigen(self):
        """Eigendecompose the symmetric normalized Laplacian of ``self.W``."""
        d= self.W.sum(axis=1)
        inv_sqrt_d = 1/np.sqrt(d)
        # Row/column scaling by broadcasting is the same as
        # diag(d^-1/2) @ (D - W) @ diag(d^-1/2) without two dense n x n products.
        self.L = inv_sqrt_d[:,None] * (np.diag(d)-self.W) * inv_sqrt_d[None,:]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self,ref=0.5): # fit with ebayesthresh
        """Smooth the magnitudes in the graph Fourier domain.

        ``ref`` is unused; it is kept for backward compatibility.
        Adds ``MagnitudeHat`` and ``Residual`` to ``self.df`` and sets
        ``self.con`` to 1 where ``Residual > 0.7`` and 0 elsewhere.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power)))
        # Keep only the coefficients whose thresholded power is positive.
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(MagnitudeHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
        self.con = np.where(self.df.Residual>0.7,1,0)

    def _density_fig(self, z, opacity):
        """Return the base density map shared by all ``vis*`` methods."""
        fig = px.density_mapbox(self.df,
                        lat='Latitude',
                        lon='Longitude',
                        z=z,
                        radius=5,
                        center=dict(lat=37, lon=160),
                        zoom=1.5,
                        height=900,
                        opacity = opacity,
                        mapbox_style="stamen-terrain",
                        range_color=[-7,7])
        fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
        return fig

    @staticmethod
    def _add_markers(fig, rows, color, opacity):
        """Overlay ``rows`` as markers; position and hover text share one subset."""
        fig.add_scattermapbox(lat = rows['Latitude'],
                      lon = rows['Longitude'],
                      text = rows['Magnitude'],
                      marker_size= 8,
                      marker_color= color,
                      opacity = opacity
                      )

    @staticmethod
    def _as_html(fig):
        """Wrap a plotly figure as an IPython ``HTML`` object."""
        return HTML(fig.to_html(include_mathjax=False, config=dict({'scrollZoom':False})))

    def vis(self,MagThresh=7,ResThresh=1):
        """Magnitude map with red markers for ``Magnitude > MagThresh`` and
        blue markers for ``Residual**2 > ResThresh``."""
        fig = self._density_fig('Magnitude', 0.4)
        self._add_markers(fig, self.df[self.df['Magnitude'] > MagThresh], 'red', 0.6)
        # The hover text previously came from a different filter
        # (Magnitude > ResThresh) than the marker positions.
        self._add_markers(fig, self.df[self.df['Residual']**2 > ResThresh], 'blue', 0.5)
        return self._as_html(fig)

    def visf(self):
        """Density map of the observed magnitudes."""
        return self._as_html(self._density_fig('Magnitude', 0.7))

    def visfhat(self):
        """Density map of the smoothed magnitudes (``MagnitudeHat``)."""
        return self._as_html(self._density_fig('MagnitudeHat', 0.7))

    def visres(self,MagThresh=7,ResThresh=1):
        """Blank density map with blue markers for ``Residual**2 > ResThresh``.

        ``MagThresh`` is unused; it is kept for signature compatibility.
        """
        fig = self._density_fig([0] * len(self.df), 0.7)
        self._add_markers(fig, self.df[self.df['Residual']**2 > ResThresh], 'blue', 0.7)
        return self._as_html(fig)

In [6]:
class Conf_matrx:
    """Plot a confusion matrix and collect summary scores for one detector.

    Parameters
    ----------
    original : array-like
        Reference labels.
    compare : array-like
        Labels predicted by the detector under evaluation.
    tab : pandas.DataFrame
        Results table; ``conf`` stores a copy with one extra row in
        ``self.tab`` (the frame passed in is not modified).
    """

    def __init__(self,original,compare,tab):
        self.original = original
        self.compare = compare
        self.tab = tab

    def conf(self,name):
        """Show the confusion matrix, print the scores and record them.

        ``name`` becomes the index label of the new row in ``self.tab``
        (columns ``Accuracy``, ``Precision``, ``Recall``, ``F1``).
        """
        self.conf_matrix = confusion_matrix(self.original, self.compare)

        fig, ax = plt.subplots(figsize=(5, 5))
        ax.matshow(self.conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
        for i in range(self.conf_matrix.shape[0]):
            for j in range(self.conf_matrix.shape[1]):
                ax.text(x=j, y=i,s=self.conf_matrix[i, j], va='center', ha='center', size='xx-large')
        plt.xlabel('Predictions', fontsize=18)
        plt.ylabel('Actuals', fontsize=18)
        plt.title('Confusion Matrix', fontsize=18)
        plt.show()

        self.acc = accuracy_score(self.original, self.compare)
        self.pre = precision_score(self.original, self.compare)
        self.rec = recall_score(self.original, self.compare)
        self.f1 = f1_score(self.original, self.compare)

        print('Accuracy: %.3f' % self.acc)
        print('Precision: %.3f' % self.pre)
        print('Recall: %.3f' % self.rec)
        print('F1 Score: %.3f' % self.f1)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to add the new row.
        row = pd.DataFrame({"Accuracy":[self.acc],"Precision":[self.pre],"Recall":[self.rec],"F1":[self.f1]},index = [name])
        self.tab = pd.concat([self.tab, row])

In [17]:
# Empty results table; Conf_matrx.conf adds one row per evaluated method.
tab_gode = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1"])

## load data and clean it

`-` load

In [7]:
# Stack the four catalogue files, keep columns 0, 1, 2 and 4, rename the
# coordinate/magnitude columns and renumber the rows from 0.
df_global = (
    pd.concat([pd.read_csv(path) for path in ('00_05.csv', '05_10.csv', '10_15.csv', '15_20.csv')])
    .iloc[:, [0, 1, 2, 4]]
    .rename(columns={'latitude': 'Latitude', 'longitude': 'Longitude', 'mag': 'Magnitude'})
    .reset_index(drop=True)
)

`-` cleaning

In [8]:
# Year is the text before the first '-' of each `time` value; the first
# column is then dropped (presumably `time` itself — confirm against the CSVs).
df_global = df_global.assign(Year=[stamp.split('-')[0] for stamp in df_global.time]).iloc[:, 1:]

In [9]:
df_global.Year = df_global.Year.astype(np.float64)

In [10]:
# Keep the events with 2010 <= Year < 2015 and renumber the rows from 0.
df_global_10 = (
    df_global.copy()
    .query("2010 <= Year < 2015")
    .reset_index(drop=True)
)
df_global_10

### GODE

In [218]:
gode_global = earthquake_func(df_global_10)

`-` get distance

In [219]:
gode_global.get_distance()

100%|██████████| 12498/12498 [07:20<00:00, 28.35it/s] 

In [220]:
gode_global.D[gode_global.D>0].mean()

`-` weight matrix

In [221]:
gode_global.get_weightmatrix(theta=(gode_global.D[gode_global.D>0].mean()),kappa=2500) 

`-` fit

In [352]:
gode_global.fit()

In [353]:
_df = gode_global.df.copy()

In [354]:
_df.sort_values("Residual",ascending=False).iloc[:40,:]

In [355]:
outlier_simul_one = (_df['Residual']**2).tolist()

In [356]:
# Squared residual above 0.04 -> outlier (-1); otherwise inlier (1).
outlier_simul_one = [-1 if score > 0.04 else 1 for score in outlier_simul_one]

In [357]:
# Side-by-side view: data, squared residual (the anomaly score) and the
# GODE label. Only the two columns that actually change name are renamed.
pd.concat(
    [
        _df,
        (_df['Residual']**2).to_frame('rst'),
        pd.DataFrame(outlier_simul_one),
    ],
    axis=1,
).rename(columns={'rst':'Anomalious Score', 0:'GODE'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_simul_one,tab_gode)

In [None]:
_conf.conf("GODE")

In [None]:
one = _conf.tab

### LOF

In [358]:
clf = LocalOutlierFactor(n_neighbors=2)

In [360]:
lof_rst = clf.fit_predict(_df)

In [361]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF'})

In [None]:
_conf = Conf_matrx(outlier_true_one,clf.fit_predict(X),tab_orbit)

In [None]:
_conf.conf("LOF (Breunig et al., 2000)")

In [None]:
two = one.append(_conf.tab)

### KNN

In [365]:
clf = KNN()
clf.fit(_df[['Latitude', 'Longitude','Magnitude']])
# _df['knn_clf'] = clf.labels_

In [366]:
outlier_KNN_one = list(clf.labels_)

In [367]:
# Map label 0 to 1 and every other label to -1.
outlier_KNN_one = [1 if label == 0 else -1 for label in outlier_KNN_one]

In [415]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_KNN_one,tab_orbit)

In [None]:
_conf.conf("kNN (Ramaswamy et al., 2000)")

In [None]:
three = two.append(_conf.tab)

### CBLOF

In [20]:
# NOTE(review): unlike the other detectors, this one is fitted on
# `df_global_10` rather than `_df`, and the labels are written back into
# `df_global_10` as a new column — confirm this is intended.
clf = CBLOF(contamination=0.05,check_estimator=False, random_state=77)
clf.fit(df_global_10[['Latitude', 'Longitude','Magnitude']])
df_global_10['CBLOF_Clf'] = clf.labels_

  super()._check_params_vs_input(X, default_n_init=10)

In [21]:
outlier_CBLOF_one = list(clf.labels_)

In [22]:
outlier_CBLOF_one = list(map(lambda x: 1 if x==0  else -1,outlier_CBLOF_one))

In [29]:
outlier_CBLOF_one_t = pd.DataFrame([outlier_CBLOF_one]).T.rename(columns={0:'CBLOF'});outlier_CBLOF_one_t

In [32]:
# outlier_CBLOF_one_t.to_csv('outlier_CBLOF_one.csv',index=False)

### OCSVM

In [376]:
clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)

In [377]:
clf.fit(_df)

In [378]:
outlier_OSVM_one = list(clf.predict(_df))

In [416]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_OSVM_one,tab_orbit)

In [None]:
_conf.conf("OCSVM (Sch ̈olkopf et al., 2001)")

In [None]:
five = four.append(_conf.tab)

### MCD

In [380]:
clf = MCD()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['MCD_clf'] = clf.labels_

In [381]:
outlier_MCD_one = list(clf.labels_)

In [382]:
outlier_MCD_one = list(map(lambda x: 1 if x==0  else -1,outlier_MCD_one))

In [417]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_MCD_one,tab_orbit)

In [None]:
_conf.conf("MCD (Hardin and Rocke, 2004)")

In [None]:
six = five.append(_conf.tab)

### Feature Bagging

In [386]:
clf = FeatureBagging()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['FeatureBagging_clf'] = clf.labels_

In [387]:
outlier_FeatureBagging_one = list(clf.labels_)

In [388]:
outlier_FeatureBagging_one = list(map(lambda x: 1 if x==0  else -1,outlier_FeatureBagging_one))

In [418]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_FeatureBagging_one,tab_orbit)

In [None]:
_conf.conf("Feature Bagging (Lazarevic and Kumar, 2005)")

In [None]:
seven = six.append(_conf.tab)

### ABOD

In [390]:
clf = ABOD(contamination=0.05)
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['ABOD_Clf'] = clf.labels_

In [391]:
outlier_ABOD_one = list(clf.labels_)

In [392]:
outlier_ABOD_one = list(map(lambda x: 1 if x==0  else -1,outlier_ABOD_one))

In [419]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_ABOD_one,tab_orbit)

In [None]:
_conf.conf("ABOD (Kriegel et al., 2008)")

In [None]:
eight = seven.append(_conf.tab)

### IForest

In [394]:
od = IForest(
    threshold=0.,
    n_estimators=100
)

In [395]:
od.fit(_df[['Latitude','Longitude','Magnitude']])

In [396]:
preds = od.predict(
    _df[['Latitude','Longitude','Magnitude']],
    return_instance_score=True
)

In [397]:
# _df['IF_alibi'] = preds['data']['is_outlier']

In [398]:
# outlier_alibi_one = _df['IF_alibi']
outlier_alibi_one = preds['data']['is_outlier']

In [399]:
outlier_alibi_one = list(map(lambda x: 1 if x==0  else -1,outlier_alibi_one))

In [420]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_alibi_one,tab_orbit)

In [None]:
_conf.conf("Isolation Forest (Liu et al., 2008)")

In [None]:
nine = eight.append(_conf.tab)

### HBOS

In [401]:
clf = HBOS()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['HBOS_clf'] = clf.labels_

In [402]:
outlier_HBOS_one = list(clf.labels_)

In [403]:
outlier_HBOS_one = list(map(lambda x: 1 if x==0  else -1,outlier_HBOS_one))

In [421]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_HBOS_one,tab_orbit)

In [None]:
_conf.conf("HBOS (Goldstein and Dengel, 2012)")

In [None]:
ten = nine.append(_conf.tab)

### SOS

In [405]:
outlier_SOS_one = list(clf.labels_)

In [406]:
outlier_SOS_one = list(map(lambda x: 1 if x==0  else -1,outlier_SOS_one))

In [407]:
clf = SOS()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['SOS_clf'] = clf.labels_

In [422]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_SOS_one,tab_orbit)

In [None]:
_conf.conf("SOS (Janssens et al., 2012)")

In [None]:
eleven = ten.append(_conf.tab)

### SO_GAAL

In [325]:
clf = SO_GAAL()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['SO_GAAL_clf'] = clf.labels_

  super().__init__(name, **kwargs)

Epoch 1 of 60

Testing for epoch 1 index 1:

Testing for epoch 1 index 2:

Testing for epoch 1 index 3:

Testing for epoch 1 index 4:

Testing for epoch 1 index 5:

Testing for epoch 1 index 6:

Testing for epoch 1 index 7:

Testing for epoch 1 index 8:

Testing for epoch 1 index 9:

Testing for epoch 1 index 10:

Testing for epoch 1 index 11:

Testing for epoch 1 index 12:

Testing for epoch 1 index 13:

Testing for epoch 1 index 14:

Testing for epoch 1 index 15:

Testing for epoch 1 index 16:

Testing for epoch 1 index 17:

Testing for epoch 1 index 18:

Testing for epoch 1 index 19:

Testing for epoch 1 index 20:

Testing for epoch 1 index 21:

Testing for epoch 1 index 22:

Testing for epoch 1 index 23:

Testing for epoch 1 index 24:
Epoch 2 of 60

Testing for epoch 2 index 1:

Testing for epoch 2 index 2:

Testing for epoch 2 index 3:

Testing for epoch 2 index 4:

Testing for epoch 2 index 5:

Testing for epoch 2 index 6:

Testing for epoch 2 index 7:

Testing for epoch 2 index 

In [326]:
outlier_SO_GAAL_one = list(clf.labels_)

In [327]:
outlier_SO_GAAL_one = list(map(lambda x: 1 if x==0  else -1,outlier_SO_GAAL_one))

In [423]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'}),
          pd.DataFrame(outlier_SO_GAAL_one).rename(columns={0:'SO_GAAL'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS',
                         'SO_GAAL':'SO_GAAL'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_SO_GAAL_one,tab_orbit)

In [None]:
_conf.conf("SO-GAAL (Liu et al., 2019)")

In [None]:
twelve = eleven.append(_conf.tab)

### MO_GAAL

In [None]:
clf = MO_GAAL()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['MO_GAAL_clf'] = clf.labels_

  super().__init__(name, **kwargs)

Epoch 1 of 60

Testing for epoch 1 index 1:

Testing for epoch 1 index 2:

Testing for epoch 1 index 3:

Testing for epoch 1 index 4:

Testing for epoch 1 index 5:

Testing for epoch 1 index 6:

Testing for epoch 1 index 7:

Testing for epoch 1 index 8:

Testing for epoch 1 index 9:

Testing for epoch 1 index 10:

Testing for epoch 1 index 11:

Testing for epoch 1 index 12:

Testing for epoch 1 index 13:

Testing for epoch 1 index 14:

Testing for epoch 1 index 15:

Testing for epoch 1 index 16:

Testing for epoch 1 index 17:

Testing for epoch 1 index 18:

Testing for epoch 1 index 19:

Testing for epoch 1 index 20:

Testing for epoch 1 index 21:

Testing for epoch 1 index 22:

Testing for epoch 1 index 23:

Testing for epoch 1 index 24:
Epoch 2 of 60

Testing for epoch 2 index 1:

Testing for epoch 2 index 2:

Testing for epoch 2 index 3:

Testing for epoch 2 index 4:

Testing for epoch 2 index 5:

Testing for epoch 2 index 6:

Testing for epoch 2 index 7:

Testing for epoch 2 index 

In [340]:
outlier_MO_GAAL_one = list(clf.labels_)

In [341]:
outlier_MO_GAAL_one = list(map(lambda x: 1 if x==0  else -1,outlier_MO_GAAL_one))

In [424]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'}),
          pd.DataFrame(outlier_SO_GAAL_one).rename(columns={0:'SO_GAAL'}),
          pd.DataFrame(outlier_MO_GAAL_one).rename(columns={0:'MO_GAAL'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS',
                         'SO_GAAL':'SO_GAAL',
                         'MO_GAAL':'MO_GAAL'})

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_MO_GAAL_one,tab_orbit)

In [None]:
_conf.conf("MO-GAAL (Liu et al., 2019)")

In [None]:
thirteen = twelve.append(_conf.tab)

### LSCP

In [343]:
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors)
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['LSCP_clf'] = clf.labels_



In [344]:
outlier_LSCP_one = list(clf.labels_)

In [345]:
outlier_LSCP_one = list(map(lambda x: 1 if x==0  else -1,outlier_LSCP_one))

In [425]:
# Preview the side-by-side comparison table: the columns of _df, the squared
# residual (shown as 'Anomalious Score'), the unnamed label column 0 (shown as
# 'GODE'), and one label column per baseline detector, LSCP included.
# Only 'rst' and 0 need renaming at the end; every other column already
# carries its final name.
pd.concat(
    [
        _df,
        pd.DataFrame(_df['Residual'] ** 2).rename(columns={'Residual': 'rst'}),
        pd.DataFrame(outlier_simul_one),
        *(
            pd.DataFrame(labels).rename(columns={0: column})
            for column, labels in (
                ('LOF', lof_rst),
                ('KNN', outlier_KNN_one),
                ('OCSVM', outlier_OSVM_one),
                ('MCD', outlier_MCD_one),
                ('Feature Bagging', outlier_FeatureBagging_one),
                ('ABOD', outlier_ABOD_one),
                ('IForest', outlier_alibi_one),
                ('HBOS', outlier_HBOS_one),
                ('SOS', outlier_SOS_one),
                ('SO_GAAL', outlier_SO_GAAL_one),
                ('MO_GAAL', outlier_MO_GAAL_one),
                ('LSCP', outlier_LSCP_one),
            )
        ),
    ],
    axis=1,
).rename(columns={'rst': 'Anomalious Score', 0: 'GODE'})

In [None]:
# Pair the reference labels with the LSCP labels in the notebook's Conf_matrx
# helper (defined outside this section); tab_orbit is handed over as the
# third argument.
_conf = Conf_matrx(
    outlier_true_one,
    outlier_LSCP_one,
    tab_orbit,
)

In [None]:
# Run the helper's conf() step under the display label for LSCP.
# NOTE(review): conf() is defined outside this section; presumably it fills
# _conf.tab, which the following cell reads — confirm.
_conf.conf("LSCP (Zhao et al., 2019)")

In [None]:
# Add the LSCP result held in _conf.tab to the running table `thirteen`.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement and, like append, keeps the existing
# index labels of both inputs.
# NOTE(review): assumes `thirteen` and `_conf.tab` are DataFrames — confirm.
fourteen_orbit = pd.concat([thirteen, _conf.tab])

## Result

In [426]:
# Build the final comparison table: the columns of _df, the squared residual
# (stored as 'Anomalious Score'), the unnamed label column 0 (stored as
# 'GODE'), and one label column per baseline detector.
# Only 'rst' and 0 need renaming at the end; every other column already
# carries its final name.
_df_rst = pd.concat(
    [
        _df,
        pd.DataFrame(_df['Residual'] ** 2).rename(columns={'Residual': 'rst'}),
        pd.DataFrame(outlier_simul_one),
        *(
            pd.DataFrame(labels).rename(columns={0: column})
            for column, labels in (
                ('LOF', lof_rst),
                ('KNN', outlier_KNN_one),
                ('OCSVM', outlier_OSVM_one),
                ('MCD', outlier_MCD_one),
                ('Feature Bagging', outlier_FeatureBagging_one),
                ('ABOD', outlier_ABOD_one),
                ('IForest', outlier_alibi_one),
                ('HBOS', outlier_HBOS_one),
                ('SOS', outlier_SOS_one),
                ('SO_GAAL', outlier_SO_GAAL_one),
                ('MO_GAAL', outlier_MO_GAAL_one),
                ('LSCP', outlier_LSCP_one),
            )
        ),
    ],
    axis=1,
).rename(columns={'rst': 'Anomalious Score', 0: 'GODE'})

In [427]:
# Take an independent (deep) copy so the lookups below work on their own
# object rather than on _df_rst itself.
_df_compa = _df_rst.copy(deep=True)

In [488]:
# Load the two CSV extracts, stack them, keep columns 0, 1, 2 and 4 by
# position, switch the coordinate/magnitude headers to the capitalised names
# used for _df, and renumber the rows from 0.
cmp = (
    pd.concat([pd.read_csv('05_10.csv'), pd.read_csv('10_15.csv')])
    .iloc[:, [0, 1, 2, 4]]
    .rename(
        columns={
            'latitude': 'Latitude',
            'longitude': 'Longitude',
            'mag': 'Magnitude',
        }
    )
    .reset_index(drop=True)
)

In [489]:
# Display the combined table loaded from the two CSV extracts.
cmp

In [None]:
# Display the contents of 'outlier_CBLOF_one.csv'; the result is not assigned
# to a name.
pd.read_csv('outlier_CBLOF_one.csv')

In [None]:
# Persist the comparison table to CSV. With pandas' default index=True the
# row index is written as an extra leading column.
_df_compa.to_csv('earthquake_comparison.csv')

#### Haiti

In [457]:
# Haiti (lat=18.4430, lon=-72.5710): comparison-table rows whose latitude is
# exactly 18.443.
_df_compa.loc[_df_compa['Latitude'] == 18.443]

In [490]:
# Rows of the cmp table whose latitude is exactly 18.443 (the Haiti latitude).
cmp.loc[cmp['Latitude'] == 18.443]

#### Iquique

In [455]:
# Iquique (lat=-32.6953, lon=-71.4416): comparison-table rows whose latitude
# is exactly -32.6953.
_df_compa.loc[_df_compa['Latitude'] == -32.6953]

In [491]:
# Rows of the cmp table whose latitude is exactly -32.6953.
cmp.loc[cmp['Latitude'] == -32.6953]

In [496]:
# Iquique section, second lookup: comparison-table rows whose latitude is
# exactly -20.5709.
# NOTE(review): the longitude of this event is not recorded in the notebook —
# add it once confirmed.
_df_compa.loc[_df_compa['Latitude'] == -20.5709]

In [495]:
# Rows of the cmp table whose latitude is exactly -20.5709.
cmp.loc[cmp['Latitude'] == -20.5709]

#### Sichuan

In [456]:
# Sichuan (lat=30.3080, lon=102.8880): comparison-table rows whose latitude is
# exactly 30.3080.
_df_compa.loc[_df_compa['Latitude'] == 30.3080]

In [492]:
# Rows of the cmp table whose latitude is exactly 30.3080.
cmp.loc[cmp['Latitude'] == 30.3080]

In [441]:
# Show the 50 rows with the largest 'Anomalious Score', highest first;
# reset_index() renumbers them and keeps the previous row labels as a column.
(
    _df_compa
    .sort_values(by='Anomalious Score', ascending=False)
    .head(50)
    .reset_index()
)