An Attempt to Port EbayesThresh to Python

Author

SEOYEON CHOI

Published

June 1, 2024


Import

import numpy as np
import matplotlib.pyplot as plt
import ebayesthresh

Imports for code checks

from scipy.stats import norm
from scipy.optimize import minimize, root_scalar
from statsmodels.robust.scale import mad
import pywt

beta.cauchy

Function beta for the quasi-Cauchy prior

  • Description

Given a value or vector x of values, find the value(s) of the function \(\beta(x) = g(x)/\phi(x) − 1\), where \(g\) is the convolution of the quasi-Cauchy with the normal density \(\phi(x)\).

Given an input x, compute the function beta obtained by combining the quasi-Cauchy prior with the normal density.

x = np.array([-2,1,0,-4,8,50])
phix = norm.pdf(x)
phix
array([5.39909665e-02, 2.41970725e-01, 3.98942280e-01, 1.33830226e-04,
       5.05227108e-15, 0.00000000e+00])
  • Compute the probability density function (pdf) of x
j = (x != 0)
j
array([ True,  True, False,  True,  True,  True])
  • Get a mask of the nonzero entries
beta = x
beta
array([-2,  1,  0, -4,  8, 50])
beta = np.where(j == False, -1/2, beta)
beta
array([-2. ,  1. , -0.5, -4. ,  8. , 50. ])
  • Where j is False, i.e., x is 0, insert -1/2; otherwise keep the value of beta as it is
beta[j] = (norm.pdf(0) / phix[j] - 1) / (x[j] ** 2) - 1
beta
RuntimeWarning: divide by zero encountered in divide
  beta[j] = (norm.pdf(0) / phix[j] - 1) / (x[j] ** 2) - 1
array([ 5.97264025e-01, -3.51278729e-01, -5.00000000e-01,  1.85247374e+02,
        1.23379625e+12,             inf])
  • Divide the pdf at 0 by phix, the pdf of x, subtract 1, divide by the square of the nonzero x values, and subtract 1 again

  • \(\beta(x) = \begin{cases} -\frac{1}{2} & \text{if } x = 0 \\ \frac{\phi(0)/\phi(x) - 1}{x^2} - 1 & \text{if } x \ne 0 \end{cases}\)

  • R code

beta.cauchy <- function(x) {
#
#   Find the function beta for the mixed normal prior with Cauchy
#   tails.  It is assumed that the noise variance is equal to one.
#
    phix <- dnorm(x)
    j <- (x != 0)
    beta <- x
    beta[!j] <- -1/2
    beta[j] <- (dnorm(0)/phix[j] - 1)/x[j]^2 - 1
    return(beta)
}
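
To tie the walkthrough together, here is a minimal NumPy sketch that assembles the steps above into one function (the name beta_cauchy_sketch is illustrative; the results below use the packaged ebayesthresh.beta_cauchy):

import numpy as np
from scipy.stats import norm

def beta_cauchy_sketch(x):
    # beta(x) = (phi(0)/phi(x) - 1)/x^2 - 1 for x != 0, and -1/2 at x = 0
    x = np.asarray(x, dtype=float)
    phix = norm.pdf(x)
    beta = np.full_like(x, -0.5)   # value taken at x = 0
    j = x != 0
    beta[j] = (norm.pdf(0) / phix[j] - 1) / x[j] ** 2 - 1
    return beta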

Results

  • Python
ebayesthresh.beta_cauchy(np.array([-2,1,0,-4,8,50]))
/home/csy/Dropbox/sy_hub/posts/1_Note/ebayesthresh/utils.py:21: RuntimeWarning: divide by zero encountered in divide
  beta[j] = (norm.pdf(0) / phix[j] - 1) / (x[j] ** 2) - 1
array([ 5.97264025e-01, -3.51278729e-01, -5.00000000e-01,  1.85247374e+02,
        1.23379625e+12,             inf])
  • R
> beta.cauchy(c(-2,1,0,-4,8,50))
[1]  5.972640e-01 -3.512787e-01 -5.000000e-01  1.852474e+02  1.233796e+12           Inf
  • The Cauchy distribution has infinitely long (heavy) tails
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cauchy

# Parameter settings
loc_values = [0.0, 0.0, -2.0]  # location parameters
scale_values = [0.5, 1.0, 2.0]  # scale parameters

# Plotting
x = np.linspace(-10, 10, 1000)
plt.figure(figsize=(12, 6))

# Plot the PDF
plt.subplot(1, 2, 1)
for loc, scale in zip(loc_values, scale_values):
    plt.plot(x, cauchy.pdf(x, loc, scale), label=f'loc={loc}, scale={scale}')
plt.title('Cauchy PDF')
plt.xlabel('x')
plt.ylabel('Probability Density')
plt.legend()

# Plot the CDF
plt.subplot(1, 2, 2)
for loc, scale in zip(loc_values, scale_values):
    plt.plot(x, cauchy.cdf(x, loc, scale), label=f'loc={loc}, scale={scale}')
plt.title('Cauchy CDF')
plt.xlabel('x')
plt.ylabel('Cumulative Probability')
plt.legend()

plt.show()

beta.laplace

Function beta for the Laplace prior

  • Description

Given a single value or a vector of \(x\) and \(s\), find the value(s) of the function \(\beta(x; s, a) = \frac{g(x; s, a)}{f_n(x; 0, s)} - 1\), where \(f_n(x; 0, s)\) is the normal density with mean \(0\) and standard deviation \(s\), and \(g\) is the convolution of the Laplace density with scale parameter \(a\), \(\gamma_a(\mu)\), with the normal density \(f_n(x; \mu, s)\) with mean \(\mu\) and standard deviation \(s\).

The function beta computed from \(g\), the convolution of a Laplace distribution (mean \(\mu\), scale parameter a) with the normal distribution, and \(f_n\), the normal density with mean 0 and standard deviation s.

# x = np.array([-2,1,0,-4,8,50])
x = 2.14
s = 1
# s = np.arange(1, 7)
a = 0.5
  • s is the standard deviation
  • a is the Laplace prior parameter; the larger this value, the more sharply peaked the distribution becomes.
x = np.abs(x)
x
2.14
  • Take the absolute value of the random variable x
xpa = x/s + s*a
xpa
2.64
xma = x/s - s*a
xma
1.6400000000000001
rat1 = 1/xpa
rat1
0.3787878787878788
xpa < 35
True
rat1[xpa < 35]
array([0.37878788])
  • Standard normal cumulative distribution function (cdf) divided by the standard normal density function (pdf)
if isinstance(rat1, (int, float, str, bool)) and xpa < 35:
    rat1 = norm.cdf(-xpa) / norm.pdf(xpa)
elif isinstance(rat1, (int, float, str, bool)) and xpa > 35:
    rat1 = rat1
else:
    rat1[xpa < 35] = norm.cdf(-xpa[xpa < 35]) / norm.pdf(xpa[xpa < 35])
rat1
0.33890303155852075
rat2 = 1/np.abs(xma)
rat2
0.6097560975609756
if isinstance(xma, (int, float, str, bool)) and xma > 35:
    xma = 35
elif isinstance(xma, (int, float, str, bool)) and xma < 35:
    xma = xma
else:
    xma[xma > 35] = 35
xma
1.6400000000000001
if isinstance(rat1, (int, float, str, bool)) and xma > -35:
    rat2 = norm.cdf(xma) / norm.pdf(xma)
elif isinstance(rat1, (int, float, str, bool)) and xma < -35:
    rat2 = rat2
else:
    rat2[xma > -35] = norm.cdf(xma[xma > -35]) / norm.pdf(xma[xma > -35])
rat2
9.133199429298308
  • beta = \(\frac{g(x; s, a)}{f_n(x; 0, s)} - 1\)
  • g = the convolution of the Laplace density \(\gamma(u; a)\) with scale parameter a and the normal density
    • Note: the Laplace density is given by \(\gamma (u ; a) = \frac{1}{2} a e^{-a |u|}\) and is also known as the double exponential density.
beta = (a * s) / 2 * (rat1 + rat2) - 1
beta
1.3680256152142074
  • R code
beta.laplace <- function(x, s = 1, a = 0.5) {
#
#  The function beta for the Laplace prior given parameter a and s (sd)
#
    x <- abs(x)
    xpa <- x/s + s*a
    xma <- x/s - s*a
    rat1 <- 1/xpa
    rat1[xpa < 35] <- pnorm( - xpa[xpa < 35])/dnorm(xpa[xpa < 35])
    rat2 <- 1/abs(xma)
    xma[xma > 35] <- 35
    rat2[xma > -35] <- pnorm(xma[xma > -35])/dnorm(xma[xma > -35])
    beta <- (a * s) / 2 * (rat1 + rat2) - 1
    return(beta)
}
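
The scalar walkthrough above needs the isinstance branches only because it mixes scalars and arrays. A fully vectorized sketch of the same logic (illustrative, not the packaged ebayesthresh.beta_laplace) could look like this:

import numpy as np
from scipy.stats import norm

def beta_laplace_sketch(x, s=1.0, a=0.5):
    # Vectorized version of the R code above; assumes s broadcasts against x.
    x = np.atleast_1d(np.abs(np.asarray(x, dtype=float)))
    xpa = x / s + s * a
    xma = x / s - s * a
    # For very large arguments keep the asymptotic approximations 1/xpa and 1/|xma|,
    # exactly as the R code does.
    rat1 = 1.0 / xpa
    m1 = xpa < 35
    rat1[m1] = norm.cdf(-xpa[m1]) / norm.pdf(xpa[m1])
    rat2 = 1.0 / np.abs(xma)
    xma = np.minimum(xma, 35)
    m2 = xma > -35
    rat2[m2] = norm.cdf(xma[m2]) / norm.pdf(xma[m2])
    return (a * s) / 2 * (rat1 + rat2) - 1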

Results

  • Python
ebayesthresh.beta_laplace(np.array([-2,1,0,-4,8,50]),s=1)
array([ 8.89852030e-001, -3.80041717e-001, -5.61817772e-001,
        2.85459467e+002,  1.02698062e+012,  6.34453954e+265])
ebayesthresh.beta_laplace(-2,s=1,a=0.5)
0.8898520296511427
ebayesthresh.beta_laplace(np.array([-2,1,0,-4,8,50]),s=np.arange(1, 7))
array([ 8.89852030e-01, -3.03909953e-01, -2.26276543e-01, -3.97301589e-02,
        1.53950123e-01,  5.64694733e+06])
  • R
> beta.laplace(c(-2,1,0,-4,8,50), s=1)
[1]   8.898520e-01  -3.800417e-01  -5.618178e-01   2.854595e+02   1.026981e+12  6.344540e+265
> beta.laplace(-2, s=1, a=0.5)
[1] 0.889852
> beta.laplace(c(-2,1,0,-4,8,50), s=1:6, a=1)
[1]   0.890821055  -0.129919250  -0.086229104  -0.005203193   0.054213718 112.493576777

The Laplace distribution looks similar to the normal distribution, but has a sharp peak at the center.

\(f(x) = \frac{1}{2b} \exp \big[ \frac{-|x - \mu|}{b} \big]\)

import numpy as np
import matplotlib.pyplot as plt

# Define the Laplace probability density function
def laplace_pdf(x, mu, b):
    return 1/(2*b) * np.exp(-np.abs(x - mu) / b)

# Parameter settings
mu = 0  # mean
b = 1   # scale parameter

# Generate x values
x_values = np.linspace(-10, 10, 1000)

# Compute the Laplace pdf
pdf_values = laplace_pdf(x_values, mu, b)

# Plotting
plt.figure(figsize=(8, 6))
plt.plot(x_values, pdf_values, label=f'Laplace Distribution (mu={mu}, b={b})')
plt.title('Laplace Distribution')
plt.xlabel('x')
plt.ylabel('Probability Density')
plt.legend()
plt.grid(True)
plt.show()

cauchy_medzero

The objective function that has to be zeroed, component by component, to find the posterior median when the quasi-Cauchy prior is used. x is the parameter vector, z is the data vector, and w is the weight; x and z may be scalars.

  • A function used to find the posterior median under the quasi-Cauchy prior
  • x and z may be either vectors or scalars.
# x = np.array([-2,1,0,-4,8,50])
x = 4
# z = np.array([1,0,2,3,-1,-1])
z = 5
w = 0.5
hh = z - x
hh
1
dnhh = norm.pdf(hh)
dnhh
0.24197072451914337
yleft = norm.cdf(hh) - z * dnhh + ((z * x - 1) * dnhh * norm.cdf(-x)) / norm.pdf(x)
yleft
0.7194871459755684
yright2 = 1 + np.exp(-z**2 / 2) * (z**2 * (1 / w - 1) - 1)
yright2
1.00008943967613
yright2 / 2 - yleft
-0.21944242613750342
  • R code
cauchy.medzero <- function(x, z, w) {
#    
# the objective function that has to be zeroed, component by
# component, to find the posterior median when the quasi-Cauchy prior
# is used.  x is the parameter vector, z is the data vector, w is the
# weight x and z may be scalars
#
    hh <- z - x
    dnhh <- dnorm(hh)
    yleft <- pnorm(hh) - z * dnhh + ((z * x - 1) * dnhh * pnorm( - x))/
        dnorm(x)
    yright2 <- 1 + exp( - z^2/2) * (z^2 * (1/w - 1) - 1)
    return(yright2/2 - yleft)
}
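
A minimal sketch of the same objective, plus a hypothetical use of scipy's root_scalar to solve for the posterior median at a single data value (the function name and the bracket are illustrative; the package itself solves this with vecbinsolv inside postmed_cauchy):

import numpy as np
from scipy.stats import norm
from scipy.optimize import root_scalar

def cauchy_medzero_sketch(x, z, w):
    # The root in x of this function is the posterior median under the quasi-Cauchy prior.
    hh = z - x
    dnhh = norm.pdf(hh)
    yleft = (norm.cdf(hh) - z * dnhh
             + ((z * x - 1) * dnhh * norm.cdf(-x)) / norm.pdf(x))
    yright2 = 1 + np.exp(-z**2 / 2) * (z**2 * (1 / w - 1) - 1)
    return yright2 / 2 - yleft

# Illustrative usage: posterior median for a single observation z = 5 with weight w = 0.5
med = root_scalar(lambda mu: cauchy_medzero_sketch(mu, 5.0, 0.5), bracket=(0.0, 5.0)).root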

Results

  • Python
    • Check that both vector and scalar inputs work
ebayesthresh.cauchy_medzero(np.array([-2,1,0,-4,8,50]),np.array([1,0,2,3,-1,-1]),0.5)
/home/csy/Dropbox/sy_hub/posts/1_Note/ebayesthresh/utils.py:203: RuntimeWarning: invalid value encountered in divide
  yleft = norm.cdf(hh) - z * dnhh + ((z * x - 1) * dnhh * norm.cdf(-x)) / norm.pdf(x)
array([-0.25356559,  0.        , -0.09859737, -0.45556313,  0.5       ,
               nan])
ebayesthresh.cauchy_medzero(4,5,0.5)
-0.21944242613750342
  • R
> cauchy.medzero(c(-2,1,0,-4,8,50),c(1,0,2,3,-1,-1),0.5)
[1] -0.25356559  0.00000000 -0.09859737 -0.45556313  0.50000000         NaN
> cauchy.medzero(4,5,0.5)
[1] -0.2194424

cauchy_threshzero

  • Used to find the Cauchy threshold
  • The goal is to find the z that makes the returned y below close to zero
z = np.array([1,0,2,3,-1,-1])
# z = 0
# w = 0.5
w = np.array([0.5,0.4,0.3,0.2,0,0.1])
if isinstance(z, (list)):
    z = np.array(z)
y = norm.cdf(z) - z * norm.pdf(z) - 1/2 - (z**2 * np.exp(-z**2/2) * (1/w - 1))/2
y
RuntimeWarning: divide by zero encountered in divide
  y = norm.cdf(z) - z * norm.pdf(z) - 1/2 - (z**2 * np.exp(-z**2/2) * (1/w - 1))/2
array([-0.20389131,  0.        , -0.26229672,  0.28539262,        -inf,
       -2.82876199])

\(y = \text{pnorm}(z) - z \times \text{dnorm}(z) - \frac{1}{2} - \frac{z^2 \exp(-\frac{z^2}{2})(\frac{1}{w} - 1)}{2}\)

  • R code
cauchy.threshzero <- function(z, w) {
#    
# The objective function that has to be zeroed to find the Cauchy
# threshold. z is the putative threshold vector, w is the weight w
# can be a vector
#
    y <- pnorm(z) - z * dnorm(z) - 1/2 -
         (z^2 * exp( - z^2/2) * (1/w - 1))/2
    return(y)
}
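
A minimal sketch of the objective together with a hypothetical root-finding call that recovers the threshold for a given weight (the bracket and the name are illustrative; in the package, thresholds are obtained through tfromw):

import numpy as np
from scipy.stats import norm
from scipy.optimize import root_scalar

def cauchy_threshzero_sketch(z, w):
    # The root in z of this function is the Cauchy-prior threshold for weight w.
    z = np.asarray(z, dtype=float)
    return (norm.cdf(z) - z * norm.pdf(z) - 0.5
            - (z**2 * np.exp(-z**2 / 2) * (1 / w - 1)) / 2)

# Illustrative usage: the threshold corresponding to w = 0.5
t = root_scalar(lambda t: float(cauchy_threshzero_sketch(t, 0.5)), bracket=(1e-6, 10.0)).root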

Results

  • Python
ebayesthresh.cauchy_threshzero(np.array([1,0,2,3,-1,-1]),0.5)
array([-0.20389131,  0.        ,  0.09859737,  0.43536407, -0.40263935,
       -0.40263935])
ebayesthresh.cauchy_threshzero(np.array([1,0,2,3,-1,-1]),np.array([0.5,0.4,0.3,0.2,0,0.1]))
/home/csy/Dropbox/sy_hub/posts/1_Note/ebayesthresh/utils.py:211: RuntimeWarning: divide by zero encountered in divide
  y = norm.cdf(z) - z * norm.pdf(z) - 1/2 - (z**2 * np.exp(-z**2/2) * (1/w - 1))/2
array([-0.20389131,  0.        , -0.26229672,  0.28539262,        -inf,
       -2.82876199])
  • R
> cauchy.threshzero(c(1,0,2,3,-1,-1),0.5)
[1] -0.20389131  0.00000000  0.09859737  0.43536407 -0.40263935 -0.40263935
cauchy.threshzero(c(1,0,2,3,-1,-1), c(0.5,0.4,0.3,0.2,0,0.1))
[1] -0.2038913  0.0000000 -0.2622967  0.2853926       -Inf -2.8287620

ebayesthresh

x = np.concatenate([np.random.normal(0, 1, 90), np.random.normal(5, 1, 10)])
sdev = 1
prior="laplace"
a=0.5
bayesfac=False
verbose=False
threshrule="median"
universalthresh=True
stabadjustment=None
pr = prior[0:1]
pr
'l'
if sdev is None:
    sdev = ebayesthresh.mad(x, center=0)
    stabadjustment_condition = True
elif len(np.atleast_1d(sdev)) == 1:
    if stabadjustment is not None:
        raise ValueError("Argument stabadjustment is not applicable when variances are homogeneous.")
    if np.isnan(sdev):
        sdev = ebayesthresh.mad(x, center=0)
    stabadjustment_condition = True
else:
    if pr == "c":
        raise ValueError("Standard deviation has to be homogeneous for Cauchy prior.")
    if len(sdev) != len(x):
        raise ValueError("Standard deviation has to be homogeneous or have the same length as observations.")
    if stabadjustment is None:
        stabadjustment = False
    stabadjustment_condition = stabadjustment
stabadjustment_condition
True
if stabadjustment_condition:
    m_sdev = np.mean(sdev)
    s = sdev / m_sdev
    x = x / m_sdev
else:
    s = sdev
a
0.5
if (pr == "l") and np.isnan(a):
    pp = ebayesthresh.wandafromx(x, s, universalthresh)
    w = pp['w']
    a = pp['a']
else:
    w = ebayesthresh.wfromx(x, s, prior=prior, a=a, universalthresh=universalthresh)
w
0.3200062747569716
a
0.5
s
1.0
if pr != "m" or verbose:
    tt = ebayesthresh.tfromw(w, s, prior=prior, bayesfac=bayesfac, a=a)[0]
    if stabadjustment_condition:
        tcor = tt * m_sdev
    else:
        tcor = tt
if threshrule == "median":
    muhat = ebayesthresh.postmed(x, s, w, prior=prior, a=a)
elif threshrule == "mean":
    muhat = ebayesthresh.postmean(x, s, w, prior=prior, a=a)
elif threshrule == "hard":
    muhat = ebayesthresh.threshld(x, tt)
elif threshrule == "soft":
    muhat = ebayesthresh.threshld(x, tt, hard=False)
elif threshrule == "none":
    muhat = None
else:
    raise ValueError(f"Unknown threshold rule: {threshrule}")
muhat
array([ 0.        ,  1.34329661,  0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        1.38682932, -0.        , -0.        ,  0.        ,  0.        ,
       -0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -1.94078819, -0.        ,  0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        ,  0.        , -0.        , -0.        , -0.        ,
        7.23887854,  4.28032392,  4.38326438,  3.63340097,  4.98882396,
        5.10584713,  4.50526557,  3.20664347,  4.56728499,  4.21031715])
if stabadjustment_condition:
    muhat = muhat * m_sdev
muhat
array([ 0.        ,  1.34329661,  0.        , -0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        1.38682932, -0.        , -0.        ,  0.        ,  0.        ,
       -0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -1.94078819, -0.        ,  0.        , -0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        ,  0.        , -0.        , -0.        , -0.        ,
        7.23887854,  4.28032392,  4.38326438,  3.63340097,  4.98882396,
        5.10584713,  4.50526557,  3.20664347,  4.56728499,  4.21031715])
if not verbose:
    muhat
else:
    retlist = {
        'muhat': muhat,
        'x': x,
        'threshold.sdevscale': tt,
        'threshold.origscale': tcor,
        'prior': prior,
        'w': w,
        'a': a,
        'bayesfac': bayesfac,
        'sdev': sdev,
        'threshrule': threshrule
    }
    if pr == "c":
        del retlist['a']
    if threshrule == "none":
        del retlist['muhat']
    retlist

R code

ebayesthresh <- function (x, prior = "laplace", a = 0.5, bayesfac = FALSE,
                          sdev = NA, verbose = FALSE, threshrule = "median",
                          universalthresh = TRUE, stabadjustment) {
#  
#  Given a vector of data x, find the marginal maximum likelihood
#  estimator of the mixing weight w, and apply an appropriate
#  thresholding rule using this weight.
#
#  If the prior is laplace and a=NA, then the inverse scale parameter
#  is also found by MML.
#
#  Standard deviation sdev can be a vector (heterogeneous variance) or
#  a single value (homogeneous variance). If sdev=NA, then it is
#  estimated using the function mad(x). Heterogeneous variance is
#  allowed only for laplace prior currently.
#    
#  The thresholding rules allowed are "median", "mean", "hard", "soft"
#  and "none"; if "none" is used, then only the parameters are worked
#  out.
#
#  If hard or soft thresholding is used, the argument "bayesfac"
#  specifies whether to use the bayes factor threshold or the
#  posterior median threshold.
#    
#  If universalthresh=TRUE, the thresholds will be upper bounded by
#  universal threshold adjusted by standard deviation; otherwise,
#  weight w will be searched in [0, 1].
#    
#  If stabadjustment=TRUE, the observations and standard deviations
#  will be first divided by the mean of all given standard deviations
#  in case of inefficiency due to large value of standard
#  deviation. In the case of homogeneous variance, the standard
#  deviations will be normalized to 1 automatically.
#
#  If verbose=TRUE then the routine returns a list with several
#  arguments, including muhat which is the result of the
#  thresholding. If verbose=FALSE then only muhat is returned.
#
    
  # Find the standard deviation if necessary and estimate the parameters
  pr <- substring(prior, 1, 1)

  if(length(sdev) == 1) {
      if(!missing(stabadjustment))
        stop(paste("Argument stabadjustment is not applicable when",
                   "variances are homogeneous."))
      if(is.na(sdev)) {
          sdev <- mad(x, center = 0)
      }
      stabadjustment_condition = TRUE
  } else{
    if(pr == "c")
      stop("Standard deviation has to be homogeneous for Cauchy prior.")
    if(length(sdev) != length(x))
      stop(paste("Standard deviation has to be homogeneous or has the",
                 "same length as observations."))
    if(missing(stabadjustment))
      stabadjustment <- FALSE
    stabadjustment_condition = stabadjustment
  }
  
  if (stabadjustment_condition) {
    m_sdev <- mean(sdev)
    s <- sdev/m_sdev
    x <- x/m_sdev
  } else { s <- sdev }
  
    if ((pr == "l") & is.na(a)) {
      pp <- wandafromx(x, s, universalthresh)
          w  <- pp$w
          a  <- pp$a
    }
    else
          w <- wfromx(x, s, prior = prior, a = a, universalthresh)
    if(pr != "m" | verbose) {
      tt <- tfromw(w, s, prior = prior, bayesfac = bayesfac, a = a)
      if(stabadjustment_condition) {
        tcor <- tt * m_sdev
          } else {
            tcor <- tt
          }
    }
    if(threshrule == "median")
        muhat <- postmed(x, s, w, prior = prior, a = a)
    if(threshrule == "mean")
    muhat <- postmean(x, s, w, prior = prior, a = a)
    if(threshrule == "hard")
        muhat <- threshld(x, tt)
    if(threshrule == "soft")
        muhat <- threshld(x, tt, hard = FALSE)
        if(threshrule == "none")
                muhat <- NA

    # Now return desired output
    if(stabadjustment_condition) {
      muhat <- muhat * m_sdev
    }
    if(!verbose)
            return(muhat)
    retlist <- list(muhat = muhat, x = x, threshold.sdevscale = tt, 
                        threshold.origscale = tcor, prior = prior, w = w,
                        a = a, bayesfac = bayesfac, sdev = sdev,
                        threshrule = threshrule)
    if(pr == "c")
        retlist <- retlist[-7]
    if(threshrule == "none")
        retlist <- retlist[-1]
    return(retlist)
}

Results

  • Python
ebayesthresh.ebayesthresh(x=np.concatenate([np.random.normal(0, 1, 90), np.random.normal(5, 1, 10)]), sdev = 1)
array([-0.        ,  0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.        ,  0.83181094,
       -0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.        ,  0.        , -0.        ,  0.        , -2.89468824,
       -0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.        ,  0.        , -0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        2.99222302,  4.60992013,  3.68760194,  3.16329967,  3.93131817,
        4.87250552,  3.47040456,  3.28492248,  5.09923275,  4.07454939])
  • R
> ebayesthresh(x = rnorm(100, c(rep(0,90), rep(5,10))),
+              prior = "laplace", sdev = 1)
  [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [15]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [29]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [43]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [57]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [71]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000 -0.4480064
 [85]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  5.1534865  6.2732386  4.4612851  5.9931848  4.5828731  4.6154038  4.8247775  3.6219544
 [99]  4.4480080  5.4084453

ebayesthresh.wavelet

R code

ebayesthresh.wavelet <-
  function (xtr, vscale = "independent", smooth.levels = Inf,
            prior = "laplace", a = 0.5, bayesfac = FALSE,
            threshrule = "median") {
    xcl <- class(xtr)
    if (class(xcl) == "dwt " && length(xcl) > 1) {
            xtr <- ebayesthresh.wavelet.splus(xtr, vscale, smooth.levels,
                                              prior, a, bayesfac, threshrule)
            return(xtr)
        }
    if (xcl == "wd") {
            xtr <- ebayesthresh.wavelet.wd(xtr, vscale, smooth.levels, prior,
                                           a, bayesfac, threshrule)
            return(xtr)
        }
        if (xcl == "dwt" || xcl=="modwt") {
            xtr <- ebayesthresh.wavelet.dwt(xtr, vscale, smooth.levels,
                                            prior, a, bayesfac, threshrule)
            return(xtr)
        }
    stop("Unknown wavelet transform type; no smoothing performed")
    return(xtr)
}

Results

  • Python

  • R

ebayesthresh.wavelet.dwt

data = np.concatenate([np.random.normal(0, 1, 90), np.random.normal(5, 1, 10)])
n = len(data)
level1 = data[::2] - data[1::2]
level2 = level1[::2] - level1[1::2]
approximation = level2[::2] + level2[::2]

# Create a list of wavelet coefficients for each level
x_dwt = [approximation, level2, level1]
x_dwt
[array([ 1.4836815 ,  2.5460343 , -1.306332  ,  1.31723319, -2.55056374,
         5.08187009, -0.83959987, -3.73303537,  2.84676984,  4.08970155,
         7.31864861, -0.9809799 ,  1.16959737]),
 array([ 0.74184075, -1.25167459,  1.27301715, -2.76291158, -0.653166  ,
        -1.67217533,  0.6586166 ,  2.04732727, -1.27528187,  1.14064582,
         2.54093505, -3.09342206, -0.41979993, -1.14225822, -1.86651769,
         1.20685697,  1.42338492,  1.94833839,  2.04485077, -3.20509258,
         3.6593243 , -0.02077669, -0.49048995,  1.47917861,  0.58479869]),
 array([ 0.47868431, -0.26315645,  0.31878975,  1.57046433,  1.13616511,
        -0.13685204,  0.4748337 ,  3.23774528,  1.05890084,  1.71206684,
        -0.47029185,  1.20188347, -1.31592992, -1.97454652,  0.96714734,
        -1.08017994, -1.82143641, -0.54615454, -1.44897433, -2.58962015,
         0.4946747 , -2.04626034, -1.47187691,  1.62154515, -0.0205145 ,
         0.39928543, -2.00296752, -0.86070929, -1.84618836,  0.02032933,
         1.884058  ,  0.67720103,  0.44194886, -0.98143606,  0.12154746,
        -1.82679094,  0.41042574, -1.63442503, -2.87055564,  0.33453694,
         2.38411684, -1.27520746,  1.96971695,  1.99049364,  0.07055637,
         0.56104632,  3.25314351,  1.7739649 ,  0.34259736, -0.24220132])]
# x_dwt
vscale="independent"
smooth_levels=float('inf')
prior="laplace"
a=0.5
bayesfac=False
threshrule="median"
nlevs = len(x_dwt) - 1
nlevs
2
slevs = min(nlevs, smooth_levels)
slevs
2
if isinstance(vscale, str):
    vs = vscale[0].lower()
    if vs == "i":
        vscale = ebayesthresh.mad(x_dwt[0], center=0)
    if vs == "l":
        vscale = None
for j in range(slevs):
    x_dwt[j] = ebayesthresh.ebayesthresh(x_dwt[j], prior=prior, a=a, bayesfac=bayesfac, 
                            sdev=vscale, verbose=False, threshrule=threshrule)
Unexpected exception formatting exception. Falling back to standard exception
Traceback (most recent call last):
  File "/home/csy/anaconda3/envs/temp_csy/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-7fc60b3f848b>", line 2, in <cell line: 1>
    x_dwt[j] = EbayesThresh.ebayesthresh(x_dwt[j], prior=prior, a=a, bayesfac=bayesfac,
  File "/home/csy/Dropbox/sy_hub/posts/1_Note/EbayesThresh/utils.py", line 100, in ebayesthresh
    tt = tfromw(w, s, prior=prior, bayesfac=bayesfac, a=a)[0]
  File "/home/csy/Dropbox/sy_hub/posts/1_Note/EbayesThresh/utils.py", line 713, in tfromw
TypeError: object of type 'numpy.float64' has no len()

x_dwt
[array([-3.97210513, -4.0287387 ,  0.03812726,  1.98061813, -0.20712241,
        -1.94263661, -3.20640339, -1.52713519,  3.36779351,  3.58251383,
        -1.57906486,  0.59753974, -3.67964951]),
 array([-1.98605257, -2.03819191, -2.01436935, -0.23136287,  0.01906363,
         2.48481231,  0.99030907,  0.80823516, -0.1035612 ,  2.53250102,
        -0.9713183 , -2.40256619, -1.60320169, -1.59307862, -0.7635676 ,
         0.88817825,  1.68389675, -1.00917812,  1.79125692,  1.06362513,
        -0.78953243,  1.3533214 ,  0.29876987, -1.80890545, -1.83982476]),
 array([-0.90884553,  1.07720704, -1.07828366,  0.95990824, -1.96840177,
         0.04596758, -0.70551575, -0.47415287, -1.34986168, -1.36892531,
         0.40485746, -2.07995485,  0.54909349, -0.44121557,  0.1563934 ,
        -0.65184176, -0.43358298, -0.33002178,  3.00455902,  0.472058  ,
         0.79130012,  1.76261842, -1.14949085,  1.25307534, -2.67917775,
        -1.07597605,  0.58772537,  2.18080399, -0.33286881,  0.43069878,
        -0.90429141, -1.79246966,  1.98651968,  0.30262293, -2.10891536,
        -1.09973725,  1.15116106, -0.64009585,  1.62734043,  0.56371529,
        -1.83981846, -1.05028603,  0.0481002 , -1.30522119, -0.23335818,
        -0.53212805, -1.39336681,  0.41553864, -1.58935501,  0.25046975])]

R code

ebayesthresh.wavelet.dwt <-
  function (x.dwt, vscale = "independent", smooth.levels = Inf, 
            prior = "laplace", a = 0.5, bayesfac = FALSE,
            threshrule = "median") {
        nlevs <- length(x.dwt)-1
        slevs <- min(nlevs, smooth.levels)
        if (is.character(vscale)) {
            vs <- substring(vscale, 1, 1)
            if (vs == "i") 
                vscale <- mad(x.dwt[[1]])
            if (vs == "l") 
                vscale <- NA
        }
        for (j in 1:slevs) {
            x.dwt[[j]] <-
              ebayesthresh(x.dwt[[j]], prior, a, bayesfac, vscale,
                           FALSE, threshrule)
        }
     return(x.dwt)
}

Results

  • Python
ebayesthresh.ebayesthresh_wavelet_dwt(x_dwt)
[array([ 0.,  0., -0.,  0., -0.,  0., -0., -0.,  0.,  0.,  0., -0.,  0.]),
 array([ 0., -0.,  0., -0., -0., -0.,  0.,  0., -0.,  0.,  0., -0., -0.,
        -0., -0.,  0.,  0.,  0.,  0., -0.,  0., -0., -0.,  0.,  0.]),
 array([ 0.47868431, -0.26315645,  0.31878975,  1.57046433,  1.13616511,
        -0.13685204,  0.4748337 ,  3.23774528,  1.05890084,  1.71206684,
        -0.47029185,  1.20188347, -1.31592992, -1.97454652,  0.96714734,
        -1.08017994, -1.82143641, -0.54615454, -1.44897433, -2.58962015,
         0.4946747 , -2.04626034, -1.47187691,  1.62154515, -0.0205145 ,
         0.39928543, -2.00296752, -0.86070929, -1.84618836,  0.02032933,
         1.884058  ,  0.67720103,  0.44194886, -0.98143606,  0.12154746,
        -1.82679094,  0.41042574, -1.63442503, -2.87055564,  0.33453694,
         2.38411684, -1.27520746,  1.96971695,  1.99049364,  0.07055637,
         0.56104632,  3.25314351,  1.7739649 ,  0.34259736, -0.24220132])]
  • R

Could not verify

> ebayesthresh.wavelet.dwt(x.dwt=x_dwt)
Error in ebayesthresh.wavelet.dwt(x.dwt = x_dwt) : 
  could not find function "ebayesthresh.wavelet.dwt"

ebayesthresh.wavelet.splus

data = np.concatenate([np.random.normal(0, 1, 90), np.random.normal(5, 1, 10)])
n = len(data)
level1 = data[::2] - data[1::2]
level2 = level1[::2] - level1[1::2]
approximation = level2[::2] + level2[::2]

# Create a list of wavelet coefficients for each level
x_dwt = [approximation, level2, level1]
x_dwt
[array([  1.99404911,  -6.15701427,  -3.99558976,  -0.38391355,
         -6.6016326 ,  -2.81007867, -17.52592145,  -7.07965432,
         -1.3653538 ,  -4.08821234,  -2.66406197,  -4.45001631,
         -2.9337468 ]),
 array([ 0.99702455,  2.06672342, -3.07850714,  3.12833675, -1.99779488,
         0.82687941, -0.19195677,  0.73432995, -3.3008163 ,  2.46746019,
        -1.40503934,  0.45749726, -8.76296072, -1.28596396, -3.53982716,
         3.44395235, -0.6826769 , -0.11415533, -2.04410617,  0.87670091,
        -1.33203099, -1.5215132 , -2.22500816,  2.61347947, -1.4668734 ]),
 array([ 0.26815468, -0.72886987, -0.47285771, -2.53958113, -2.01598499,
         1.06252215,  2.86786944, -0.26046731, -2.45177824, -0.45398336,
         0.9342025 ,  0.10732309, -0.43102009, -0.23906331,  0.55549546,
        -0.17883449, -1.11553065,  2.18528565,  1.17389913, -1.29356106,
        -0.51051515,  0.89452418, -0.37799399, -0.83549125, -5.18513165,
         3.57782907, -0.40410394,  0.88186002, -0.82947095,  2.71035621,
         2.01683439, -1.42711796, -1.16240604, -0.47972914, -1.00833728,
        -0.89418195, -1.4092027 ,  0.63490347,  0.72968632, -0.1470146 ,
         0.02978361,  1.3618146 , -0.03523747,  1.48627572, -1.02615173,
         1.19885643,  0.62995346, -1.983526  , -0.21994701,  1.24692638])]
vscale="independent"
smooth_levels=float('inf')
prior="laplace"
a=0.5
bayesfac=False
threshrule="median"
nlevs = len(x_dwt)
nlevs
3
slevs = min(nlevs, smooth_levels)
slevs
3
if isinstance(vscale, str):
    vs = vscale[0].lower()
    if vs == "i":
        vscale = ebayesthresh.mad(x_dwt[-1])  # Use the last level for vscale
    elif vs == "l":
        vscale = None
vscale
1.28961200438462
for j in range(nlevs - slevs + 1, nlevs + 1):
        x_dwt[j - 1] = ebayesthresh.ebayesthresh(x_dwt[j - 1], prior=prior, a=a, bayesfac=bayesfac, 
                                    sdev=vscale, verbose=False, threshrule=threshrule)
x_dwt
[array([ 2.54686727, -3.93681469,  2.84493444, -0.22684436, -0.        ,
         0.69626776, -0.1937811 , -5.05610077,  0.73042895,  4.06889481,
         0.09960291,  1.5871375 ,  0.43377755]),
 array([ 0.        , -0.        , -0.        ,  0.        ,  0.        ,
        -0.        , -0.        , -1.88673589, -0.        , -0.        ,
         0.        , -0.        , -0.        ,  0.        , -0.        ,
        -0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        , -0.        ,  0.        , -2.86961148,  0.        ]),
 array([ 0., -0.,  0.,  0., -0., -0.,  0.,  0.,  0., -0., -0.,  0.,  0.,
         0., -0.,  0., -0., -0., -0.,  0.,  0.,  0., -0.,  0.,  0.,  0.,
         0., -0., -0.,  0.,  0.,  0., -0., -0.,  0.,  0.,  0., -0.,  0.,
         0.,  0.,  0., -0., -0.,  0., -0., -0.,  0., -0., -0.])]

R code

ebayesthresh.wavelet.splus <-
  function (x.dwt, vscale = "independent", smooth.levels = Inf, 
            prior = "laplace", a = 0.5, bayesfac = FALSE,
            threshrule = "median") {
        nlevs <- attributes(x.dwt)$n.levels
        slevs <- min(nlevs, smooth.levels)
        if (is.character(vscale)) {
            vs <- substring(vscale, 1, 1)
            if (vs == "i") 
                vscale <- mad(x.dwt[[nlevs + 1]])
            if (vs == "l") 
                vscale <- NA
        }
        for (j in ((nlevs - slevs + 2):(nlevs + 1)))
          x.dwt[[j]] <-
            ebayesthresh(as.vector(x.dwt[[j]]), prior, a, bayesfac,
                         vscale, FALSE, threshrule)
        return(x.dwt)
}

Results

  • Python
ebayesthresh.ebayesthresh_wavelet_splus(x_dwt=x_dwt)
  • R

Could not verify

> ebayesthresh.wavelet.splus(x.dwt=x_dwt)
Error in ebayesthresh.wavelet.splus(x.dwt = x_dwt) : 
  could not find function "ebayesthresh.wavelet.splus"

ebayesthresh.wavelet.wd

def get_mother_wavelet_coefficients(data, level):
    # Perform wavelet decomposition
    coeffs = pywt.wavedec(data, 'db1', level=level)
    
    # Access the coefficients at the desired resolution level
    mother_wavelet_coeffs = coeffs[level]
    
    return mother_wavelet_coeffs

# Example usage:
data = [1, 2, 3, 4, 5, 6, 7, 8]
level = 2
mother_wavelet_coeffs = get_mother_wavelet_coefficients(data, level)
mother_wavelet_coeffs
array([-0.70710678, -0.70710678, -0.70710678, -0.70710678])
x_wd = pywt.Wavelet('db1')
x_wd
pywt._extensions._pywt.Wavelet(name='db1', filter_bank=([0.7071067811865476, 0.7071067811865476], [-0.7071067811865476, 0.7071067811865476], [0.7071067811865476, 0.7071067811865476], [0.7071067811865476, -0.7071067811865476]))
vscale="independent"
smooth_levels=float('inf')
prior="laplace"
a=0.5
bayesfac=False
threshrule="median"
nlevs = x_wd['nlevels']
nlevs
slevs = min(nlevs - 1, smooth_levels)
slevs
if isinstance(vscale, str):
    vs = vscale[0].lower()
    if vs == "i":
        vscale = ebayesthresh.mad(get_mother_wavelet_coefficients(x_wd,-1))  # Use the last level for vscale
    elif vs == "l":
        vscale = None
for j in range(nlevs - slevs, nlevs - 1):
        x_wd.d[j] = ebayesthresh.ebayesthresh(x_wd.d[j], prior=prior, a=a, bayesfac=bayesfac, 
                                               sdev=vscale, verbose=False, threshrule=threshrule)
x_wd

R code

ebayesthresh.wavelet.wd <-
  function (x.wd, vscale = "independent", smooth.levels = Inf,
            prior = "laplace", a = 0.5, bayesfac = FALSE,
            threshrule = "median") {
    nlevs <- x.wd$nlevels
    slevs <- min(nlevs - 1, smooth.levels)
    if (is.character(vscale)) {
        vs <- substring(vscale, 1, 1)
        if (vs == "i")
            vscale <- mad(accessD(x.wd, level = nlevs - 1))
        if (vs == "l")
            vscale <- NA
    }
    for (j in (nlevs - slevs):(nlevs - 1)) {
      x.wd <- putD(x.wd, level = j,
                   v = ebayesthresh(accessD(x.wd, level = j), prior, a,
                                    bayesfac, vscale, FALSE, threshrule))
    }
    return(x.wd)
}

Results

  • Python
ebayesthresh.ebayesthresh_wavelet_wd(x_wd)
  • R

Could not verify

> ebayesthresh.wavelet.wd(x.dwt=x_dwt)
Error in ebayesthresh.wavelet.wd(x.dwt = x_dwt) : 
  could not find function "ebayesthresh.wavelet.wd"

isotone

Isotonic regression is a method for finding a monotonically increasing or decreasing pattern of an output variable with respect to an input variable.

beta = ebayesthresh.beta_cauchy(np.array([-2,1,0,-4]))
w = np.ones(len(beta))
aa = w + 1/beta
x = w + aa
wt = 1/aa**2
increasing = False
nn = len(x)
if nn == 1:
    x = x
if not increasing:
    x = -x
ip = np.arange(1, nn+1)
dx = np.diff(x)
nx = len(x)
jmax = np.arange(nx)[(np.concatenate((dx <= 0, [False])) & np.concatenate(([True], dx > 0)))]
jmin = np.arange(nx)[(np.concatenate((dx > 0, [True])) & np.concatenate(([False], dx <= 0)))]
jb=0
ind = np.arange(jmax[jb], jmin[jb]+1)
wtn = np.sum(wt[ind])
x[jmax[jb]] = np.sum(wt[ind] * x[ind]) / wtn
wt[jmax[jb]] = wtn
x[jmax[jb]+1:jmin[jb]+1] = np.nan
# Clean up within iteration, eliminating the parts of sequences that
# were set to NA
ind = ~np.isnan(x)
x = x[ind]
wt = wt[ind]
ip = ip[ind]
dx = np.diff(x)
nx = len(x)
# Final cleanup: reconstruct z at all points by repeating the pooled
# values the appropriate number of times
jj = np.zeros(nn, dtype=int)
jj[ip - 1] = 1
z = x[np.cumsum(jj) - 1]
if not increasing:
    z = -z
z
array([3.67430141, 0.76041105, 0.76041105, 0.76041105])

R code

isotone <- function(x, wt = rep(1, length(x)), increasing = FALSE) {
#
#   find the weighted least squares isotone fit to the 
#   sequence x, the weights given by the sequence wt
#
#   if increasing == TRUE the curve is set to be increasing, 
#   otherwise to be decreasing
#
#   the vector ip contains the indices on the original scale of the
#   breaks in the regression at each stage
#
    nn <- length(x)
    if(nn == 1)
        return(x)
    if(!increasing)
        x <-  - x
    ip <- (1:nn)
    dx <- diff(x)
    nx <- length(x)
    while((nx > 1) && (min(dx) < 0)) {
#
#  do single pool-adjacent-violators step
#
#  find all local minima and maxima
#
        jmax <- (1:nx)[c(dx <= 0, FALSE) & c(TRUE, dx > 0)]
        jmin <- (1:nx)[c(dx > 0, TRUE) & c(FALSE, dx <= 0)]
#  do pav step for each pair of maxima and minima
#
#  add up weights within subsequence that is pooled
#  set first element of subsequence to the weighted average
#  the first weight to the sum of the weights within the subsequence
#    and remainder of the subsequence to NA
#
        for(jb in (1:length(jmax))) {
            ind <- (jmax[jb]:jmin[jb])
            wtn <- sum(wt[ind])
            x[jmax[jb]] <- sum(wt[ind] * x[ind])/wtn
            wt[jmax[jb]] <- wtn
            x[(jmax[jb] + 1):jmin[jb]] <- NA
        }
#
#  clean up within iteration, eliminating the parts of sequences that
#  were set to NA
#
        ind <- !is.na(x)
        x <- x[ind]
        wt <- wt[ind]
        ip <- ip[ind]
        dx <- diff(x)
        nx <- length(x)
    }
# 
#  final cleanup: reconstruct z at all points by repeating the pooled
#    values the appropriate number of times
#
    jj <- rep(0, nn)
    jj[ip] <- 1
    z <- x[cumsum(jj)]
    if(!increasing)
        z <-  - z
    return(z)
}
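
The walkthrough above performs only a single pool-adjacent-violators step, which happens to be enough for this example. A minimal sketch of the full loop, mirroring the R function (illustrative, not the packaged ebayesthresh.isotone):

import numpy as np

def isotone_sketch(x, wt=None, increasing=False):
    # Weighted least-squares isotone (monotone) fit via pool-adjacent-violators.
    x = np.asarray(x, dtype=float).copy()
    nn = len(x)
    wt = np.ones(nn) if wt is None else np.asarray(wt, dtype=float).copy()
    if nn == 1:
        return x
    if not increasing:
        x = -x
    ip = np.arange(nn)   # original positions of the surviving (pooled) values
    dx = np.diff(x)
    while len(x) > 1 and np.min(dx) < 0:
        # local maxima/minima bracketing each decreasing run
        jmax = np.where(np.concatenate((dx <= 0, [False])) & np.concatenate(([True], dx > 0)))[0]
        jmin = np.where(np.concatenate((dx > 0, [True])) & np.concatenate(([False], dx <= 0)))[0]
        for jb in range(len(jmax)):
            ind = np.arange(jmax[jb], jmin[jb] + 1)
            wtn = wt[ind].sum()
            x[jmax[jb]] = np.sum(wt[ind] * x[ind]) / wtn   # pooled weighted average
            wt[jmax[jb]] = wtn
            x[jmax[jb] + 1:jmin[jb] + 1] = np.nan
        keep = ~np.isnan(x)
        x, wt, ip = x[keep], wt[keep], ip[keep]
        dx = np.diff(x)
    # expand the pooled values back to the original length
    jj = np.zeros(nn, dtype=int)
    jj[ip] = 1
    z = x[np.cumsum(jj) - 1]
    return -z if not increasing else z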

Results

  • Python
beta = ebayesthresh.beta_cauchy(np.array([-2,1,0,-4]))
w = np.ones(len(beta))
aa = w + 1/beta
ps = w + aa
ww = 1/aa**2
wnew = ebayesthresh.isotone(ps, ww, increasing = False)
wnew
[3.67430141208924, 0.760411047043364, 0.760411047043364, 0.760411047043364]

R

> beta <- beta.cauchy(c(-2,1,0,-4))
> w <- rep(1, length(x))
> aa = w + 1/beta
> ps = w + aa
> ww = 1/aa**2
> wnew = isotone(ps, ww, increasing = FALSE)
> wnew
[1] 3.674301 0.760411 0.760411 0.760411

laplace_threshzero

x = np.array([-2,1,0,-4,8,50])
s = 1
w = 0.5
a = 0.5
a = min(a, 20)
a
0.5
if isinstance(x, list):
    z = []
    for elem in x:
        xma = elem / s - s * a
        z_add = norm.cdf(xma) - (1 / a) * (1 / s * norm.pdf(xma)) * (1 / w + ebayesthresh.beta_laplace(elem, s, a))
        z.append(z_add)
else:
    xma = x / s - s * a
    z = norm.cdf(xma) - (1 / a) * (1 / s * norm.pdf(xma)) * (1 / w + ebayesthresh.beta_laplace(x, s, a))
xma
array([-2.5,  0.5, -0.5, -4.5,  7.5, 49.5])
z
array([-0.09509872, -0.44919982, -0.70413065, -0.00918596,  0.5       ,
        1.        ])

R code

laplace.threshzero <- function(x, s = 1, w = 0.5, a = 0.5) {
#
# The function that has to be zeroed to find the threshold with the
# Laplace prior.  Only allow a < 20 for input value.
#    
  a <- min(a, 20)
  xma <- x/s - s*a
  z <- pnorm(xma) - 1/a * (1/s*dnorm(xma)) * (1/w + beta.laplace(x, s, a))
  return(z)
}
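
A vectorized sketch of the same objective, relying on the packaged ebayesthresh.beta_laplace (illustrative; the list branch used in the walkthrough is then unnecessary):

import numpy as np
from scipy.stats import norm
import ebayesthresh

def laplace_threshzero_sketch(x, s=1.0, w=0.5, a=0.5):
    # The root of this function gives the threshold under the Laplace prior; a is capped at 20.
    a = min(a, 20)
    x = np.atleast_1d(np.asarray(x, dtype=float))
    xma = x / s - s * a
    return norm.cdf(xma) - (1 / a) * (norm.pdf(xma) / s) * (1 / w + ebayesthresh.beta_laplace(x, s, a))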

Results

  • Python
ebayesthresh.laplace_threshzero(np.array([-2,1,0,-4,8,50]), s = 1, w = 0.5, a = 0.5)
array([-0.09509872, -0.44919982, -0.70413065, -0.00918596,  0.5       ,
        1.        ])
ebayesthresh.laplace_threshzero(-5, s = 1, w = 0.5, a = 0.5)
-0.0033691679532916346
  • R
> laplace.threshzero(c(-2,1,0,-4,8,50), s = 1, w = 0.5, a = 0.5)
[1] -0.095098724 -0.449199824 -0.704130654 -0.009185958  0.500000000  1.000000000
> laplace.threshzero(-5, s = 1, w = 0.5, a = 0.5)
[1] -0.003369168

negloglik_laplace

Marginal negative log-likelihood function for the Laplace prior.

  • Compute the marginal negative log-likelihood for the Laplace prior
xpar = np.array([0.5,0.6,0.3])
xx = np.array([1,2,3,4,5])
ss = np.array([1])
tlo = np.sqrt(2 * np.log(len(np.array([1,2,3,4,5])))) * 1
thi = np.array([0,0,0])
a = xpar[1]
a
0.6
wlo = ebayesthresh.wfromt(thi, ss, a=a)
wlo
array([1., 1., 1.])
whi = ebayesthresh.wfromt(tlo, ss, a=a)
whi
array([0.44528236])
wlo = np.max(wlo)
wlo
1.0
whi = np.min(whi)
whi
0.44528236200207744
loglik = np.sum(np.log(1 + (xpar[0] * (whi - wlo) + wlo) *
                           ebayesthresh.beta_laplace(xx, ss, a)))
-loglik
-16.797273710172206

R code

negloglik.laplace <- function(xpar, xx, ss, tlo, thi) {
#
#  Marginal negative log likelihood function for laplace prior. 
#   Constraints for thresholds need to be passed externally.
#  
#  xx   :data
#  xpar :vector of two parameters:
#      xpar[1] : a value between [0, 1] which will be adjusted to range of w 
#      xpar[2] : inverse scale (rate) parameter ("a")
#  ss   :vector of standard deviations
#  tlo  :lower bound of thresholds
#  thi  :upper bound of thresholds
#
    a <- xpar[2]
        
        # Calculate the range of w given a, using negative monotonicity
        # between w and t
    wlo <- wfromt(thi, ss, a = a)
    whi <- wfromt(tlo, ss, a = a)
    wlo <- max(wlo)
    whi <- min(whi)
    loglik <- sum(log(1 + (xpar[1] * (whi - wlo) + wlo) *
                          beta.laplace(xx, ss, a)))
    return(-loglik)
}

Results

  • Python
xpar = np.array([0.5,0.6,0.3])
xx = np.array([1,2,3,4,5])
ss = np.array([1])
tlo = np.sqrt(2 * np.log(len(np.array([1,2,3,4,5])))) * 1
thi = np.array([0,0,0])
ebayesthresh.negloglik_laplace(xpar, xx, ss, tlo, thi)
-16.797273710172206
  • R
> xpar <- c(0.5, 0.6, 0.3)
> xx <- c(1, 2, 3, 4, 5)
> ss <- c(1)
> tlo <- sqrt(2 * log(length(c(1, 2, 3, 4, 5)))) * 1
> thi <- c(0, 0, 0)
> negloglik.laplace(xpar, xx, ss, tlo, thi)
[1] -16.79727

postmean

Given a single value or a vector of data and sampling standard deviations (sd equals 1 for Cauchy prior), find the corresponding posterior mean estimate(s) of the underlying signal value(s).

  • Find the appropriate posterior mean
x = np.array([-2,1,0,-4,8,50])
s = 1
w = 0.5
prior = "cauchy"
# prior = "laplace"
a = 0.5
pr = prior[0:1]
pr
'c'
if pr == "l":
    mutilde = ebayesthresh.postmean_laplace(x, s, w, a=a)
elif pr == "c":
    if np.any(s != 1):
        raise ValueError("Only standard deviation of 1 is allowed for Cauchy prior.")
    mutilde = ebayesthresh.postmean_cauchy(x, w)
else:
    raise ValueError("Unknown prior type.")
mutilde
array([ 0,  0,  0, -3,  7, 49])

R code

postmean <- function(x, s = 1, w = 0.5, prior = "laplace", a = 0.5) {
#
#  Find the posterior mean for the appropriate prior for 
#   given x, s (sd), w and a.
#
    pr <- substring(prior, 1, 1)
    if(pr == "l")
        mutilde <- postmean.laplace(x, s, w, a = a)
    if(pr == "c"){
            if(any(s != 1))
                stop(paste("Only standard deviation of 1 is allowed",
                           "for Cauchy prior."))
        mutilde <- postmean.cauchy(x, w)
    }
    return(mutilde)
}

Results

  • Python
ebayesthresh.postmean(np.array([-2,1,0,-4,8,50]), s=1, w = 0.5, prior = "laplace", a = 0.5)
array([-1.01158962,  0.27095331,  0.        , -3.48800924,  7.5       ,
       49.5       ])
  • R
> postmean(c(-2,1,0,-4,8,50), s=1, w = 0.5, prior = "laplace", a = 0.5)
[1] -1.0115896  0.2709533  0.0000000 -3.4880092  7.5000000 49.5000000

postmean_cauchy

Find the posterior mean for the quasi-Cauchy prior with mixing weight w given data x, which may be a scalar or a vector.

  • Find the posterior mean under the quasi-Cauchy prior
x = np.array([-2,1,0,-4,8,50], dtype=float)
w = 0.5
muhat = x
ind = (x == 0)
ind
array([False, False,  True, False, False, False])
x = x[~ind]
x
array([-2.,  1., -4.,  8., 50.])
ex = np.exp(-x**2/2)
ex
array([1.35335283e-01, 6.06530660e-01, 3.35462628e-04, 1.26641655e-14,
       0.00000000e+00])
z = w * (x - (2 * (1 - ex))/x)
z
array([-0.56766764,  0.10653066, -1.75008387,  3.875     , 24.98      ])
z = z / (w * (1 - ex) + (1 - w) * ex * x**2)
z
array([-0.80748973,  0.21306132, -3.48264328,  7.75      , 49.96      ])
muhat[~ind] = z
muhat
array([-0.80748973,  0.21306132,  0.        , -3.48264328,  7.75      ,
       49.96      ])

R code

postmean.cauchy <- function(x, w) {
#
#  Find the posterior mean for the quasi-Cauchy prior with mixing
#   weight w given data x, which may be a scalar or a vector.
#
    muhat <- x
    ind <- (x == 0)
    x <- x[!ind]
    ex <- exp( - x^2/2)
    z <- w * (x - (2 * (1 - ex))/x)
    z <- z/(w * (1 - ex) + (1 - w) * ex * x^2)
    muhat[!ind] <- z
    return(muhat)
}
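
Assembled into a single function, with an explicit copy so that the output does not share memory with the input (in the walkthrough above, muhat = x aliases the input array):

import numpy as np

def postmean_cauchy_sketch(x, w):
    # Closed-form posterior mean under the quasi-Cauchy prior with mixing weight w.
    x = np.asarray(x, dtype=float)
    muhat = np.array(x, copy=True)   # zero entries of x stay exactly zero
    nz = x != 0
    xx = x[nz]
    ex = np.exp(-xx**2 / 2)
    z = w * (xx - 2 * (1 - ex) / xx)
    z = z / (w * (1 - ex) + (1 - w) * ex * xx**2)
    muhat[nz] = z
    return muhat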

Results

  • Python
ebayesthresh.postmean_cauchy(np.array([-2,1,0,-4,8,50], dtype=float),0.5)
array([-0.80748973,  0.21306132,  0.        , -3.48264328,  7.75      ,
       49.96      ])
  • R
> postmean.cauchy(c(-2,1,0,-4,8,50),0.5)
[1] -0.8074897  0.2130613  0.0000000 -3.4826433  7.7500000 49.9600000

postmean.laplace

Find the posterior mean for the double exponential prior for given \(x\), \(s\) (sd), \(w\), and \(a\).

  • Posterior mean for the double exponential (Laplace) prior
x = np.array([-2,1,0,-4,8,50])
s = 1
w = 0.5
a = 0.5
a = min(a, 20)
a
0.5
w_post = ebayesthresh.wpost_laplace(w, x, s, a)
w_post
array([0.65396152, 0.38270015, 0.30467782, 0.99652125, 1.        ,
       1.        ])
sx = np.sign(x)
sx
array([-1,  1,  0, -1,  1,  1])
x = np.abs(x)
x
array([ 2,  1,  0,  4,  8, 50])
xpa = x/s + s*a
xpa
array([ 2.5,  1.5,  0.5,  4.5,  8.5, 50.5])
xma = x/s - s*a
xma
array([ 1.5,  0.5, -0.5,  3.5,  7.5, 49.5])
xpa[xpa > 35] = 35
xpa
array([ 2.5,  1.5,  0.5,  4.5,  8.5, 35. ])
xma[xma < -35] = -35
xma
array([ 1.5,  0.5, -0.5,  3.5,  7.5, 49.5])
cp1 = norm.cdf(xma)
cp1
array([0.9331928 , 0.69146246, 0.30853754, 0.99976737, 1.        ,
       1.        ])
cp2 = norm.cdf(-xpa)
cp2
array([6.20966533e-003, 6.68072013e-002, 3.08537539e-001, 3.39767312e-006,
       9.47953482e-018, 1.12491071e-268])
ef = np.exp(np.minimum(2 * a * x, 100))
ef
array([7.38905610e+00, 2.71828183e+00, 1.00000000e+00, 5.45981500e+01,
       2.98095799e+03, 5.18470553e+21])
postmean_cond = x - a * s**2 * (2 * cp1 / (cp1 + ef * cp2) - 1)
postmean_cond
array([ 1.54686413,  0.70800417,  0.        ,  3.50018552,  7.5       ,
       49.5       ])
sx * w_post * postmean_cond
array([-1.01158962,  0.27095331,  0.        , -3.48800924,  7.5       ,
       49.5       ])

R code

postmean.laplace <- function(x, s = 1, w = 0.5, a = 0.5) {
#
#  Find the posterior mean for the double exponential prior for 
#   given x, s (sd), w and a.
#
    
  # Only allow a < 20 for input value.
  a <- min(a, 20)
  
  # First find the probability of being non-zero
    wpost <- wpost.laplace(w, x, s, a)
  
  # Now find the posterior mean conditional on being non-zero
    sx <- sign(x)
    x <- abs(x)
    xpa <- x/s + s*a
    xma <- x/s - s*a
    xpa[xpa > 35] <- 35
    xma[xma < -35] <- -35
    
    cp1 <- pnorm(xma)
    cp2 <- pnorm( - xpa)
    ef <- exp(pmin(2 * a * x, 100))
    postmeancond <- x - a * s^2 * ( 2 * cp1/(cp1 + ef * cp2) - 1)
  
  # Calculate posterior mean and return
    return(sx * wpost * postmeancond)
}
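
The steps above assembled into one sketch, reusing the packaged ebayesthresh.wpost_laplace for the posterior probability of being nonzero (illustrative, not the packaged postmean_laplace itself):

import numpy as np
from scipy.stats import norm
import ebayesthresh

def postmean_laplace_sketch(x, s=1.0, w=0.5, a=0.5):
    a = min(a, 20)
    x = np.asarray(x, dtype=float)
    wpost = ebayesthresh.wpost_laplace(w, x, s, a)   # posterior probability of being nonzero
    sx = np.sign(x)
    ax = np.abs(x)
    xpa = np.minimum(ax / s + s * a, 35)
    xma = np.maximum(ax / s - s * a, -35)
    cp1 = norm.cdf(xma)
    cp2 = norm.cdf(-xpa)
    ef = np.exp(np.minimum(2 * a * ax, 100))
    postmean_cond = ax - a * s**2 * (2 * cp1 / (cp1 + ef * cp2) - 1)   # mean given nonzero
    return sx * wpost * postmean_cond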

Results

  • Python
ebayesthresh.postmean_laplace(np.array([-2,1,0,-4,8,50]))
array([-1.01158962,  0.27095331,  0.        , -3.48800924,  7.5       ,
       49.5       ])
  • R
> postmean.laplace(c(-2,1,0,-4,8,50))
[1] -1.0115896  0.2709533  0.0000000 -3.4880092  7.5000000 49.5000000

postmed

Description

Given a single value or a vector of data and sampling standard deviations (sd is 1 for Cauchy prior), find the corresponding posterior median estimate(s) of the underlying signal value(s).

Find the posterior median estimate(s).

x = np.array([1.5, 2.5, 3.5])
s = 1
w = 0.5
prior = "laplace"
a = 0.5
pr = prior[0:1]
pr
'l'
if pr == "l":
    muhat = ebayesthresh.postmed_laplace(x, s, w, a)
elif pr == "c":
    if np.any(s != 1):
        raise ValueError("Only standard deviation of 1 is allowed for Cauchy prior.")
    muhat = ebayesthresh.postmed_cauchy(x, w)
else:
    raise ValueError(f"Unknown prior: {prior}")
muhat
array([0.        , 1.73413235, 2.97815763])

R code

postmed <- function (x, s = 1, w = 0.5, prior = "laplace", a = 0.5) {
#
#  Find the posterior median for the appropriate prior for 
#   given x, s (sd), w and a. 
#
    pr <- substring(prior, 1, 1)
    if(pr == "l")
        muhat <- postmed.laplace(x, s, w, a)
    if(pr == "c") {
            if(any(s != 1))
                stop(paste("Only standard deviation of 1 is allowed",
                           "for Cauchy prior."))
        muhat <- postmed.cauchy(x, w)
    }
    return(muhat)
}

Results

  • Python
ebayesthresh.postmed(x = np.array([1.5, 2.5, 3.5]))
array([0.        , 1.73413235, 2.97815763])
  • R
> postmed(x=c(1.5, 2.5, 3.5))
[1] 0.000000 1.734132 2.978158

postmed_cauchy

x = np.array([10, 15, 20, 25])
w = 0.5
nx = len(x)
nx
4
zest = np.full(nx, np.nan)
zest
array([nan, nan, nan, nan])
w = np.full(nx, w)
w
array([0.5, 0.5, 0.5, 0.5])
ax = np.abs(x)
ax
array([10, 15, 20, 25])
j = (ax < 20)
j
array([ True,  True, False, False])
zest[~j] = ax[~j] - 2 / ax[~j]
zest
array([  nan,   nan, 19.9 , 24.92])
if isinstance(np.max(ax[j]), (int, float, np.int64)):
    a = np.array([np.max(ax[j])] * 4)
a
array([15, 15, 15, 15])
if np.sum(j) > 0:
    zest[j] = ebayesthresh.vecbinsolv(zf=np.zeros(np.sum(j)), fun=ebayesthresh.cauchy_medzero, tlo=0, thi=np.max(ax[j]), z=ax[j], w=w[j])
zest[zest < 1e-7] = 0
zest
array([ 9.80064279, 14.86686095, 19.9       , 24.92      ])
zest = np.sign(x) * zest
zest
array([ 9.80064279, 14.86686095, 19.9       , 24.92      ])

R code

postmed.cauchy <- function(x, w) {
#
# find the posterior median of the Cauchy prior with mixing weight w,
# pointwise for each of the data points x
#
    nx <- length(x)
    zest <- rep(NA, length(x))
    w <- rep(w, length.out = nx)
    ax <- abs(x)
    j <- (ax < 20)
    zest[!j] <- ax[!j] - 2/ax[!j]
    if(sum(j) > 0) {
      zest[j] <- vecbinsolv(zf = rep(0, sum(j)), fun = cauchy.medzero,
                                tlo = 0, thi = max(ax[j]), z = ax[j],
                                w = w[j])
    }
    zest[zest < 1e-007] <- 0
    zest <- sign(x) * zest
    return(zest)
}

Results

  • Python
ebayesthresh.postmed_cauchy(x=np.array([10, 15, 20, 25]), w=0.5)
array([ 9.80064279, 14.86686095, 19.9       , 24.92      ])
  • R
> postmed.cauchy(x=c(10, 15, 20, 25),w=0.5)
[1]  9.800643 14.866861 19.900000 24.920000

postmed_laplace

x = np.array([1.5, 2.5, 3.5])
s = 1
w = 0.5
a = 0.5
a = min(a, 20)
a
0.5
sx = np.sign(x)
sx
array([1., 1., 1.])
x = np.abs(x)
x
array([1.5, 2.5, 3.5])
xma = x / s - s * a
xma
array([1., 2., 3.])
zz = 1 / a * (1 / s * norm.pdf(xma)) * (1 / w + ebayesthresh.beta_laplace(x, s, a))
zz
array([0.95559333, 0.60482943, 0.50871315])
zz[xma > 25] = 0.5
zz
array([0.95559333, 0.60482943, 0.50871315])
mucor = norm.ppf(np.minimum(zz, 1))
mucor
array([1.70169084, 0.26586765, 0.02184237])
muhat = sx * np.maximum(0, xma - mucor) * s
muhat
array([0.        , 1.73413235, 2.97815763])

R code

postmed.laplace <- function(x, s = 1, w = 0.5, a = 0.5) {
#
#  Find the posterior median for the Laplace prior for 
#   given x (observations), s (sd), w and a.
#
    
  # Only allow a < 20 for input value
  a <- min(a, 20)
  
  # Work with the absolute value of x, and for x > 25 use the approximation
  #  to dnorm(x-a)*beta.laplace(x, a)
    sx <- sign(x)
    x <- abs(x)
    xma <- x/s - s*a
    zz <- 1/a * (1/s*dnorm(xma)) * (1/w + beta.laplace(x, s, a))
    zz[xma > 25] <- 1/2
    mucor <- qnorm(pmin(zz, 1))
    muhat <- sx * pmax(0, xma - mucor) * s
    return(muhat)
}
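
A single-function sketch of the same computation, using the packaged ebayesthresh.beta_laplace (the function name is illustrative):

import numpy as np
from scipy.stats import norm
import ebayesthresh

def postmed_laplace_sketch(x, s=1.0, w=0.5, a=0.5):
    a = min(a, 20)
    x = np.asarray(x, dtype=float)
    sx = np.sign(x)
    ax = np.abs(x)
    xma = ax / s - s * a
    zz = (1 / a) * (norm.pdf(xma) / s) * (1 / w + ebayesthresh.beta_laplace(ax, s, a))
    zz = np.where(xma > 25, 0.5, zz)   # large-x approximation, as in the R code
    mucor = norm.ppf(np.minimum(zz, 1))
    return sx * np.maximum(0, xma - mucor) * s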

Results

  • Python
ebayesthresh.postmed_laplace(x = np.array([1.5, 2.5, 3.5]))
array([0.        , 1.73413235, 2.97815763])
  • R
> postmed.laplace(x=c(1.5, 2.5, 3.5), s = 1, w = 0.5, a = 0.5)
[1] 0.000000 1.734132 2.978158

threshld

Threshold the data using the threshold value t.

x = np.array(range(-5,5))
t=1.4
hard=False
if hard:
    z = x * (np.abs(x) >= t)
else:
    z = np.sign(x) * np.maximum(0, np.abs(x) - t)
z
array([-3.6, -2.6, -1.6, -0.6, -0. ,  0. ,  0. ,  0.6,  1.6,  2.6])

R code

threshld <- function(x, t, hard = TRUE) {
#
#  threshold the data x using threshold t
#  if hard=TRUE use hard thresholding
#  if hard=FALSE use soft thresholding
    if(hard) z <- x * (abs(x) >= t) else {
        z <- sign(x) * pmax(0, abs(x) - t)
    }
    return(z)
}
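
A minimal sketch that makes the hard/soft distinction explicit: hard thresholding keeps values with |x| >= t unchanged and zeroes the rest, while soft thresholding additionally shrinks the survivors toward zero by t.

import numpy as np

def threshld_sketch(x, t, hard=True):
    x = np.asarray(x, dtype=float)
    if hard:
        return x * (np.abs(x) >= t)                       # keep or kill
    return np.sign(x) * np.maximum(0, np.abs(x) - t)      # shrink and kill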

Results

  • Python
ebayesthresh.threshld(np.array(range(-5,5)), t=1.4, hard=False)
array([-3.6, -2.6, -1.6, -0.6, -0. ,  0. ,  0. ,  0.6,  1.6,  2.6])
  • R
> threshld(as.array(seq(-5, 5)), t=1.4, hard=FALSE)
 [1] -3.6 -2.6 -1.6 -0.6  0.0  0.0  0.0  0.6  1.6  2.6  3.6

wandafromx

Given a vector of data and a single value or vector of sampling standard deviations, find the marginal maximum likelihood choice of both weight and scale factor under the Laplace prior

x = np.array([-2,1,0,-4,8], dtype=float)
s = 1
universalthresh = True
if universalthresh:
    thi = np.sqrt(2 * np.log(len(x))) * s
else:
    thi = np.inf
thi
1.7941225779941015
if isinstance(s, int):
    tlo = np.zeros(len(str(s)))
else:
    tlo = np.zeros(len(s))
tlo
array([0.])
lo = np.array([0, 0.04])
lo
array([0.  , 0.04])
hi = np.array([1, 3])
hi
array([1, 3])
startpar = np.array([0.5, 0.5])
startpar
array([0.5, 0.5])

The negloglik_laplace result also matches R.

ebayesthresh.negloglik_laplace(startpar, x, s, tlo, thi).round(5)
-32.32783
  • R
> negloglik.laplace(startpar,x,s,thi,tlo)
[1] -32.32783

if 'optim' in globals():
    result = minimize(ebayesthresh.negloglik_laplace, startpar, method='L-BFGS-B', bounds=[(lo[0], hi[0]), (lo[1], hi[1])], args=(x, s, tlo, thi))
    uu = result.x
else:
    result = minimize(ebayesthresh.negloglik_laplace, startpar, bounds=[(lo[0], hi[0]), (lo[1], hi[1])], args=(x, s, tlo, thi))
    uu = result.x
minimize(ebayesthresh.negloglik_laplace, startpar, method='L-BFGS-B',
         bounds=[(lo[0], hi[0]), (lo[1], hi[1])], args=(x, s, tlo, thi)).x
array([0.69481312, 0.30010834])
minimize(ebayesthresh.negloglik_laplace, startpar, method='L-BFGS-B',
         bounds=[(lo[0], hi[0]), (lo[1], hi[1])], args=(x, s, thi, tlo)).fun.round(5)
-33.07559
  • R
> optim(startpar, negloglik.laplace, method="L-BFGS-B",
+               lower = lo, upper = hi, xx = x, ss = s, thi = thi,
+               tlo = tlo)$par
[1] 0.6948121 0.3001091
> optim(startpar, negloglik.laplace, method="L-BFGS-B",
+               lower = lo, upper = hi, xx = x, ss = s, thi = thi,
+               tlo = tlo)$value
[1] -33.07559
a = uu[1]
a
0.30010833999317726
wlo = ebayesthresh.wfromt(thi, s, a=a)
wlo
0.49762405565749024
whi = ebayesthresh.wfromt(tlo, s, a=a)
whi
array([1.])
wlo = np.max(wlo)
wlo
0.49762405565749024
whi = np.min(whi)
whi
1.0

Here the value of uu[0] differs slightly from the R result.

w = uu[0] * (whi - wlo) + wlo
w
0.8466814522110238

R code

wandafromx <- function(x, s = 1, universalthresh = TRUE) {
#
#  Find the marginal max lik estimators of w and a given standard
#   deviation s, using a bivariate optimization;
#    
#  If universalthresh=TRUE, the thresholds will be upper bounded by
#   universal threshold adjusted by standard deviation. The threshold
#   is constrained to lie between 0 and sqrt ( 2 log (n)) *
#   s. Otherwise, threshold can take any nonnegative value;
#
#  If running R, the routine optim is used; in S-PLUS the routine is
#   nlminb.
#
    
  # Range for thresholds
  if(universalthresh) {
    thi <- sqrt(2 * log(length(x))) * s
  } else{
    thi <- Inf
  }
  
    tlo <- rep(0, length(s))
    lo  <-  c(0,0.04)
    hi  <-  c(1,3)
    startpar  <-  c(0.5,0.5)
    if (exists("optim")) {
      uu <- optim(startpar, negloglik.laplace, method="L-BFGS-B",
                      lower = lo, upper = hi, xx = x, ss = s, thi = thi,
                      tlo = tlo)
          uu <- uu$par
    }
    else {
          uu <- nlminb(startpar, negloglik.laplace, lower = lo,
                       upper = hi, xx = x, ss = s, thi = thi, tlo = tlo)
          uu <- uu$parameters
        }
    
    a <- uu[2]
    wlo <- wfromt(thi, s, a = a)
    whi <- wfromt(tlo, s, a = a)
    wlo <- max(wlo)
    whi <- min(whi)
    w <- uu[1]*(whi - wlo) + wlo
    return(list(w=w, a=a))
}

Results

  • Python
ebayesthresh.wandafromx(np.array([-2,1,0,-4,8,50], dtype=float))
{'w': 1.0, 'a': 0.41641372425894785}
ebayesthresh.wandafromx(np.array([-2,1,0,-4,8], dtype=float))
{'w': 0.8466814522110238, 'a': 0.30010833999317726}
  • R
> wandafromx(c(-2,1,0,-4,8,50))
$w
[1] 1

$a
[1] 0.4163946
> wandafromx(c(-2,1,0,-4,8))
$w
[1] 0.8466808

$a
[1] 0.3001091
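
The estimated pair can be plugged straight back into the posterior-median rule. A sketch (it assumes postmed_laplace accepts w and a keyword arguments mirroring the R signature):

import numpy as np
import ebayesthresh

x = np.array([-2, 1, 0, -4, 8], dtype=float)
wa = ebayesthresh.wandafromx(x)                      # {'w': ..., 'a': ...} as above
muhat = ebayesthresh.postmed_laplace(x, s=1, w=wa['w'], a=wa['a'])
print(muhat)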

mad (Median Absolute Deviation)

Median absolute deviation: a robust way to gauge the spread (dispersion) of the data.

Results

  • Python
ebayesthresh.mad(np.array([1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90]))
2.9652
  • R
> mad(c(1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90))
[1] 2.9652
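
The value 2.9652 is the raw median absolute deviation scaled by the usual consistency constant 1.4826, so that mad estimates the standard deviation for Gaussian data:

import numpy as np

x = np.array([1, 2, 3, 3, 4, 4, 4, 5, 5.5, 6, 6, 6.5, 7, 7, 7.5, 8, 9, 12, 52, 90])
raw = np.median(np.abs(x - np.median(x)))
print(raw, 1.4826 * raw)    # 2.0  2.9652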

wfromt

Description

Given a value or vector of thresholds and sampling standard deviations (sd equals 1 for Cauchy prior), find the mixing weight for which this is(these are) the threshold(s) of the posterior median estimator. If a vector of threshold values is provided, the vector of corresponding weights is returned.

Given the threshold(s) and standard deviation, this computes the mixing weight(s) for which the posterior median estimator has exactly that threshold.

# tt = np.array([2,3,5])
tt = 2.14
s = 1
prior = "laplace"
a = 0.5
pr = prior[0:1]
pr
'l'
if pr == "l":
    tma = tt / s - s * a
    wi = 1 / np.abs(tma)
    if isinstance(wi, (int, float, str, bool)) and tma > -35:
        wi = norm.cdf(tma) / norm.pdf(tma)
    elif isinstance(wi, (int, float, str, bool)) and tma < -35:
        wi = wi
    else:
        wi[tma > -35] = norm.cdf(tma[tma > -35])/norm.pdf(tma[tma > -35])
    wi = a * s * wi - ebayesthresh.beta_laplace(tt, s, a)
tma
1.6400000000000001
wi
3.1985740994349467
if pr == "c":
    dnz = norm.pdf(tt)
    wi = 1 + (norm.cdf(tt) - tt * dnz - 1/2) / (np.sqrt(np.pi/2) * dnz * tt**2)
    if isinstance(wi, np.ndarray):
        for i in range(len(wi)):
            if not np.isfinite(wi[i]):
                wi[i] = 1
    else:
        if not np.isfinite(wi):
            wi = 1
1 / wi
0.31263931017782515
  • R code
wfromt <- function(tt, s = 1, prior = "laplace", a = 0.5) {
  #
  #  Find the weight that has posterior median threshold tt, 
  #   given s (sd) and a.
  #
pr <- substring(prior, 1, 1)
if(pr == "l"){
  tma <- tt/s - s*a
  wi <- 1/abs(tma)
  wi[tma > -35] <- pnorm(tma[tma > -35])/dnorm(tma[tma > -35])
  wi <- a * s * wi - beta.laplace(tt, s, a)
}
if(pr == "c") {
  dnz <- dnorm(tt)
  wi <- 1 + (pnorm(tt) - tt * dnz - 1/2)/
    (sqrt(pi/2) * dnz * tt^2)
  wi[!is.finite(wi)] <- 1
}
1/wi
}

Results

  • Python
ebayesthresh.wfromt(np.array([2,3,5]),prior='cauchy')
array([4.22963403e-01, 9.33799336e-02, 9.31590884e-05])
ebayesthresh.wfromt(2,prior='cauchy')
0.4229634032635055
ebayesthresh.wfromt(2,prior='laplace')
0.3686337675493349
  • R
> wfromt(c(2,3,5),prior='cauchy')
[1] 4.229634e-01 9.337993e-02 9.315909e-05
> wfromt(2,prior='cauchy')
[1] 0.4229634
> wfromt(2,prior='laplace')
[1] 0.3686338
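
wfromt and tfromw are (approximate) inverses: mapping a threshold to its weight and back recovers the threshold up to the bisection tolerance. A sketch using the same calls as above:

import numpy as np
import ebayesthresh

w = ebayesthresh.wfromt(2, prior='laplace')        # 0.3686..., as above
t_back = ebayesthresh.tfromw(np.array([w]), s=1)   # should be close to 2
print(w, t_back)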

wfromx

Description

The weight is found by marginal maximum likelihood. The search is over weights corresponding to thresholds \(t_i\) in the range \([0, s_i \sqrt{2 \log n}]\) if universalthresh=TRUE, where \(n\) is the length of the data vector and \((s_1, \ldots, s_n)\) (\(s_i\) is \(1\) for the Cauchy prior) is the vector of sampling standard deviations of the data \((x_1, \ldots, x_n)\); otherwise, the search is over \([0, 1]\). The search is by binary search for a solution to the equation \(S(w) = 0\), where \(S\) is the derivative of the log likelihood. The binary search is on a logarithmic scale in \(w\). If the Laplace prior is used, the scale parameter is fixed at the value given for \(a\), and defaults to \(0.5\) if no value is provided. To estimate \(a\) as well as \(w\) by marginal maximum likelihood, use the routine wandafromx.

Suppose the vector \((x_1, \cdots, x_n)\) is such that \(x_i\) is drawn independently from a normal distribution with mean \(\theta_i\) and standard deviation \(s_i\) (\(s_i\) equals \(1\) for Cauchy prior). The prior distribution of the \(\theta_i\) is a mixture with probability \(1 − w\) of zero and probability \(w\) of a given symmetric heavy-tailed distribution. This routine finds the marginal maximum likelihood estimate of the parameter \(w\).

Given normally distributed data with this prior on the \(\theta_i\), the routine computes the marginal maximum likelihood estimate of the parameter \(w\).

s = np.concatenate((np.repeat(0, 90), np.repeat(5, 10)))
x = np.random.normal(0, s, size=100)
prior = "cauchy"
a = 0.5
universalthresh = True
pr = prior[0:1]
pr
'c'
if pr == "c":
    s = 1
if universalthresh:
    tuniv = np.sqrt(2 * np.log(len(x))) * s
    wlo = ebayesthresh.wfromt(tuniv, s, prior, a)
    wlo = np.max(wlo)
else:
    wlo = 0
if pr == "l":
    beta = ebayesthresh.beta_laplace(x, s, a)
elif pr == "c":
    beta = ebayesthresh.beta_cauchy(x)
whi = 1
beta = np.minimum(beta, 1e20)

shi = np.sum(beta / (1 + beta))
if shi >= 0:
    shi =  1
slo = np.sum(beta / (1 + wlo * beta))
if slo <= 0:
    slo = wlo
for _ in range(1,31):
    wmid = np.sqrt(wlo * whi)
    smid = np.sum(beta / (1 + wmid * beta))
    if smid == 0:
        smid = wmid
    if smid > 0:
        wlo = wmid
    else:
        whi = wmid
np.sqrt(wlo * whi)
0.1397274377272373
  • R code
wfromx <- function (x, s = 1, prior = "laplace", a = 0.5,
                    universalthresh = TRUE) {
#    
#  Given the vector of data x and s (sd),
#   find the value of w that zeroes S(w) in the
#   range by successive bisection, carrying out nits harmonic bisections
#   of the original interval between wlo and 1.  
#  
    pr <- substring(prior, 1, 1)
    if(pr == "c")
          s = 1
    if(universalthresh) {
          tuniv <- sqrt(2 * log(length(x))) * s
          wlo <- wfromt(tuniv, s, prior, a)
          wlo <- max(wlo)
    } else 
          wlo = 0
    if(pr == "l")
      beta <- beta.laplace(x, s, a)
    if(pr == "c")
      beta <- beta.cauchy(x)
    whi  <- 1
    beta <- pmin(beta, 1e20) 
    shi  <- sum(beta/(1 + beta))
    if(shi >= 0)
      return(1)
    slo <- sum(beta/(1 + wlo * beta))
    if(slo <= 0)
      return(wlo)
    for(j in (1:30)) {
      wmid <- sqrt(wlo * whi)
      smid <- sum(beta/(1 + wmid * beta))
      if(smid == 0)
        return(wmid)
      if(smid > 0)
        wlo <- wmid
      else
        whi <- wmid
        }
    return(sqrt(wlo * whi))
}

Results

  • Python
ebayesthresh.wfromx(x= np.random.normal(0, np.concatenate((np.repeat(0, 90), np.repeat(5, 10))), size=100), prior = "cauchy")
0.08644292644513768
  • R
> wfromx(x = rnorm(100, s = c(rep(0,90),rep(5,10))), prior = "cauchy")
[1] 0.116067
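
The Python and R numbers above differ only because each call draws its own random data; with the same input both should give essentially the same weight. The estimated weight can then be turned into a threshold with tfromw (this is exactly what tfromx, shown later, does internally). A sketch, with an arbitrary seed added purely to make it repeatable:

import numpy as np
import ebayesthresh

np.random.seed(42)   # arbitrary seed, only so the sketch is repeatable
x_sim = np.random.normal(0, np.concatenate((np.repeat(0, 90), np.repeat(5, 10))), size=100)
w = ebayesthresh.wfromx(x_sim, prior="cauchy")
print(w)
print(ebayesthresh.tfromw(w, prior="cauchy"))   # threshold implied by w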

wmonfromx

Given a vector of data, find the marginal maximum likelihood choice of weight sequence subject to the constraints that the weights are monotone decreasing

The weight sequence is chosen by marginal maximum likelihood for the given data, subject to the constraint that the weights are monotonically decreasing.

xd = np.random.normal(0, 1, size=10)
prior = "laplace"
a = 0.5
tol = 1e-08
maxits = 20
pr = prior[0:1]
pr
'l'
nx = len(xd)
nx
10
wmin = ebayesthresh.wfromt(np.sqrt(2 * np.log(len(xd))), prior=prior, a=a)
wmin
0.31029680918570224
winit = 1
winit
1
if pr == "l":
    beta = ebayesthresh.beta_laplace(xd, a=a)
if pr == "c":
    beta = ebayesthresh.beta_cauchy(xd)
w = np.repeat(winit, len(beta))
w
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
for j in range(maxits):
    aa = w + 1 / beta
    ps = w + aa
    ww = 1 / aa ** 2
    wnew = ebayesthresh.isotone(ps, ww, increasing=False)
    wnew = np.maximum(wmin, wnew)
    wnew = np.minimum(1, wnew)
    zinc = np.max(np.abs(wnew - w))  # largest change from the previous iterate, as in the R code
    w = wnew
    # if zinc < tol:
    #     break
# warnings.warn("More iterations required to achieve convergence")

R code

wmonfromx <- function (xd, prior = "laplace", a = 0.5,
                       tol = 1e-08, maxits = 20) {
#
#  Find the monotone marginal maximum likelihood estimate of the
#   mixing weights for the Laplace prior with parameter a.  It is
#   assumed that the noise variance is equal to one.
#
#  Find the beta values and the minimum weight
#  
#  Current version allows for standard deviation of 1 only.
#
    pr <- substring(prior, 1, 1)
    nx <- length(xd)
    wmin <- wfromt(sqrt(2 * log(length(xd))), prior=prior, a=a)
    winit <- 1
    if(pr == "l")
      beta <- beta.laplace(xd, a=a)
    if(pr == "c")
           beta <- beta.cauchy(xd)
        
#   now conduct iterated weighted least squares isotone regression
#    
    w <- rep(winit, length(beta))
    for(j in (1:maxits)) {
        aa <- w + 1/beta
        ps <- w + aa
        ww <- 1/aa^2
        wnew <- isotone(ps, ww, increasing = FALSE)
        wnew <- pmax(wmin, wnew)
        wnew <- pmin(1, wnew)
        zinc <- max(abs(range(wnew - w)))
        w <- wnew
        if(zinc < tol)
          return(w)
    }
        
    warning("More iterations required to achieve convergence")
    return(w)
}

Results

  • Python
ebayesthresh.wmonfromx(xd = np.random.normal(0, 1, size=10), prior = "laplace")
array([0.31029681, 0.31029681, 0.31029681, 0.31029681, 0.31029681,
       0.31029681, 0.31029681, 0.31029681, 0.31029681, 0.31029681])
  • R
> wmonfromx(xd <- rnorm(10,0,1), prior = "laplace")
 [1] 0.3102968 0.3102968 0.3102968 0.3102968 0.3102968 0.3102968 0.3102968 0.3102968 0.3102968 0.3102968

> wmonfromx(xd=rnorm(5, s = 1), prior = "laplace", a = 0.5, tol = 1e-08, maxits = 20)
[1] 0.9363989 0.9363989 0.9363989 0.4522184 0.4522184

vecbinsolv

zf = np.array([1, 2, 3])
# zf = 0
def fun(t, *args, **kwargs):
    c = kwargs.get('c', 0) 
    return [x**2+c for x in t]
tlo = 0
thi = 10
nits = 30
if isinstance(zf, int):
    nz = len(str(zf))
else:
    nz = len(zf)
nz
3
if isinstance(tlo, (int, float)):
    tlo = [tlo] * nz
tlo
[0, 0, 0]
if len(tlo) != nz:
    raise ValueError("Lower constraint has to be homogeneous or has the same length as #functions.")
if isinstance(thi, (int, float)):
    thi = [thi] * nz
thi
[10, 10, 10]
if len(thi) != nz:
    raise ValueError("Upper constraint has to be homogeneous or has the same length as #functions.")
c=2

for _ in range(nits):
    tmid = np.array([(lo + hi) / 2 for lo, hi in zip(tlo, thi)])
    if fun == ebayesthresh.cauchy_threshzero:
        fmid = fun(tmid, s=s,w=w)
    elif fun == ebayesthresh.laplace_threshzero:
        fmid = fun(tmid, s=s,w=w,a=a)
    else:
        fmid = fun(tmid)
    if isinstance(fmid, (list,np.ndarray)) and isinstance(zf, (list,np.ndarray)):
        indt = [f <= z for f, z in zip(fmid, zf)]
    else: 
        indt = fmid <= zf
    tlo = [tm if ind else lo for tm, lo, ind in zip(tmid, tlo, indt)]
    thi = [tm if not ind else hi for tm, hi, ind in zip(tmid, thi, indt)]
tmid
array([1.00000001, 1.41421356, 1.7320508 ])
fmid
[1.000000011175871, 2.0000000000315445, 2.999999989096492]
indt
[False, False, True]
tlo
[0.9999999962747097, 1.414213553071022, 1.7320508044213057]
thi
[1.0000000055879354, 1.4142135623842478, 1.7320508137345314]
tsol = [(lo + hi) / 2 for lo, hi in zip(tlo, thi)]
tsol
[1.0000000009313226, 1.414213557727635, 1.7320508090779185]

R code

vecbinsolv <- function(zf, fun, tlo, thi, nits = 30, ...) {
#
#  Given a monotone function fun, and a vector of values
#   zf find a vector of numbers t such that f(t) = zf.
#   The solution is constrained to lie on the interval (tlo, thi)
#
#  The function fun may be a vector of increasing functions 
#
#  Present version is inefficient because separate calculations
#   are done for each element of z, and because bisections are done even
#   if the solution is outside the range supplied
#    
#  It is important that fun should work for vector arguments.
#   Additional arguments to fun can be passed through ...
#
#  Works by successive bisection, carrying out nits harmonic bisections
#   of the interval between tlo and thi
#
    nz <- length(zf)
    if(length(tlo)==1) tlo <- rep(tlo, nz)
    if(length(tlo)!=nz)
          stop(paste("Lower constraint has to be homogeneous",
                     "or has the same length as #functions."))
    if(length(thi)==1) thi <- rep(thi, nz)
    if(length(thi)!=nz)
          stop(paste("Upper constraint has to be homogeneous",
                     "or has the same length as #functions."))

#  carry out nits bisections
#
    for(jj in (1:nits)) {
        tmid <- (tlo + thi)/2
        fmid <- fun(tmid, ...)
        indt <- (fmid <= zf)
        tlo[indt] <- tmid[indt]
        thi[!indt] <- tmid[!indt]
    }
    tsol <- (tlo + thi)/2
    return(tsol)
}

Results

  • Python
zf = np.array([1, 2, 3])
def fun(t, *args, **kwargs):
    return [2*x for x in t]
tlo = 0
thi = 10
ebayesthresh.vecbinsolv(zf, fun, tlo, thi)
[0.5000000027939677, 1.0000000009313226, 1.4999999990686774]
def fun(t, *args, **kwargs):
    return [x**2 for x in t]
ebayesthresh.vecbinsolv(zf, fun, tlo, thi)
[1.0000000009313226, 1.414213557727635, 1.7320508090779185]
  • R
> zf <- c(1, 2, 3)
> fun <- function(x) 2*x
> tlo <- 0
> thi <- 10
> vecbinsolv(zf, fun, tlo, thi)
[1] 0.5 1.0 1.5
> fun <- function(x) x**2
> vecbinsolv(zf, fun, tlo, thi)
[1] 1.000000 1.414214 1.732051
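
vecbinsolv can invert any monotone (vector of) function(s) on an interval; here it inverts the standard normal CDF, following the same calling pattern as the examples above:

import numpy as np
from scipy.stats import norm
import ebayesthresh

def probit_like(t, *args, **kwargs):
    # monotone function to invert; extra arguments are ignored, as in the example above
    return norm.cdf(t)

zf = np.array([0.5, 0.9, 0.975])
sol = ebayesthresh.vecbinsolv(zf, probit_like, 0, 10)
print(sol)              # roughly [0, 1.2816, 1.9600]
print(norm.ppf(zf))     # reference values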

tfromw

Given a single value or a vector of weights (i.e. prior probabilities that the parameter is nonzero) and sampling standard deviations (sd equals 1 for Cauchy prior), find the corresponding threshold(s) under the specified prior.

Given a weight vector w and s (standard deviation), find the threshold, or vector of thresholds, corresponding to those weights under the specified prior. If bayesfac=True, the Bayes-factor thresholds are found; otherwise the posterior-median thresholds are found. If the Laplace prior is used, a gives the value of the inverse scale (i.e., rate) parameter.

Parameters:

  • w (array-like): vector of weights
  • s (float): standard deviation (default: 1)
  • prior (str): prior distribution (default: "laplace")
  • bayesfac (bool): whether to return Bayes-factor thresholds (default: False)
  • a (float): scale parameter, restricted to a < 20 (default: 0.5)
w = np.array([0.05, 0.1])
# w = 0.5
s = 1
prior = "laplace"
# prior = "c"
bayesfac = False
a = 0.5
universalthresh = True
pr = prior[0:1]
pr
'l'
if bayesfac:
    z = 1 / w - 2
    if pr == "l":
        if isinstance(s, (int, float, str, bool)) and len(w) >= len(str(s)):
            zz = z
        elif isinstance(s, (int, float, str, bool)) and len(w) <len(str(s)):
            zz = [z] * len(str(s))
        elif len(w) >= len(s):
            zz = z
        elif len(w) < len(str(s)):
            zz = [z] * len(s)
        tt = ebayesthresh.vecbinsolv(zz, ebayesthresh.beta_laplace, 0, 10, 30, s=s,w=w, a=a)
    elif pr == "c":
        tt = ebayesthresh.vecbinsolv(z, ebayesthresh.beta_cauchy, 0, 10, 30, w=w)

else:
    z = 0
    if pr == "l":
        if isinstance(s, (int, float, str, bool)) and not isinstance(w, (int, float, str, bool)):
            zz = np.array([0] * max(len(str(s)), len(w)))
        elif not isinstance(s, (int, float, str, bool)) and isinstance(w, (int, float, str, bool)):
            zz = np.array([0] * max(len(s), len(str(w))))
        elif isinstance(s, (int, float, str, bool)) and isinstance(w, (int, float, str, bool)):
            zz = np.array([0] * max(len(str(s)), len(str(w))))
        else:
            zz = [0] * max(len(s), len(w))
        tt = ebayesthresh.vecbinsolv(zz, ebayesthresh.laplace_threshzero, 0, s * (25 + s * a), 30, s=s, w=w, a=a)
    elif pr == "c":
        tt = ebayesthresh.vecbinsolv(z, ebayesthresh.cauchy_threshzero, 0, 10, 30, w=w)
tt
[3.1152117408346385, 2.8163059337530285]

R code

tfromw <- function(w, s = 1, prior = "laplace", bayesfac = FALSE, a = 0.5) {
#
#  Given the vector of weights w and s (sd), find the threshold or
#   vector of thresholds corresponding to these weights, under the
#   specified prior.
#  If bayesfac=TRUE the Bayes factor thresholds are found, otherwise
#   the posterior median thresholds are found.
#  If the Laplace prior is used, a gives the value of the inverse scale
#   (i.e., rate) parameter
#
    pr <- substring(prior, 1, 1)
    if(bayesfac) {
        z <- 1/w - 2
        if(pr == "l"){ 
          if(length(w)>=length(s)) {
            zz <- z
          } else { zz <- rep(z, length(s)) }
          tt <- vecbinsolv(zz, beta.laplace, 0, 10, s = s, a = a)
        }
        if(pr == "c")
            tt <- vecbinsolv(z, beta.cauchy, 0, 10)
    }
    else {
      z <- 0
        if(pr == "l"){
          zz <- rep(0, max(length(s), length(w)))
                  
          # When x/s-s*a>25, laplace.threshzero has value
          #  close to 1/2; The boundary value of x can be
          #  treated as the upper bound for search.
          tt <- vecbinsolv(zz, laplace.threshzero, 0, s*(25+s*a),
                                   s = s, w = w, a = a)
        }
        if(pr == "c")
            tt <- vecbinsolv(z, cauchy.threshzero, 0, 10, w = w)
    }
    return(tt)
}

Results

  • Python
ebayesthresh.tfromw(np.array([0.05, 0.1]), s = 1)
[3.1152117408346385, 2.8163059337530285]
ebayesthresh.tfromw(np.array([0.05, 0.1]), prior = "cauchy", bayesfac = True)
[3.2596351904794574, 2.9597404645755887]
  • R
> tfromw(c(0.05, 0.1), s = 1)
[1] 3.115212 2.816306
> tfromw(c(0.05, 0.1), prior = "cauchy", bayesfac = TRUE)
[1] 3.259635 2.959740
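
For the same weights the two kinds of thresholds can be compared directly. A sketch (it assumes bayesfac=True works for the Laplace prior just as it does for the Cauchy prior above):

import numpy as np
import ebayesthresh

w = np.array([0.05, 0.1])
print(ebayesthresh.tfromw(w, s=1))                   # posterior-median thresholds, as above
print(ebayesthresh.tfromw(w, s=1, bayesfac=True))    # Bayes-factor thresholds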

tfromx

Given a vector of data and standard deviations (sd equals 1 for Cauchy prior), find the value or vector (heterogeneous sampling standard deviation with Laplace prior) of thresholds corresponding to the marginal maximum likelihood choice of weight.

Given the data, this function finds the threshold corresponding to the marginal maximum likelihood choice of weight.

x = np.array([0.05,0.1])
s=1
prior="laplace"
bayesfac=False
a=0.5
universalthresh=True
pr = prior[0:1]
if pr == "c":
    s = 1
if pr == "l" and np.isnan(a):
    wa = ebayesthresh.wandafromx(x, s, universalthresh)
    w = wa['w']
    a = wa['a']
else:
    w = ebayesthresh.wfromx(x, s, prior=prior, a=a)
ebayesthresh.tfromw(w, s, prior=prior, bayesfac=bayesfac, a=a)
1.1774100337643176

R code

tfromx <- function (x, s = 1, prior = "laplace", bayesfac = FALSE, a = 0.5,
                    universalthresh = TRUE) {
#
#  Given the data x, the prior, and any other parameters, find the
#   threshold corresponding to the marginal maximum likelihood
#   estimator of the mixing weight.
#
  pr <- substring(prior, 1, 1)
  if(pr == "c")
    s = 1
  if ( pr=="l" && is.na (a) ) {
    wa <-  wandafromx(x, s, universalthresh)
    w  <-  wa$w
    a  <-  wa$a 
  } else {
    w <- wfromx(x, s, prior = prior, a = a)
  }
  return(tfromw(w, s, prior = prior, bayesfac = bayesfac, a = a))
}

Results

  • Python
ebayesthresh.tfromx(x=np.array([0.05,0.1]), s = 1, prior = "laplace", bayesfac = False, a = 0.5, universalthresh = True)
1.1774100337643176
ebayesthresh.tfromx(x = np.concatenate([np.random.normal(0, 1, 90), np.random.normal(5, 1, 10)]), prior = "cauchy")
2.2073050821200013
  • R
> tfromx(x=c(0.05,0.1), s = 1, prior = "laplace", bayesfac = FALSE, a = 0.5,universalthresh = TRUE)
[1] 1.17741
> tfromx(x=c(rnorm(90, mean=0, sd=1), rnorm(10, mean=5, sd=1)), prior = "cauchy")
[1] 2.301196

wpost_laplace

Calculate the posterior weight for non-zero effect

  • Computes the posterior probability (weight) that each effect is non-zero.
w = 0.5
x = np.array([-2,1,0,-4,8,50])
s = 1
a = 0.5
laplace_beta = ebayesthresh.beta_laplace(x, s, a)
laplace_beta
array([ 8.89852030e-001, -3.80041717e-001, -5.61817772e-001,
        2.85459467e+002,  1.02698062e+012,  6.34453954e+265])
1 - (1 - w) / (1 + w * laplace_beta)
array([0.65396152, 0.38270015, 0.30467782, 0.99652125, 1.        ,
       1.        ])

R code

wpost.laplace <- function(w, x, s = 1, a = 0.5)
#
#  Calculate the posterior weight for non-zero effect
#
  1 - (1 - w)/(1 + w * beta.laplace(x, s, a))

Results

  • Python
ebayesthresh.wpost_laplace(0.5,np.array([-2,1,0,-4,8,50]))
array([0.65396152, 0.38270015, 0.30467782, 0.99652125, 1.        ,
       1.        ])
  • R

This function exists in the EbayesThresh GitHub sources but is not in the R package, so there is no R result to compare.
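
A short usage sketch (only the call already shown above is used): the posterior weight rises towards 1 as |x| grows, i.e. large observations are almost certainly non-zero effects.

import numpy as np
import ebayesthresh

x = np.array([-2, 1, 0, -4, 8, 50])
post_w = ebayesthresh.wpost_laplace(0.5, x)   # prior weight w = 0.5
print(np.c_[x, post_w])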

zetafromx

Description

Suppose a sequence of data has underlying mean vector with elements \(\theta_i\). Given the sequence of data, and a vector of scale factors cs and a lower limit pilo, this routine finds the marginal maximum likelihood estimate of the parameter zeta such that the prior probability of \(\theta_i\) being nonzero is of the form median(pilo, zeta*cs, 1).

Computes the marginal maximum likelihood estimate of the parameter zeta.

xd = np.array([-2,1,0,-4,8,50])
cs = np.array([2,3,5,6,1,-1])
pilo = None
prior="laplace"
# priir="c"
a=0.5
pr = prior[0:1]
pr
'l'
nx = len(xd)
nx
6
if pilo is None:
    pilo = ebayesthresh.wfromt(np.sqrt(2 * np.log(len(xd))), prior=prior, a=a)
pilo
0.4121150272751926
if pr == "l":
    beta = ebayesthresh.beta_laplace(xd, a=a)
elif pr == "c":
    beta = ebayesthresh.beta_cauchy(xd)
beta
array([ 8.89852030e-001, -3.80041717e-001, -5.61817772e-001,
        2.85459467e+002,  1.02698062e+012,  6.34453954e+265])
zs1 = pilo / cs
zs1
array([ 0.20605751,  0.13737168,  0.08242301,  0.06868584,  0.41211503,
       -0.41211503])
zs2 = 1 / cs
zs2
array([ 0.5       ,  0.33333333,  0.2       ,  0.16666667,  1.        ,
       -1.        ])
zj = np.sort(np.unique(np.concatenate((zs1, zs2))))
zj
array([-1.        , -0.41211503,  0.06868584,  0.08242301,  0.13737168,
        0.16666667,  0.2       ,  0.20605751,  0.33333333,  0.41211503,
        0.5       ,  1.        ])
cb = cs * beta
cb
array([ 1.77970406e+000, -1.14012515e+000, -2.80908886e+000,
        1.71275680e+003,  1.02698062e+012, -6.34453954e+265])
mz = len(zj)
mz
12
zlmax = None
zlmax
lmin = np.zeros(mz, dtype=bool)
lmin
array([False, False, False, False, False, False, False, False, False,
       False, False, False])
for j in range(1, mz - 1):
    ze = zj[j]
    cbil = cb[(ze > zs1) & (ze <= zs2)]
    ld = np.sum(cbil / (1 + ze * cbil))
    if ld <= 0:
        cbir = cb[(ze >= zs1) & (ze < zs2)]
        rd = np.sum(cbir / (1 + ze * cbir))
        lmin[j] = rd >= 0
cbir = cb[zj[0] == zs1]
cbir
array([], dtype=float64)
rd = np.sum(cbir / (1 + zj[0] * cbir))
rd
0.0
if rd > 0:
    lmin[0] = True
else:
    zlmax = zj[0]
cbil = cb[zj[mz - 1] == zs2]
cbil
array([1.02698062e+12])
ld = np.sum(cbil / (1 + zj[mz - 1] * cbil))
ld
0.9999999999990262
if ld < 0:
    lmin[mz - 1] = True
else:
    zlmax = zj[mz - 1]
zlmin = zj[lmin]
zlmin
array([-0.41211503,  0.06868584,  0.33333333])
nlmin = len(zlmin)
nlmin
3
for j in range(1, nlmin):
    zlo = zlmin[j - 1]
    zhi = zlmin[j]
    ze = (zlo + zhi) / 2
    zstep = (zhi - zlo) / 2
    for nit in range(10):
        cbi = cb[(ze >= zs1) & (ze <= zs2)]
        likd = np.sum(cbi / (1 + ze * cbi))
        zstep /= 2
        ze += zstep * np.sign(likd)
    zlmax = np.append(zlmax, ze)
nlmax = len(zlmax)
nlmax
3
zm = np.empty(nlmax)
zm
array([  0.82238932,   0.56239524, 267.81623818])
for j in range(nlmax):
    pz = np.maximum(zs1, np.minimum(zlmax[j], zs2))
    zm[j] = np.sum(np.log(1 + cb * pz))
pz
array([ 0.20605751,  0.15591096,  0.15591096,  0.15591096,  0.41211503,
       -0.41211503])
zm
array([643.79470765, 642.57221213, 643.04900807])
zeta = zlmax[zm == np.max(zm)]
zeta
array([1.])
zeta = np.min(zeta)
zeta
1.0
w = np.minimum(1, np.maximum(zeta * cs, pilo))
w
array([1.        , 1.        , 1.        , 1.        , 1.        ,
       0.41211503])

R code

zetafromx <- function(xd, cs, pilo = NA, prior = "laplace", a = 0.5) {
#
#  Given a sequence xd, a vector of scale factors cs and
#   a lower limit pilo, find the marginal maximum likelihood
#   estimate of the parameter zeta such that the prior prob
#   is of the form median( pilo, zeta*cs, 1)
#
#  If pilo=NA then it is calculated according to the sample size
#   to corrrespond to the universal threshold
#  
#  Find the beta values and the minimum weight if necessary
#
#  Current version allows for standard deviation of 1 only.
#
    pr <- substring(prior, 1, 1)
    nx <- length(xd)
    if (is.na(pilo))
          pilo <- wfromt(sqrt(2 * log(length(xd))), prior=prior, a=a)
    if(pr == "l")
      beta <- beta.laplace(xd, a=a)
    if(pr == "c") beta <- beta.cauchy(xd)
#
#  Find jump points zj in derivative of log likelihood as function
#    of z, and other preliminary calculations
#
    zs1 <- pilo/cs
    zs2 <- 1/cs
    zj <- sort(unique(c(zs1, zs2)))
    cb <- cs * beta
    mz <- length(zj)
    zlmax <- NULL
        
#  Find left and right derivatives at each zj and check which are
#  local minima Check internal zj first
    lmin <- rep(FALSE, mz)
    for(j in (2:(mz - 1))) {
        ze <- zj[j]
        cbil <- cb[(ze > zs1) & (ze <= zs2)]
        ld <- sum(cbil/(1 + ze * cbil))
        if(ld <= 0) {
            cbir <- cb[(ze >= zs1) & (ze < zs2)]
            rd <- sum(cbir/(1 + ze * cbir))
            lmin[j] <- (rd >= 0)
        }
    }
#
#  Deal with the two end points in turn, finding right deriv
#   at lower end and left deriv at upper
#
#  In each case the corresponding end point is either a local min
#   or a local max depending on the sign of the relevant deriv
#
    cbir <- cb[zj[1] == zs1]
    rd <- sum(cbir/(1 + zj[1] * cbir))
    if(rd > 0) lmin[1] <- TRUE else zlmax <- zj[1]
    cbil <- cb[zj[mz] == zs2]
    ld <- sum(cbil/(1 + zj[mz] * cbil))
    if(ld < 0) lmin[mz] <- TRUE else zlmax <- zj[mz]
        
#  Flag all local minima and do a binary search between them to find
#  the local maxima
#
    zlmin <- zj[lmin]
    nlmin <- length(zlmin)
    if(nlmin >= 2) for(j in (2:nlmin)) {
            zlo <- zlmin[j - 1]
            zhi <- zlmin[j]
            ze <- (zlo + zhi)/2
            zstep <- (zhi - zlo)/2
            for(nit in (1:10)) {
                cbi <- cb[(ze >= zs1) & (ze <= zs2)]
                likd <- sum(cbi/(1 + ze * cbi))
                zstep <- zstep/2
                ze <- ze + zstep * sign(likd)
            }
            zlmax <- c(zlmax, ze)
        }
#
#  Evaluate all local maxima and find global max; use smaller value
#   if there is an exact tie for the global maximum.
#
    nlmax <- length(zlmax)
    zm <- rep(NA, nlmax)
    for(j in (1:nlmax)) {
        pz <- pmax(zs1, pmin(zlmax[j], zs2))
        zm[j] <- sum(log(1 + cb * pz))
    }
    zeta <- zlmax[zm == max(zm)]
    zeta <- min(zeta)
    w <- pmin(1, pmax(zeta*cs, pilo)) 
    return(list(zeta=zeta, w=w, cs=cs, pilo=pilo))
}

Results

  • Python
ebayesthresh.zetafromx(np.array([-2,1,0,-4,8,50]),np.array([2,3,5,6,1,-1]))
{'zeta': 1.0,
 'w': array([1.        , 1.        , 1.        , 1.        , 1.        ,
        0.41211503]),
 'cs': array([ 2,  3,  5,  6,  1, -1]),
 'pilo': 0.4121150272751926}
  • R
> zetafromx(c(-2,1,0,-4,8,50),c(2,3,5,6,1,-1))
$zeta
[1] 1

$w
[1] 1.000000 1.000000 1.000000 1.000000 1.000000 0.412115

$cs
[1]  2  3  5  6  1 -1

$pilo
[1] 0.412115
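
The returned weights are simply median(pilo, zeta*cs, 1) applied elementwise, which np.clip reproduces; a quick check using only the result dictionary above:

import numpy as np
import ebayesthresh

res = ebayesthresh.zetafromx(np.array([-2, 1, 0, -4, 8, 50]), np.array([2, 3, 5, 6, 1, -1]))
w_check = np.clip(res['zeta'] * res['cs'], res['pilo'], 1)
print(res['w'])
print(w_check)     # agrees with res['w']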

Layer

import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
def make_Psi(T):
    # adjacency matrix W of a path graph on T nodes (neighbours are i-1 and i+1)
    W = np.zeros((T,T))
    for i in range(T):
        for j in range(T):
            if i==j :
                W[i,j] = 0
            elif np.abs(i-j) <= 1 : 
                W[i,j] = 1
    # symmetric normalized graph Laplacian L = D^{-1/2} (D - W) D^{-1/2}
    d = np.array(W.sum(axis=1))
    D = np.diag(d)
    L = np.array(np.diag(1/np.sqrt(d)) @ (D-W) @ np.diag(1/np.sqrt(d)))
    # eigenvectors of L form an orthonormal, DFT-like basis for signals on the path
    lamb, Psi = np.linalg.eigh(L)
    return Psi
T = 100
t = np.arange(T)/T * 10
y_true = 3*np.sin(0.5*t) + 1.2*np.sin(1.0*t) + 0.5*np.sin(1.2*t) 
y = y_true + np.random.normal(size=T)
plt.figure(figsize=(10,6))
plt.plot(t,y_true)

plt.plot(t,y,'o')
plt.plot(t,y_true,'--')

f = np.array(y)
if len(f.shape)==1: f = f.reshape(-1,1)
T,N = f.shape
Psi = make_Psi(T)
fbar = Psi.T @ f # apply dft 
plt.plot(t,fbar**2) # periodogram 

fbar_threshed = ebayesthresh.ebayesthresh(fbar[:,0])
# fbar_threshed = np.stack([ebayesthresh(FloatVector(fbar[:,i])) for i in range(N)],axis=1)
plt.plot((fbar**2)) # periodogram 
plt.plot((fbar_threshed**2)) 

plt.plot((fbar**2)[20:80]) # periodogram 
plt.plot((fbar_threshed**2)[20:80]) 

yhat = Psi @ fbar_threshed # inverse dft
plt.figure(figsize=(10,6))
plt.plot(t,y,'.')
plt.plot(t,y_true,'--')
plt.plot(t,yhat)

class ebayesthresh_nn(torch.nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,input):
        return ebayesthresh.ebayesthresh(input)
thresh_layer = ebayesthresh_nn()
plt.plot(fbar[:,0])

plt.plot(thresh_layer(fbar[:,0]))
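
Since ebayesthresh operates on NumPy arrays, a torch layer that accepts tensors could detach to NumPy and wrap the result back into a tensor. This is only an assumed convenience wrapper, not part of the package, and the thresholding step is not differentiable:

import numpy as np
import torch
import ebayesthresh

class EbayesThreshLayer(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # detach: the empirical Bayes thresholding happens outside the autograd graph
        out = ebayesthresh.ebayesthresh(x.detach().cpu().numpy())
        return torch.as_tensor(np.asarray(out), dtype=x.dtype)

layer = EbayesThreshLayer()
print(layer(torch.randn(100)).shape)    # torch.Size([100])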