Optimize loop for changepoint test

I am trying to write a simple changepoint finding tool in Python. The loglike(xs) function below returns the maximized log-likelihood for xs under a normal model. The most_probable_cp(xs) function steps through each point in the middle ~75% of xs and uses the likelihood ratio to find the most likely changepoint in xs.

I am using binary segmentation and bootstrapping to get critical values for the likelihood ratio, so I will need to call most_probable_cp() thousands of times. Is there any way to speed it up? Would Cython help at all? I have never used it.

import numpy as np

def loglike(xs):
    # Maximized Gaussian log-likelihood: plug the MLEs for the mean
    # and variance back into the normal log-likelihood.
    n = len(xs)
    mean = np.sum(xs)/n
    sigSq = np.sum((xs - mean)**2)/n
    return -0.5*n*np.log(2*np.pi*sigSq) - 0.5*n


def most_probable_cp(xs, left=None, right=None):
    """
    Finds the most probable changepoint location and corresponding likelihood for xs[left:right]
    """
    if left is None:
        left = 0

    if right is None:
        right = len(xs)

    OFFSETPCT = 0.125
    MINNOBS = 12

    ys = xs[left:right]
    offset = min(int(len(ys)*OFFSETPCT), MINNOBS)
    tLeft, tRight = left + offset, right - offset
    if tRight <= tLeft:
        raise ValueError("left and right are too close together.")

    maxLike = -1e9
    cp = None
    dataLike = loglike(ys)
    # Bottleneck is below.
    for t in range(tLeft, tRight):
        profLike = loglike(xs[left:t]) + loglike(xs[t:right])
        lr = 2*(profLike - dataLike)
        if lr > maxLike:
            cp = t
            maxLike = lr

    return cp, maxLike
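
For concreteness, a minimal example call looks like this (toy data with a single mean shift; in actual use this sits inside the bootstrap loop, which is why the call count explodes):

import numpy as np

# Toy series: 200 points with a mean shift at index 120.
rng = np.random.default_rng(0)
xs = np.concatenate([rng.normal(0.0, 1.0, 120),
                     rng.normal(2.0, 1.0, 80)])

cp, lr = most_probable_cp(xs)
print(cp, lr)   # cp should land near 120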
2 answers

The first thing I would do is lean on Numpy itself: np.std already computes the mean and the squared deviations in a single call, so loglike collapses to:

def loglike(xs):
    n = len(xs)
    # np.std is the standard deviation, so square it to recover sigSq
    return -0.5 * n * np.log(2 * np.pi * np.std(xs)**2) - 0.5 * n

A few smaller points: if your data can contain NaNs, use np.nanstd instead; the argument of the log is a scalar, so math.log is cheaper than np.log here; and since xs is already an ndarray, the method call xs.std() avoids a little dispatch overhead compared to np.std(xs). These are micro-optimizations, though, so measure before committing to them.
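
The scalar-log point is easy to check with timeit (exact numbers vary by machine):

import timeit

# np.log pays ufunc dispatch overhead even on a single Python float;
# math.log is a thin wrapper around the C library call.
print(timeit.timeit("math.log(2.0)", setup="import math", number=10**6))
print(timeit.timeit("np.log(2.0)", setup="import numpy as np", number=10**6))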

Edit

Profiling loglike with python -m cProfile -o output yourprogram.py; runsnake output shows that the vast majority of the time (about 80%) is spent inside np.std.
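
If you don't have RunSnakeRun installed, the stdlib pstats module reads the same dump:

import pstats

# Summarize the profile written by `python -m cProfile -o output yourprogram.py`.
p = pstats.Stats("output")
p.sort_stats("cumulative").print_stats(10)   # top 10 entries

Either way, if np.std dominates, bottleneck.nanstd is a much faster drop-in (modulo the NaN handling):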

import bottleneck as bn

def loglike(xs):
    n = len(xs)
    return -0.5 * n * np.log(2 * np.pi * bn.nanstd(xs)**2) - 0.5 * n

That change alone makes loglike roughly 30% faster. The len call accounts for only about 5%, so it is not worth touching. And because the log argument is a scalar and np.pi is a constant, you can swap numpy for the math module:

return -0.5 * n * (math.log(2 * math.pi * bn.nanstd(xs)**2) + 1)

Pulling the constant part of the log out of the function is good for another ~10%:

import math

factor = math.log(2*math.pi)

def loglike(xs):
    n = len(xs)
    return -0.5 * n * (factor + 2*math.log(bn.nanstd(xs)) + 1)
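
It is worth asserting that the rewritten function still agrees with the original; a quick sanity check (loglike_orig/loglike_fast are my names for the two versions):

import math
import numpy as np
import bottleneck as bn

def loglike_orig(xs):
    n = len(xs)
    mean = np.sum(xs)/n
    sigSq = np.sum((xs - mean)**2)/n
    return -0.5*n*np.log(2*np.pi*sigSq) - 0.5*n

factor = math.log(2*math.pi)

def loglike_fast(xs):
    n = len(xs)
    return -0.5 * n * (factor + 2*math.log(bn.nanstd(xs)) + 1)

xs = np.random.randn(500)
assert np.isclose(loglike_orig(xs), loglike_fast(xs))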

Edit 2

Calling bn.nanstd itself still has per-call dispatch overhead: it inspects the shape and dtype of xs and forwards to a specialized kernel such as func.nanstd_1d_float64_axisNone. If the dtype and dimensionality of your arrays never change, you can resolve that kernel once with std, _ = bn.func.nanstd_selector(xs, axis=None) and call it directly.
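
A sketch of that lookup, assuming one of the old 0.x bottleneck releases that exposes bn.func (check your installed version; newer releases dropped this module, and the kernel may want the ddof argument passed explicitly):

import math
import numpy as np
import bottleneck as bn

xs = np.random.randn(1000)        # fixes the ndim/dtype the kernel is built for
factor = math.log(2*math.pi)

# Resolve the specialized kernel (e.g. nanstd_1d_float64_axisNone) once,
# outside the hot loop, instead of letting bn.nanstd dispatch on every call.
nanstd_1d, xs = bn.func.nanstd_selector(xs, axis=None)

def loglike(xs):
    n = len(xs)
    return -0.5 * n * (factor + 2*math.log(nanstd_1d(xs)) + 1)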

Past that point you are mostly paying for Python itself: the slices are short, so interpreter and function-call overhead dominates. That is exactly the case where Cython helps; you can keep the structure of the code essentially as-is, compile the hot loop, and usually win a large constant factor.


A different kind of answer: instead of shaving time off loglike, restructure the loop so it stops recomputing the same quantities from scratch at every candidate split.

Look at where the work goes: every call to loglike makes fresh passes over its slice via np.sum, yet the value -0.5*n*np.log(2*np.pi*sigSq) - 0.5*n depends only on n, mean, and sigSq (equivalently, the stddev) of that slice.

So you do not need np.std, or any per-iteration pass over the data. Maintain running counts, sums, and sums of squares for the left and right segments, and update them in O(1) as the split point advances.

Also, inside the loop you only need profLike for the argmax: dataLike is a constant, and the factor of 2 (against the 0.5 already inside loglike) never changes which t wins, so lr can be computed once after the loop.

(The HLearn library is built around exactly this kind of incremental-statistics trick.)

Put together: one pass over the data, six running scalars, and the only expensive per-iteration call left is np.log. Be careful with fencepost errors; a sketch:

import numpy as np

# arr is the 1d data array; all six running statistics update in O(1) per step.
n = len(arr)
rcnt = n                  # right segment starts as the whole array
rsum = np.sum(arr)
rssq = np.sum(arr**2)
lcnt = 0                  # left segment starts empty
lsum = 0.0
lssq = 0.0

maxlike = -np.inf
cp = -1

for i in arr[:-1]:        # always leave at least one point on the right
    # move one observation from the right segment to the left
    lcnt += 1
    lsum += i
    lssq += i*i
    rcnt -= 1
    rsum -= i
    rssq -= i*i

    if lcnt < 2 or rcnt < 2:
        continue          # variance is degenerate for one-point segments

    lmean = lsum/lcnt
    lvar = lssq/lcnt - lmean**2          # E[x^2] - E[x]^2
    loglike_l = -0.5*lcnt*(np.log(2*np.pi*lvar) + 1)

    rmean = rsum/rcnt
    rvar = rssq/rcnt - rmean**2
    loglike_r = -0.5*rcnt*(np.log(2*np.pi*rvar) + 1)

    loglike_total = loglike_l + loglike_r

    if maxlike < loglike_total:
        cp = lcnt         # first lcnt points form the left segment
        maxlike = loglike_total
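
The running-sums identity var = E[x^2] - E[x]^2 is where the numerical and fencepost bugs hide, so it is worth a direct check against numpy:

import numpy as np

rng = np.random.default_rng(1)
arr = rng.normal(size=50)
k = 20
lsum, lssq = arr[:k].sum(), (arr[:k]**2).sum()
lvar = lssq/k - (lsum/k)**2      # left-segment variance from running sums
assert np.isclose(lvar, np.var(arr[:k]))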

Source: https://habr.com/ru/post/1534263/

