No speed increase from Cython again?

The following is my cython code, whose purpose is to create a boot file.

# Cython code from the question: bootstrap resampling of the per-bin ratio
# empirical/expected - 1.  Assumes `import numpy as np` / `cimport numpy as np`
# at module level (not shown in this excerpt) — confirm against the full file.
def boots(int trial, np.ndarray[double, ndim=2] empirical, np.ndarray[double, ndim=2] expected):
    # Number of rows available to resample from.
    cdef int length = len(empirical)
    # One row of 100 per-bin results per bootstrap trial.
    cdef np.ndarray[double, ndim=2] ret = np.empty((trial, 100))
    # Row indices drawn with replacement for the current trial.
    # NOTE(review): np.random.randint's dtype is the platform C long; the
    # `long` buffer type here matches only by that convention — verify on
    # the target platform (e.g. 64-bit Windows, where C long is 32-bit).
    cdef np.ndarray[long] choices
    cdef np.ndarray[double] m
    cdef np.ndarray[double] n
    cdef long o
    cdef int i
    cdef int j

    for i in range(trial):
        # Draw `length` indices in [0, length) with replacement.
        choices = np.random.randint(0, length, length)

        # Per-bin accumulators for this trial.
        m = np.zeros(100)
        n = np.zeros(100)
        for j in range(length):
            o = choices[j]
            # Equivalent to `m += empirical[o]` / `n += expected[o]`.
            # Each call is still a full Python/NumPy dispatch per row, which
            # is why compiling with Cython gives no speedup here.
            m.__iadd__(empirical[o])
            n.__iadd__(expected[o])
        # Means over the resampled rows.
        empirical_boot = m / length
        expected_boot = n / length

        ret[i] = empirical_boot / expected_boot - 1
    # Sort each bin independently across trials to read off percentiles.
    ret.sort(axis=0)
    # 2.5% and 97.5% percentile rows, reshaped to the 10x10 bin grid.
    return ret[int(trial * 0.025)].reshape((10,10)), ret[int(trial * 0.975)].reshape((10,10))


# test code
# Degenerate all-ones inputs: every bootstrap ratio is exactly 1, so boots()
# returns all zeros; the fixture exists only for timing, not correctness.
empirical = np.ones((40000, 100))
expected = np.ones((40000, 100))
# %prun is an IPython magic (profile the call, show the top 10 entries by
# internal time); this line only runs inside IPython/Jupyter, not plain Python.
%prun -l 10 boots(100, empirical,expected)

It takes 11 seconds in pure Python with fancy indexing, and no matter how much I tuned it in Cython, it stays the same.

np.random.randint(0, 40000, 40000) takes 1 ms, so 100x takes 0.1 s.

np.sort(np.ones((40000, 100))) takes 0.2 s.

Therefore, I believe that there should be ways to improve boots.

+4
source share
1 answer

Generally speaking, merely compiling with Cython gives little speedup here, because nearly all of the work still goes through NumPy's Python-level API. To get real gains you have to replace those per-iteration NumPy calls with explicitly typed loops that Cython can translate into pure C. Here is a version that does exactly that.

# Cython implementation: the per-row NumPy calls of the original are replaced
# by explicitly typed inner loops, so the accumulation compiles to plain C
# instead of dispatching through the Python/NumPy API on every row.
from numpy cimport ndarray as ar
from numpy cimport int32_t as int32
from numpy import empty
from numpy.random import randint
cimport cython
# (A stray incomplete `ctypedef int` line from the original transcription was
# removed here: a ctypedef without a new type name is a Cython syntax error.)

# Notice the use of these decorators to tell Cython to turn off
# some of the checking it does when accessing arrays.
@cython.boundscheck(False)
@cython.wraparound(False)
def boots(int32 trial, ar[double, ndim=2] empirical, ar[double, ndim=2] expected):
    """Bootstrap the per-bin ratio empirical/expected - 1.

    Resamples the rows of `empirical` and `expected` (both shape
    (length, 100)) with replacement, `trial` times, and returns the
    2.5% and 97.5% percentile rows of the sorted results, each reshaped
    to a 10x10 grid.
    """
    cdef:
        int32 length = empirical.shape[0], i, j, k
        int32 o
        ar[double, ndim=2] ret = empty((trial, 100))
        ar[int32] choices
        ar[double] m = empty(100), n = empty(100)
    for i in range(trial):
        # Still calling Python on this line.
        # NOTE(review): randint's default dtype is the platform C long,
        # which may not be int32 on 64-bit systems; if Cython raises a
        # buffer dtype mismatch here, cast with .astype('int32').
        choices = randint(0, length, length)
        # It was faster to compute m and n separately.
        # I suspect that has to do with cache management.
        # Instead of allocating new arrays, I just filled the old ones with the new values.
        o = choices[0]
        for k in range(100):
            m[k] = empirical[o,k]
        for j in range(1, length):
            o = choices[j]
            for k in range(100):
                m[k] += empirical[o,k]
        o = choices[0]
        for k in range(100):
            n[k] = expected[o,k]
        for j in range(1, length):
            o = choices[j]
            for k in range(100):
                n[k] += expected[o,k]
        # Here I simplified some of the math and got rid of temporary arrays:
        # (m/length) / (n/length) - 1 == m/n - 1, so the division by `length`
        # cancels and no intermediate arrays are needed.
        for k in range(100):
            ret[i,k] = m[k] / n[k] - 1.
    # Sort each bin independently across trials, then take the percentile rows.
    ret.sort(axis=0)
    return ret[int(trial * 0.025)].reshape((10,10)), ret[int(trial * 0.975)].reshape((10,10))

To see which lines still interact with Python, have Cython generate its annotated HTML report: lines highlighted in yellow go through the Python API, while white lines compile to pure C. That report is the main tool for finding what is slowing a Cython function down. In IPython, pass the --annotate flag to the %%cython cell magic to get it inline.

With the explicit typed loops, the inner work should run at essentially C speed.

+3

Source: https://habr.com/ru/post/1527363/


All Articles