Take a look at this python function:
def py_fun(i,N,step):
res=0.0
while i<N:
res+=i
i+=step
return res
and use ipython-magic for its time:
In [11]: %timeit py_fun(0.0,1.0e5,1.0)
10 loops, best of 3: 25.4 ms per loop
- . , cython /cythonizing :
%load_ext Cython
%%cython
def cy_fun(i,N,step):
res=0.0
while i<N:
res+=i
i+=step
return res
50%:
In [13]: %timeit cy_fun(0.0,1.0e5,1.0)
100 loops, best of 3: 10.9 ms per loop
c-, , / ceval
, , :
static PyObject *__pyx_pf_4test_cy_fun(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_i, PyObject *__pyx_v_N, PyObject *__pyx_v_step) {
...
while (1) {
__pyx_t_1 = PyObject_RichCompare(__pyx_v_i, __pyx_v_N, Py_LT);
...
__pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1);
...
if (!__pyx_t_2) break;
...
__pyx_t_1 = PyNumber_InPlaceAdd(__pyx_v_res, __pyx_v_i);
...
__pyx_t_1 = PyNumber_InPlaceAdd(__pyx_v_i, __pyx_v_step);
}
...
return __pyx_r;
}
, cython python, float c-style, PyNumber_InPlaceAdd
, (integer, float, something else?) , .
cython float:
%%cython
def c_fun(double i,double N, double step):
cdef double res=0.0
while i<N:
res+=i
i+=step
return res
i
, N
, step
res
c-style double python. PyNumber_InPlaceAdd
, +
- double
:
static PyObject *__pyx_pf_4test_c_fun(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_i, double __pyx_v_N, double __pyx_v_step) {
...
__pyx_v_res = 0.0;
...
while (1) {
__pyx_t_1 = ((__pyx_v_i < __pyx_v_N) != 0);
if (!__pyx_t_1) break;
__pyx_v_res = (__pyx_v_res + __pyx_v_i);
__pyx_v_i = (__pyx_v_i + __pyx_v_step);
}
...
return __pyx_r;
}
:
In [15]: %timeit c_fun(0.0,1.0e5,1.0)
10000 loops, best of 3: 148 µs per loop
100 , .
, , + - ( 100- ) : 50% (15 ) "" 10 .
, "" : Float , , , / .
, /:
%%cython
cdef class MutableFloat:
cdef double x
def __cinit__(self, x):
self.x=x
def __iadd__(self, MutableFloat other):
self.x=self.x+other.x
return self
def __lt__(MutableFloat self, MutableFloat other):
return self.x<other.x
def __gt__(MutableFloat self, MutableFloat other):
return self.x>other.x
def __repr__(self):
return str(self.x)
( , ):
def py_fun(i,N,step,acc):
while i<N:
acc+=i
i+=step
return acc
%timeit py_fun(1.0, 5e5,1.0,0.0)
30.2 ms ± 1.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each
%timeit cy_fun(1.0, 5e5,1.0,0.0)
16.9 ms ± 612 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit i,N,step,acc=MutableFloat(1.0),MutableFloat(5e5),MutableFloat(1
...: .0),MutableFloat(0.0); py_fun(i,N,step,acc)
23 ms ± 254 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
%timeit i,N,step,acc=MutableFloat(1.0),MutableFloat(5e5),MutableFloat(1
...: .0),MutableFloat(0.0); cy_fun(i,N,step,acc)
11 ms ± 66.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
i
, !
immutable mutable
py_fun 30ms 23ms
cy_fun 17ms 11ms
, 7 ( 20%) / ( , ), 33% .
:
, , : , . std::vector<>
( array.array
), python, , .
python:
#list.py
N=int(1e7)
lst=[0]*int(N)
for i in range(N):
lst[i]=i
print(sum(lst))
N=int(1e7)
b=bytearray(8*N)
m=memoryview(b).cast('L')
for i in range(N):
m[i]=i
print(sum(m))
1e7
, Python-, - c-int, .
: (D) :
valgrind --tool=cachegrind python list.py
...
D1 misses: 33,964,276 ( 27,473,138 rd + 6,491,138 wr)
valgrind --tool=cachegrind python bytearray.py
...
D1 misses: 4,796,626 ( 2,140,357 rd + 2,656,269 wr)
, python 8 . , python 8 (, 32 , 4), (, 100% , , , - , ), - , , c- bytearray
.