Nested loop acceleration

Question

Nested loop acceleration

I am writing a simulation of a wireless network in Python using numpy and cython. Suppose there are several nodes (`no_nodes` of them) randomly scattered on the 2D plane, each sending some signal, and their respective receivers, again randomly scattered on the 2D plane. Each transmitting node produces a signal that I call `output` (each node can produce an output signal of a different length).

What I want to do is sum these outputs from each node into one large waveform that each receiver will take as its input for demodulation, etc. Now two key points:

Transmitters send asynchronously, so a `start_clock` and an `end_clock` have to be kept for each transmitting node in order to sum the corresponding signals properly
the output of the `j`-th transmitting node is attenuated before it is received by the `i`-th node, according to the function `attenuate(i,j)`

So here is the code:

# Fragment of a function body (the enclosing `def` is not shown): builds the
# waveform seen by every receiver as the sum of all attenuated transmissions.
#create empty 2d array (no_rx_nodes x no_samples for each waveform)
# The row length is the largest end_clock value, so every transmission window fits.
waveforms = np.zeros((no_nodes, max(end_clock))) 

for i in range(no_nodes): #calculate the waveform for each receiver
    for j in range(no_nodes): #sum the waveforms produced by each transmitter
        # Add transmitter j's samples, scaled by attenuate(i,j), into the
        # start_clock[j]:end_clock[j] window of receiver i's row.
        # NOTE(review): assumes output[j,:] holds exactly
        # end_clock[j] - start_clock[j] samples -- confirm against the caller.
        waveforms[i, start_clock[j]:end_clock[j]] += output[j,:] * attenuate(i,j)
return waveforms

Some comments about this:

output[j, :] is the output of the transmitter j
waveforms[i,:] is the waveform received by receiver i

, , . ( 10 ^ 6 ), cython, - (, 5-10 , ). , - , , , (, , , , ).

+4

python numpy for-loop slice

user113478 09 '14 13:14

3

jtaylor · Answer 1 · 2014-05-09T17:20:39+0000

3 /, 2-4 . , (numexpr ):

# Chunked accumulation: each row is processed in slices of `s` samples
# instead of all at once.
# should be chosen so all operands fit in the (next-to-)last level cache
# first level is normally too small to be usable due to python overhead
s = 15000  # loop-invariant, so defined once outside both loops
for i in range(no_nodes):
    # Row of receiver i, fetched once per receiver rather than once per
    # (i, j) pair. Assumes `waveforms` is a 2-D NumPy array so that `w` is a
    # view and the in-place additions below update `waveforms` itself.
    w = waveforms[i]
    for j in range(no_nodes):
        a = attenuation[i,j]
        o = output[j]
        for k in range(0, w.size, s):
            u = min(k + s, w.size)  # clamp the last, possibly shorter, chunk
            w[k:u] += o[k:u] * a
        # or: numexpr.evaluate("w + o * a", out=w)

float32 float64 .

, , .

Jaime · Answer 2 · 2014-05-09T16:49:34+0000

, , , , , . , . :

def no_buffer(output, attenuate):
    """Sum the attenuated transmitter outputs for every receiver.

    Reference implementation without a reusable scratch buffer: the
    expression ``output[j, :] * attenuate[i, j]`` produces a new temporary
    array on every inner iteration.

    Parameters
    ----------
    output : 2-D array
        ``output[j, :]`` is the signal of transmitter ``j``.
    attenuate : 2-D array
        ``attenuate[i, j]`` is the factor applied to transmitter ``j``'s
        signal when it is accumulated into receiver ``i``'s waveform.

    Returns
    -------
    2-D array shaped like ``output``; row ``i`` is the sum over ``j`` of
    ``output[j, :] * attenuate[i, j]``.
    """
    waveforms = np.zeros_like(output)
    n_nodes = len(output)
    # `range` instead of the Python 2-only `xrange`, so the function runs on
    # both Python 2 and Python 3.
    for i in range(n_nodes):
        for j in range(n_nodes):
            waveforms[i, :] += output[j, :] * attenuate[i, j]

    return waveforms

def with_buffer(output, attenuate):
    """Sum the attenuated transmitter outputs using one reusable buffer.

    Computes the same result as accumulating
    ``output[j, :] * attenuate[i, j]`` into row ``i`` for every ``j``, but
    writes each product into a single preallocated scratch array and adds it
    to the result row in place via the ufunc ``out=`` argument.

    Parameters
    ----------
    output : 2-D array
        ``output[j, :]`` is the signal of transmitter ``j``.
    attenuate : 2-D array
        ``attenuate[i, j]`` is the factor applied to transmitter ``j``'s
        signal when it is accumulated into receiver ``i``'s waveform.

    Returns
    -------
    2-D array shaped like ``output`` holding the summed waveforms.
    """
    waveforms = np.zeros_like(output)
    # Scratch row reused for every product.
    buffer_arr = np.empty_like(output[0])
    n_nodes = len(output)
    # `range` instead of the Python 2-only `xrange`, so the function runs on
    # both Python 2 and Python 3.
    for i in range(n_nodes):
        row = waveforms[i, :]  # view into the result; updated in place below
        for j in range(n_nodes):
            np.multiply(output[j, :], attenuate[i, j], out=buffer_arr)
            np.add(row, buffer_arr, out=row)

    return waveforms

# Benchmark inputs: 20 signals of 10**6 samples each and a 20 x 20 matrix of
# attenuation factors. The sample count is written as an integer because
# current NumPy rejects a float such as 1e6 as an array dimension.
o = np.random.rand(20, 10**6)
a = np.random.rand(20, 20)

In [17]: np.allclose(no_buffer(o, a), with_buffer(o, a))
Out[17]: True

In [18]: %timeit no_buffer(o, a)
1 loops, best of 3: 2.3 s per loop

In [19]: %timeit with_buffer(o, a)
1 loops, best of 3: 1.57 s per loop

, .

, , - , BLAS . , MKL:

In [21]: np.allclose(with_buffer(o, a), np.dot(o.T, a.T).T)
Out[21]: True

In [22]: %timeit np.dot(o.T, a.T).T
10 loops, best of 3: 123 ms per loop

user113478 · Answer 3 · 2014-05-09T16:28:18+0000

, , . , , , for. , 3 . , :

import numpy as np
import time

def calc(no_nodes, no_samples=700000):
    """Time the fully broadcast (loop-free) summation of attenuated outputs.

    Random outputs and attenuation factors are generated, every output row is
    replicated once per receiver into a 3-D array, scaled by the attenuation
    factors and summed over the transmitter axis. The elapsed wall-clock time
    of that computation (data generation excluded) is printed in seconds.

    Parameters
    ----------
    no_nodes : int
        Number of nodes; sets both the number of output rows and the size of
        the square attenuation matrix.
    no_samples : int, optional
        Number of samples per output row. Defaults to 700000 (the 7e5 samples
        originally hard-coded). Coerced with ``int()`` because NumPy requires
        integer array dimensions.

    Returns
    -------
    2-D array of shape ``(no_nodes, no_samples)``; row ``i`` is the sum over
    ``j`` of ``attenuate[i, j] * output[j]``.
    """
    no_samples = int(no_samples)

    output = np.random.rand(no_nodes, no_samples)  # some random data
    attenuate = np.random.rand(no_nodes, no_nodes)  # some random data
    start_time = time.time()
    # Intermediate of shape (no_nodes, no_nodes, no_samples): one copy of
    # every output row per receiver.
    output_per_node = np.zeros((no_nodes, no_nodes, no_samples))
    output_per_node += output[None, :, :]
    data = attenuate[:, :, None] * output_per_node
    waveforms = np.sum(data, axis=1)
    end_time = time.time()
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(end_time - start_time)
    return waveforms

:

def calc1(no_nodes, no_samples=700000):
    """Time the nested-loop summation of attenuated outputs.

    Random outputs and attenuation factors are generated, then each result
    row is built by adding every output row scaled by the matching
    attenuation factor. The elapsed wall-clock time of the loops (data
    generation and allocation excluded) is printed in seconds.

    Parameters
    ----------
    no_nodes : int
        Number of nodes; sets both the number of output rows and the size of
        the square attenuation matrix.
    no_samples : int, optional
        Number of samples per output row. Defaults to 700000 (the 7e5 samples
        originally hard-coded). Coerced with ``int()`` because NumPy requires
        integer array dimensions.

    Returns
    -------
    2-D array of shape ``(no_nodes, no_samples)``; row ``i`` is the sum over
    ``j`` of ``output[j] * attenuation[i, j]``.
    """
    no_samples = int(no_samples)
    output = np.random.rand(no_nodes, no_samples)
    attenuation = np.random.rand(no_nodes, no_nodes)
    waveforms = np.zeros((no_nodes, no_samples))
    start_time = time.time()
    for i in range(no_nodes):
        for j in range(no_nodes):
            waveforms[i] += output[j] * attenuation[i, j]
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(time.time() - start_time)
    return waveforms

? , Numpy , . , , - . cython, ( ) , , . , ? : no_nodes = 10

, , ipython , , ipynb, html :

IPython notebook html ( )
IPython .ipynb ( )

.

Nested loop acceleration

More articles: