Create a NumPy array without enumerating the array

Starting from this:

x = range(30,60,2)[::-1];
x = np.asarray(x); x

array([58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30])

Create an array like this (note that the first element of each row is repeated). If it is faster to build it without the repeated first element, I can np.hstack the first element on afterwards.

[[58 58 56 54 52]
 [56 56 54 52 50]
 [54 54 52 50 48]
 [52 52 50 48 46]
 [50 50 48 46 44]
 [48 48 46 44 42]
 [46 46 44 42 40]
 [44 44 42 40 38]
 [42 42 40 38 36]
 [40 40 38 36 34]
 [38 38 36 34 32]
 [36 36 34 32 30]
 [34 34 32 30 None]
 [32 32 30 None None]
 [30 30 None None None]]

Below is the code I need to make faster, without the 'for' loop and enumerate.

# Start from an empty 0x5 integer array and append one row per element of x.
arr = np.empty((0,5), int)

for i,e in enumerate(x):
    # Row i is x[i] followed by the window x[i:i+4] (so x[i] appears twice);
    # the five trailing None values pad short windows near the end of x,
    # and [:5] trims the row back to exactly five entries.
    arr2 = np.hstack((x[i], x[i:i+4], np.asarray([None]*5)))[:5]
    # vstack returns a new array each time, so the result is rebuilt on every iteration.
    arr  = np.vstack((arr,arr2))
+1
source share
4 answers

Approach No. 1

Here is a vectorized approach using NumPy broadcasting:

N = 4  # width factor
# Pad with N-1 trailing None values so that every length-N window is complete.
x_ext = np.concatenate((x, [None] * (N - 1)))
# A column of window starts broadcast against a row of offsets: index (i, j) = i + j.
starts = np.arange(x_ext.size - N + 1)[:, None]
offsets = np.arange(N)
arr2D = x_ext[starts + offsets]
# Prepend x itself as the repeated first column.
out = np.column_stack((x, arr2D))

Approach # 2

Here is another one, using scipy.linalg.hankel:

from scipy.linalg import hankel

N = 4 # width factor
# Pad with N-1 trailing None values so that every length-N window is complete.
x_ext = np.concatenate((x,[None]*(N-1)))
# hankel(c, r) uses c as its first column and r as its last row, so entry
# (i, j) is x_ext[i + j]; transposing turns each sliding window into a row.
# The slices are written in terms of N so changing the width factor just works.
out = np.column_stack((x,hankel(x_ext[:N], x_ext[N-1:]).T))

Runtime test

Using @Aaron's benchmarking script, adapted so that both solutions build the same NaN-padded input, the timing script is -

upper_limit = 58  # We will edit this to vary the dataset sizes

print("Timings are : ")

# --- @Aaron's solution: preallocate the result, then fill one row per iteration ---
t = time()
for _ in range(1000):  # 1000 iterations of @Aaron soln.
    width = 3
    # list(...) is required on Python 3, where a range cannot be concatenated with a list.
    x = np.array(list(range(upper_limit, 28, -2)) + [float('nan')] * width)
    arr = np.empty([len(x) - width, width + 2])
    arr[:, 0] = x[:len(x) - width]  # repeated first column
    for i in range(len(x) - width):
        arr[i, 1:] = x[i:i + width + 1]
print(time() - t)

# --- Broadcasting solution: build all window indices at once ---
t = time()
for _ in range(1000):
    N = 4  # width factor
    x_ext = np.array(list(range(upper_limit, 28, -2)) + [float('nan')] * (N - 1))
    arr2D = x_ext[np.arange(N) + np.arange(x_ext.size - N + 1)[:, None]]
    out = np.column_stack((x_ext[:len(x_ext) - N + 1], arr2D))
print(time() - t)

№1 (upper_limit = 58):

Timings are : 
0.0316879749298
0.0322730541229

№2 (upper_limit = 1058):

Timings are : 
0.680443048477
0.124517917633

№ 3 (upper_limit = 5058):

Timings are : 
3.28129291534
0.47504901886
+5

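Most of the time in the original code goes into the repeated hstack()/vstack() calls, each of which builds a new array; preallocating the result and filling it row by row avoids that.

edit: the timings below also include @Divakar's solution for comparison.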

import numpy as np
from time import time

# --- Preallocate-and-fill solution ---
t = time()
for _ in range(1000):  # 1000 iterations of my soln.
    width = 3
    # list(...) is required on Python 3, where a range cannot be concatenated with a list.
    x = np.array(list(range(58, 28, -2)) + [float('nan')] * width)
    arr = np.empty([len(x) - width, width + 2])
    arr[:, 0] = x[:len(x) - width]  # repeated first column
    for i in range(len(x) - width):
        arr[i, 1:] = x[i:i + width + 1]
print(time() - t)

# --- Original code from the question (hstack/vstack on every row) ---
t = time()
for _ in range(1000):  # 1000 iterations of OP code
    x = range(30, 60, 2)[::-1]
    x = np.asarray(x)
    arr = np.empty((0, 5), int)
    for i, e in enumerate(x):
        arr2 = np.hstack((x[i], x[i:i+4], np.asarray([None]*5)))[:5]
        arr = np.vstack((arr, arr2))
print(time() - t)

# --- Broadcasting solution ---
t = time()
for _ in range(1000):
    x = np.array(range(58, 28, -2))
    N = 4  # width factor
    x_ext = np.hstack((x, [None] * (N - 1)))
    arr2D = x_ext[np.arange(N) + np.arange(x_ext.size - N + 1)[:, None]]
    out = np.column_stack((x, arr2D))
print(time() - t)

Output:

>>> runfile('...temp.py', wdir='...')
0.0160000324249
0.374000072479
0.0319998264313
>>> 
+3

Divakar's solution pads x:

N = 4 # width factor
x_ext = np.concatenate((x,[None]*(N-1)))

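Since no arithmetic is done on the result, it makes little difference whether the padding is None (which produces an object array) or np.nan (which produces a float array).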

The column_stack can be avoided by building the repeated first column into the index array:

idx = np.r_[0,np.arange(N)] + np.arange(x_ext.size-N+1)[:,None]

array([[ 0,  0,  1,  2,  3],
       [ 1,  1,  2,  3,  4],
       [ 2,  2,  3,  4,  5],
       [ 3,  3,  4,  5,  6],
       [ 4,  4,  5,  6,  7],
       ...

x_ext[idx]

================

A different approach is to use striding to create a sliding-window view of the data.

as_strided = np.lib.stride_tricks.as_strided
# strides are byte steps for (next row, next column); (4, 4) assumes 4-byte
# elements, i.e. x_ext.strides == (4,) - use x_ext.strides[0] on other setups.
arr2D = as_strided(x_ext, shape=(15,4), strides=(4,4))

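This is one of the simpler uses of as_strided. The shape argument is straightforward - it is the shape of the desired result (without the repeated column), i.e. (x.shape[0],N).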

In [177]: x_ext.strides
Out[177]: (4,)

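For this 1d array, the step to the next element is 4 bytes. If the array is reshaped to 2d with 3 columns, the step to the next row is 12 bytes - 3 * 4 (3 elements).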

In [181]: x_ext.reshape(6,3).strides
Out[181]: (12, 4)

strides=(4,4) means that the step to the next row is only 4 bytes, i.e. one element of the original array.

as_strided(x_ext,shape=(8,4),strides=(8,4))

steps 2 elements per row, so consecutive rows overlap by 2 items:

array([[58, 56, 54, 52],
       [54, 52, 50, 48],
       [50, 48, 46, 44],
       [46, 44, 42, 40],
       ....

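The dangerous part of as_strided is that it can create an array that reads memory outside the original data buffer. That usually shows up as large random numbers in the positions where None appears in this example. It is the same kind of error you would get in C code that is careless with array pointers and indexing.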

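The as_strided result is a view (the repeated values are not copied), so writing to it can be dangerous. The column_stack with x makes a copy, replicating the repeated values as needed.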

+3

Here is another option, using np.roll():

import numpy as np
import timeit as ti
import numpy.matlib

# Input shared by both solutions: the even numbers 58, 56, ..., 30 in descending order.
x = np.asarray(range(58, 28, -2))

def sol1():
    """Baseline from the question, kept unchanged in behaviour for timing comparison.

    Builds the result one row at a time from the module-level array ``x``:
    each row is ``x[i]`` followed by the window ``x[i:i+4]``, padded with
    ``None`` and trimmed to five entries.

    Returns:
        A ``(len(x), 5)`` array with one row per element of ``x``.
    """
    rows = np.empty((0, 5), int)

    for start in range(len(x)):
        window = x[start:start + 4]
        # Five None values guarantee at least five entries before trimming.
        padded = np.hstack((x[start], window, np.asarray([None] * 5)))
        rows = np.vstack((rows, padded[:5]))
    return rows

def sol2(data=None, width=4):
    """Build the sliding-window table by rolling copies of the padded input.

    Each output row ``i`` is ``[v[i], v[i], v[i+1], ..., v[i+width-1]]``,
    with ``None`` wherever the window runs past the end of the input.

    Args:
        data: 1-D sequence of values. Defaults to the module-level ``x`` so
            that ``sol2()`` keeps working as before.
        width: Window length (number of columns after the repeated first
            column). The default of 4 reproduces the original 5-column result.

    Returns:
        An object array of shape ``(len(data), width + 1)``.

    Raises:
        ValueError: If ``width`` is smaller than 1.
    """
    if width < 1:
        raise ValueError("width must be at least 1")

    values = x if data is None else np.asarray(data)
    pad = width - 1

    # Pad with None explicitly as object dtype, so the result type does not
    # depend on the pad length (an empty pad would otherwise change the dtype).
    padded = np.concatenate(
        (values.astype(object), np.full(pad, None, dtype=object))
    )

    # One row per output column; np.tile replaces the deprecated numpy.matlib.repmat.
    mat = np.tile(padded, (width + 1, 1))

    # Rows 0 and 1 stay unshifted (the repeated first column); row k + 1 is
    # shifted left by k. Values that wrap around land beyond len(values) because
    # the shift never exceeds the pad length, and they are cut off below.
    for shift in range(1, width):
        mat[shift + 1, :] = np.roll(mat[shift + 1, :], -shift)

    # Only the Python loop over the short axis remains; drop the padding columns.
    return mat[:, :len(values)].T


print(ti.timeit(sol1, number=100))
print(ti.timeit(sol2, number=100))

Output:

0.026760146000015084
0.0038611710006080102

It uses a for loop, but it only iterates over the shorter axis. In addition, it is not difficult to adapt this code for other configurations instead of using hardcoded numbers.

+2
source

Source: https://habr.com/ru/post/1653029/


All Articles