Unique random number selection with Numpy

I need to create an array of size 10,000 x 50, in which each row contains an ascending sequence of random numbers between 1 and 365, for example:

[[  4  11  14 ..., 355 360 364]
 [  2  13  15 ..., 356 361 361]
 [  4  12  18 ..., 356 361 365]
 ..., 
 [  6   9  17 ..., 356 362 364]
 [  1  10  19 ..., 352 357 360]
 [  1   9  17 ..., 356 358 364]]

The only way I have found to do this is by iterating, building one row at a time:

sample_dates = np.array([np.sort(np.random.choice(365, 50, replace=False)) for _ in range(10000)])

which works, but is rather slow (~0.33 seconds per run), and I will need to run it thousands of times. Is there a faster way to do this?

EDIT: from what I can tell, the most expensive part of this solution is the iteration and the 10k individual calls to np.random.choice, not the sorting.

+4
source share
4 answers

, , . , 10 . - . , , 10 . . , . , .

:

# First draw for each of the 10,000 rows (values 0..364).
arr = np.random.choice(365, 10000)
for i in range(49):
    # Propose the next value for every row at once.
    candidates = np.random.choice(365, 10000)
    clashes = candidates == arr
    while clashes.any():
        # `arr` is 1-D during the first pass and 2-D (draws x rows) afterwards,
        # so only the later passes need collapsing over the draw axis.
        redo = clashes.any(axis=0) if i else clashes
        # Re-draw only the rows whose candidate repeats an earlier value.
        candidates[redo] = np.random.choice(365, redo.sum())
        clashes = candidates == arr
    arr = np.vstack([arr, candidates])
# Switch to one row per sample, then order each row ascending in place.
arr = arr.T
arr.sort(axis=1)

93,4 . 590 , ~ 6 .

+2

Here is a vectorized approach using np.argpartition/np.argsort.

-

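  • Create a random array of shape (10000,365) and argsort along each row. The resulting indices are unique within a row, which emulates replace=False in np.random.choice.

  • Keep the first 50 column indices of each row.

  • Finally, sort each row.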

Since only the first 50 indices are needed, the full argsort can be replaced by np.argpartition with k=50.

, , :

np.sort(np.random.rand(10000,365).argpartition(50,axis=1)[:,:50])

In [209]: out = np.sort(np.random.rand(10000,365).argpartition(50,axis=1)[:,:50])

In [210]: count = np.bincount(out.ravel(), minlength=365)

In [211]: print count.min(), count.max()
1277 1466

! , .

-

# Original approach
def org_app(nrows=10000, maxc=365, ncols=50):
    """Baseline sampler: one ``np.random.choice`` call and one sort per row.

    Every row holds ``ncols`` distinct integers drawn from ``range(maxc)``
    (0-based) in ascending order.  The defaults reproduce the original
    hard-coded 10000 x 50 sample over 365 values.

    Args:
        nrows: Number of rows to generate (expected to be at least 1).
        maxc: Population size; values come from ``0 .. maxc - 1``.
        ncols: Number of distinct values per row.

    Returns:
        Integer array of shape ``(nrows, ncols)``.

    Raises:
        ValueError: From ``np.random.choice`` when ``ncols > maxc``.
    """
    return np.array([
        np.sort(np.random.choice(maxc, ncols, replace=False))
        for _ in range(nrows)
    ])

# @Nils Werner soln
def sort_random_choice(nrows=10000, maxc=365, ncols=50):
    """Draw each row separately, then sort all rows with a single call.

    Same sampling as the per-row baseline, but the ``nrows`` individual
    sorts are replaced by one ``np.sort(..., axis=1)`` over the stacked rows.
    The defaults reproduce the original hard-coded 10000 x 50 sample.

    Args:
        nrows: Number of rows to generate (expected to be at least 1).
        maxc: Population size; values come from ``0 .. maxc - 1``.
        ncols: Number of distinct values per row.

    Returns:
        Integer array of shape ``(nrows, ncols)``, each row ascending.

    Raises:
        ValueError: From ``np.random.choice`` when ``ncols > maxc``.
    """
    rows = [np.random.choice(maxc, ncols, replace=False) for _ in range(nrows)]
    return np.sort(rows, axis=1)

# @Miriam Farber soln
def random_permute(nrows=10000, maxc=365, ncols=50):
    """Sample rows by shuffling a boolean mask; no sorting required.

    A mask with exactly ``ncols`` ``True`` entries is shuffled for each row
    and used to index ``1 .. maxc``.  Boolean indexing keeps the original
    order, so every row comes out ascending.  Unlike the ``np.random.choice``
    based variants, the values here are 1-based.

    Args:
        nrows: Number of rows to generate (expected to be at least 1).
        maxc: Population size; values come from ``1 .. maxc``.
        ncols: Number of distinct values per row.

    Returns:
        Integer array of shape ``(nrows, ncols)``, each row ascending.

    Raises:
        ValueError: If ``ncols`` is negative or larger than ``maxc``.
    """
    if not 0 <= ncols <= maxc:
        raise ValueError("ncols must satisfy 0 <= ncols <= maxc")
    mask = np.zeros(maxc, dtype=bool)
    mask[:ncols] = True
    total = np.arange(1, maxc + 1)
    # np.random.permutation returns a shuffled copy, so `mask` is reusable.
    return np.array([total[np.random.permutation(mask)] for _ in range(nrows)])

# Proposed in this post
def argpartition_sort(nrows=10000, maxc=365, ncols=50):
    """Vectorised sampler: rank random keys instead of calling choice per row.

    A ``(nrows, maxc)`` matrix of uniform random keys is generated; the
    column positions of the ``ncols`` smallest keys in each row form a sample
    without replacement from ``range(maxc)``.  ``argpartition`` finds those
    positions without fully sorting the keys, and the final ``np.sort``
    orders the chosen positions ascending.

    Args:
        nrows: Number of rows to generate.
        maxc: Population size; values come from ``0 .. maxc - 1``.
        ncols: Number of distinct values per row.

    Returns:
        Integer array of shape ``(nrows, ncols)``, each row ascending.

    Raises:
        ValueError: From ``argpartition`` when ``ncols > maxc``.
    """
    # Partition at ncols - 1 rather than ncols: the first ncols positions are
    # the ncols smallest keys either way, but kth == maxc is out of bounds, so
    # the original form failed for a full permutation (ncols == maxc).
    kth = max(ncols - 1, 0)
    keys = np.random.rand(nrows, maxc)
    return np.sort(keys.argpartition(kth, axis=1)[:, :ncols])

# @ayhan soln
def while_loop(nrows=10000, maxc=365, ncols=50):
    """Rejection sampler: add one value per row per round, re-drawing repeats.

    Each round proposes a new value for every row at once.  Rows whose
    proposal repeats a value already accepted for that row are re-drawn until
    no repeats remain.  After ``ncols`` rounds every row is sorted ascending.

    Args:
        nrows: Number of rows to generate.
        maxc: Population size; values come from ``0 .. maxc - 1``.
        ncols: Number of distinct values per row.

    Returns:
        Integer array of shape ``(nrows, ncols)``, each row ascending.

    Raises:
        ValueError: If ``ncols`` is negative or larger than ``maxc`` (the
            rejection loop could never terminate in the latter case).
    """
    if not 0 <= ncols <= maxc:
        raise ValueError("ncols must satisfy 0 <= ncols <= maxc")
    if ncols == 0:
        return np.empty((nrows, 0), dtype=int)

    first = np.random.choice(maxc, nrows)
    # One buffer row per round (rounds x samples), allocated once so the
    # accepted values are not re-copied by a vstack on every round.
    drawn = np.empty((ncols, nrows), dtype=first.dtype)
    drawn[0] = first
    for k in range(1, ncols):
        cand = np.random.choice(maxc, nrows)
        # True where the proposal repeats any of the k values accepted so far.
        clash = (cand == drawn[:k]).any(axis=0)
        while clash.any():
            cand[clash] = np.random.choice(maxc, clash.sum())
            clash = (cand == drawn[:k]).any(axis=0)
        drawn[k] = cand

    # Switch to one row per sample, then order each row in place.
    result = drawn.T
    result.sort(axis=1)
    return result

-

In [44]: %timeit org_app()
    ...: %timeit sort_random_choice()
    ...: %timeit random_permute()
    ...: %timeit argpartition_sort()
    ...: %timeit while_loop()
    ...: 
1 loops, best of 3: 258 ms per loop
1 loops, best of 3: 232 ms per loop
10 loops, best of 3: 166 ms per loop
10 loops, best of 3: 79.9 ms per loop
10 loops, best of 3: 58.6 ms per loop
+2

You can call sort once on the whole array instead of once per row:

sample_dates = np.sort([np.random.choice(365, 50, replace=False) for _ in range(10000)], axis=1)
+1

Here is an approach that avoids sort altogether:

# Mask over the 365 days with exactly 50 entries switched on.
picked = np.zeros(365, dtype=bool)
picked[:50] = True
days = np.arange(1, 366)
# Shuffling the mask chooses which 50 days survive; boolean indexing keeps
# them in calendar order, so no sort is needed.
sample_dates = np.array([days[np.random.permutation(picked)] for _ in range(10000)])

, , ( 0,44 0,77 "Nils Werner". OP 0,81 ).

+1

Source: https://habr.com/ru/post/1681227/


All Articles