What is the fastest way to generate a random sequence from a list of data?

Let's say that I have a list of data: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, where n = 10 elements

I would like to randomly select k elements of this set to form a sublist, say k = 5.

In this case, I could get a sublist that looks like {9, 3, 5, 2, 7}

I could do this:

  • Randomly determine an offset into the list, between 0 and the current list size minus 1
  • Add the item at that offset to my sublist
  • Remove that item from the source list
  • Repeat until the sublist reaches the desired size.

The problem is that as the original list grows, the time needed to reach the offset and to delete the item grows with it, so for any significantly large list (for example, more than 1,000,000 items) this algorithm takes quite a long time.

Is there a faster way to generate a random sequence from a list of data? The implementation of the random number generator should be disregarded for this question; focus instead on how the RNG's result is used in the proposed algorithm.

Any thoughts?

I am currently using the C++ STL list (std::list).

+3
source share
10 answers

Or you could do this:

  • Random determination of the offset within the list, between 0 and the current size of the list.
  • .
  • , , , , . , 10 1 000 000 , 10, , . , .
  • , . , . , . , .

, , , .

, random_shuffle 10 ^ 6 .

+1

random_shuffle. , .

, std::vector (, , std::list, , ) - . :

// Option 1: shuffle a built-in array in place. data + 10 is the
// one-past-the-end pointer of the 10-element array.
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17; std::shuffle is its replacement on newer toolchains.
int data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
std::random_shuffle(data, data + 10); 

// Option 2 (an alternative to the array version above, not meant to be
// combined with it): shuffle a std::vector in place through its iterators.
// NOTE(review): std::vector needs an element type to compile here,
// e.g. std::vector<int>.
std::vector data; // populate it
std::random_shuffle(data.begin(), data.end());

, k :

// After the shuffle, the first k elements -- data[0] through data[k - 1] --
// form the random subset. Either use them where they are, or copy them out
// (array version; data + k is one past the last selected element):
std::vector subset(data, data + k);

// or, for the std::vector version, keep only the first k elements:
data.resize(k); // shrink vector

, Jerry , , .

+9

OutputIterators std::random_shuffle. , , , .

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <numeric>
#include <random>
#include <vector>

// Shuffles [begin, end) in place and copies the first n elements of the
// shuffled range to out.
//
//   begin, end  random-access range to sample from. NOTE: the range itself is
//               reordered; its elements are kept, only their order changes.
//   out         output iterator that receives the selected elements.
//   n           number of elements requested. It is clamped to the size of the
//               range, so an oversized request never reads past end.
//
// std::shuffle is used instead of std::random_shuffle, which was deprecated in
// C++14 and removed in C++17.
template<class It, class OutIt>
void take_random_n(It begin, It end, OutIt out, size_t n) {
  typedef typename std::iterator_traits<It>::difference_type difference_type;

  // One engine per thread (and per template instantiation), seeded once.
  static thread_local std::mt19937 engine{std::random_device{}()};
  std::shuffle(begin, end, engine);

  const difference_type available = std::distance(begin, end);
  difference_type count = available;
  if (n < static_cast<size_t>(available)) {
    count = static_cast<difference_type>(n);
  }

  std::copy_n(begin, count, out);
}

// Demo: asks take_random_n for 4 values out of the numbers 1..10 and prints
// each selected value followed by a single space.
int main() {
  int source[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  std::vector<int> picked;
  take_random_n(source, source + 10, std::back_inserter(picked), 4);
  std::copy(picked.begin(), picked.end(),
            std::ostream_iterator<int>(std::cout, " "));
}
+2

, ( ) k . O (N), Fisher- , ().

+1
0

.

0

, . n , .

0

. , , , . @pmr ( , ):

// Copies n randomly chosen elements of [first, last) to result, leaving the
// input range untouched.
//
//   first, last  range to sample from; it is read once, into a temporary
//                vector, so plain input iterators are sufficient.
//   n            number of elements requested. Values <= 0 select nothing and
//                values larger than the range select every element (the
//                original read past the end of the vector in that case).
//   result       output iterator that receives the selected elements.
//
// Costs one copy of the whole range plus one full shuffle of that copy.
// std::shuffle is used instead of std::random_shuffle, which was deprecated in
// C++14 and removed in C++17.
template <typename InputIterator, typename Size, typename OutputIterator>
void take_random_n(InputIterator first, InputIterator  last,
                   Size          n,     OutputIterator result)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    std::vector<value_type> pool(first, last);

    // One engine per thread (and per template instantiation), seeded once.
    static thread_local std::mt19937 engine{std::random_device{}()};
    std::shuffle(pool.begin(), pool.end(), engine);

    std::size_t count = 0;
    if (n > 0)
    {
        count = std::min(static_cast<std::size_t>(n), pool.size());
    }

    std::copy_n(pool.begin(), count, result);
}

, . :

// Copies n randomly chosen elements of [first, last) to result without
// copying or reordering the elements themselves: only a vector of positions
// is shuffled, and the chosen positions are then looked up in the input.
//
//   first, last  range to sample from. It is traversed several times
//                (std::distance, then one std::next per selected element), so
//                the iterators must be multi-pass (forward or better) despite
//                the template parameter's name. Lookups are O(1) for
//                random-access iterators and linear otherwise.
//   n            number of elements requested. Values <= 0 select nothing and
//                values larger than the range select every element.
//   result       output iterator that receives the selected elements.
//
// Fixes over the earlier draft: positions are stored as difference_type (not
// value_type), each lookup starts from first via std::next (std::advance
// returns void and would have moved first cumulatively), and std::iota
// replaces the Boost counting_iterator.
template <typename InputIterator, typename Size, typename OutputIterator>
void take_random_n(InputIterator first, InputIterator  last,
                   Size          n,     OutputIterator result)
{
    typedef typename
        std::iterator_traits<InputIterator>::difference_type difference_type;

    const difference_type size = std::distance(first, last);
    if (size <= 0)
    {
        return;
    }

    // positions = 0, 1, ..., size - 1
    std::vector<difference_type> positions(static_cast<std::size_t>(size));
    std::iota(positions.begin(), positions.end(), difference_type(0));

    // One engine per thread (and per template instantiation), seeded once.
    static thread_local std::mt19937 engine{std::random_device{}()};
    std::shuffle(positions.begin(), positions.end(), engine);

    std::size_t count = 0;
    if (n > 0)
    {
        count = std::min(static_cast<std::size_t>(n), positions.size());
    }

    for (std::size_t i = 0; i < count; ++i)
    {
        *result++ = *std::next(first, positions[i]);
    }
}

// Disclaimer: I have not tested the code above!

, - , : (, vector<T>::iterator), , std::distance, std::advance .

0

2 ( stl ):

//-----------------------------------------------------------------------------
#include <cstdlib>
//-----------------------------------------------------------------------------
#include <iostream>
#include <list>
#include <iterator>
#include <algorithm>
//-----------------------------------------------------------------------------
// Random generator functor built on std::rand().
// operator()( i ) returns std::rand() % i, i.e. for a positive bound i a value
// r with 0 <= r < i. A bound of 0 has no valid result; 0 is returned in that
// case instead of evaluating "% 0", which is undefined behaviour.
// The modulo makes low values slightly more likely whenever i does not evenly
// divide the number of distinct std::rand() results.
template< typename DiffType >
struct RandomlyRandom{
  DiffType operator()( DiffType i ){
    if( i == 0 ){
      return 0;
    }
    return std::rand() % i;
  }
};
//-----------------------------------------------------------------------------
// Partial Fisher-Yates shuffle: moves a random selection into the first n
// positions of [begin, end).
//
// For each of the first min(n, size) positions, one element is picked from the
// not-yet-fixed tail (the current position included) and swapped into place.
// Elements are only swapped, so the range always stays a permutation of its
// original contents.
//
//   begin, end  forward (or better) range to shuffle in place
//   Func        callable; Func( k ) is expected to return a value in [0, k).
//               The result is additionally reduced modulo k.
//   n           number of leading positions to fill. 0 leaves the range
//               untouched; values larger than the range shuffle all of it.
//
// The earlier do/while version ran n + 1 times (once even for n == 0), walked
// past end when n reached the range size, and divided by zero on an empty
// range.
template< typename FwdIter, typename Fn >
void random_shuffle_n( FwdIter begin, FwdIter end, Fn& Func, size_t n ){
  typedef typename std::iterator_traits<FwdIter>::difference_type difference_type;

  // Number of elements in [current, end) that may still be picked.
  difference_type remaining = std::distance( begin, end );

  for( FwdIter current = begin; n > 0 && remaining > 0; ++current, --remaining, --n ){
    const difference_type offset = Func( remaining ) % remaining;

    FwdIter chosen = current;
    std::advance( chosen, offset ); // offset 0 swaps the element with itself

    std::iter_swap( current, chosen );
  }
}
//-----------------------------------------------------------------------------
int main( int argc, char* argv[] ){
int arr[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
std::list< int > lst( arr, arr + sizeof( arr ) / sizeof( arr[ 0 ] ) );

  std::copy( lst.begin(), lst.end(), std::ostream_iterator< int >( std::cout, " " ) ); 
  std::cout << std::endl;
  RandomlyRandom< std::list< int >::difference_type > rand;

  for( int i = 0; i < 100;  i++ ){
    random_shuffle_n( lst.begin(), lst.end(), rand, 5 );
    std::copy( lst.begin(), lst.end(), std::ostream_iterator< int >( std::cout, " " ) ); 
    std::cout << std::endl;
  }

  return 0;
}
//-----------------------------------------------------------------------------
0

Source: https://habr.com/ru/post/1756015/


All Articles