Return non-duplicate random values from a very large range

I need a function that generates k pseudo-random values from the set of n integers, zero to n-1, without repeating any previous result. k is less than or equal to n. O(n) memory is unacceptable because of the large size of n and the frequency with which I will need to re-shuffle.

These are the methods that I have reviewed so far:

Array: Normally, if I need random values without duplicates, I shuffle an array, but that takes O(n) memory, and n will probably be too large for that.

/*
 * Array approach: on the first call, fill a static table with 0..N-1 and
 * shuffle it; every call then returns the next table entry.
 *
 * The index and the fill counter are `long`, not `int`: the table has
 * 4000000000 entries, which an `int` counter cannot reach without
 * overflowing.
 *
 * No bounds check is made once every entry has been returned; callers must
 * not request more values than the table holds.
 */
long nextvalue(void) {
  static long array[4000000000];
  static long s = 0;
  if (s == 0) {
    /* First call: build the identity table, then permute it in place. */
    for (long i = 0; i < 4000000000; i++) array[i] = i;
    shuffle(array, 4000000000);
  }
  return array[s++];
}

n-state PRNG: There are many random number generators that can be designed to have a period of n and to visit n unique states over that period. The simplest example:

/*
 * Additive-step generator: advances a static state by a fixed stride,
 * reduces it modulo n, and returns the new state.
 */
long nextvalue(void) {
  static const long stride = 1009;  /* assumed co-prime to n */
  static long state = 0;

  state += stride;
  state %= n;
  return state;
}

The problem is that it is not always easy to design a good PRNG on the fly for a given n, and such a PRNG is unlikely to approximate a fair shuffle unless it has a large number of variable parameters (which makes it even harder to design). But perhaps there is a good one that I do not know about.

m- : , - f(), 1:1 , . , s :

/*
 * Passes a static call counter through f() and returns the result.
 * The counter is advanced before f() is invoked, so the first call
 * evaluates f(0), the second f(1), and so on.
 * NOTE(review): presumably f() is a 1:1 mapping; confirm against its definition.
 */
long nextvalue(void) {
  static long counter = 0;
  const long index = counter;

  counter += 1;
  return f(index);
}

, f(), , , .

NPOT: , , f(), , ; , , , . , , n, :

/*
 * Passes a static call counter through f(), re-applying f() to its own
 * output until the value falls below n, and returns that value.
 *
 * The original snippet computed the value but had no return statement.
 */
long nextvalue(void) {
  static long s = 0;
  long x = s++;
  /* Retry while the mapped value is outside 0..n-1. */
  do { x = f(x); } while (x >= n);
  return x;
}

( ).

? , , f(), , n .

, , - , , j, , j k ( ). j , .

+4
2

, , O (n), , . . . , .

. n, , , n, . , - .

- , , , , . (low high, , 0xefff 0xefffffff), .

/*
 * Folds value_size bytes starting at value into a size_t: each byte is added
 * to the accumulator, which is then multiplied by low (unsigned arithmetic,
 * so it wraps on overflow). The result is reduced modulo high.
 *
 * value      - bytes to hash; only read, never written.
 * value_size - number of bytes to read; 0 yields 0 % high.
 * low        - multiplier applied after every byte.
 * high       - modulus for the result; must be non-zero.
 */
size_t hash(const unsigned char *value, size_t value_size, size_t low, size_t high) {
    size_t x = 0;
    while (value_size--) {
        x += *value++;
        x *= low;
    }
    return x % high;
}
/*
 * Convenience wrapper so any object pointer can be passed without a cast.
 * Each argument is parenthesised so that an expression such as `p + 1` is
 * evaluated with its own type before it is converted to a byte pointer.
 */
#define hash(value, value_size, low, high) \
    (hash((const void *)(value), (value_size), (low), (high)))

- , , , . , -.

+2

... , , - .

, , "" n- . , , 1:1 ; , , , .

, , , - ( , - ), .

, , , , . :

/*
 * Scrambles x under key k and returns the result masked with BITMASK.
 *
 * Steps, in order: multiply by k with its low bit forced to 1, XOR in a
 * right-shifted copy, XOR in a left-shifted copy, XOR in another
 * right-shifted copy, add a fixed 64-bit constant. The left-shift distance
 * depends on the low bit of the caller's k (read before it is forced odd).
 */
uint64_t mix(uint64_t x, uint64_t k) {
  const int shift_a = BITS * 4 / 5;
  const int shift_b = BITS / 5 + (k & 1);
  const int shift_c = BITS * 2 / 5;

  uint64_t v = x * (k | 1);
  v ^= (v & BITMASK) >> shift_a;
  v ^= (v << shift_b) & BITMASK;
  v ^= (v & BITMASK) >> shift_c;

  return (v + 0x9e3779b97f4a7c15) & BITMASK;
}

, , :

/*
 * Undoes the steps of mix() in reverse order for the same key k:
 * subtract the additive constant, undo the three xor-shift steps, then
 * multiply by a value derived from k by repeated squaring.
 * Returns the result masked with BITMASK.
 *
 * Assumes BITMASK selects the low BITS bits -- TODO confirm against the
 * definitions of BITS and BITMASK.
 */
uint64_t unmix(uint64_t x, uint64_t k) {
  const int s0 = BITS * 4 / 5;
  /* Uses the low bit of k before it is forced to 1 below, as mix() does. */
  const int s1 = BITS / 5 + (k & 1);
  const int s2 = BITS * 2 / 5;
  k |= 1;
  /*
   * Repeated squaring: kp runs through k^2, k^4, k^8, ... while k
   * accumulates k^3, k^7, k^15, ...  The loop stops once kp is 1 under
   * BITMASK; at that point k times the original (odd) k is 1 under BITMASK,
   * i.e. k now holds the multiplicative inverse of the original key.
   */
  uint64_t kp = k * k;
  while ((kp & BITMASK) > 1) {
    k *= kp;
    kp *= kp;
  }

  x -= 0x9e3779b97f4a7c15;
  /* `*` binds tighter than `>>`/`<<`, so the shift counts are s2*2, s1*2, ... */
  x ^= ((x & BITMASK) >> s2) ^ ((x & BITMASK) >> s2 * 2);
  /*
   * NOTE(review): if s1 * 5 can reach 64 (e.g. BITS >= 60 with an odd k),
   * the last shift count is out of range for uint64_t, which is undefined
   * behaviour -- verify against the actual value of BITS.
   */
  x ^= (x << s1) ^ (x << s1 * 2) ^ (x << s1 * 3) ^ (x << s1 * 4) ^ (x << s1 * 5);
  x ^= (x & BITMASK) >> s0;
  x *= k;

  return x & BITMASK;
}

PRNG :

/* Per-round keys passed to mix(); one entry per round. */
uint64_t key[ROUNDS];
/* Counter consumed by rand_no_rep(); advanced by one on every call. */
uint64_t seed = 0;

/*
 * Takes the current seed (post-incrementing it), runs it through ROUNDS
 * applications of mix() with key[0..ROUNDS-1], and repeats the whole set of
 * rounds on the result for as long as it is not below RANGE.
 * Returns the first value below RANGE.
 */
uint64_t rand_no_rep(void) {
  uint64_t candidate = seed;
  seed += 1;

  for (;;) {
    for (int round = 0; round < ROUNDS; round++) {
      candidate = mix(candidate, key[round]);
    }
    if (candidate < RANGE) {
      return candidate;
    }
  }
}

seed key , .

, seed rand_no_rep() ; .

, a, b. ROUNDS==1 50% ( a b; 0, 1 - ). , k, a --b k ( ). , . k .

50% 25%, , ( , ). , , . , , 36% 37%. ", ", , , , , .

ROUNDS==2, , .

, , . , mix() (, , mod RANGE). / , . , , , , , , .

, , . - b -follows- a c, c a b.

- , 50% c a b, k, b a. , 25% a b ( , , , ), 25% k.

, , , c a b. a/b ( , - ), a, b c ( ).

, , , . ; , , , , .

, , , ; PRNG.

PRNG, SIMD .

0

Source: https://habr.com/ru/post/1648454/


All Articles