Find a string of N 1 bits in a bit array

As the name sais, I want to find the sequential start of n single-bit bits in a variable-size bitmap (M).

A common use case is N <= 8 and M <= 128

I do this operation in the internal channel on the embedded device. Writing a trivial implementation is easy, but not fast enough for my taste (e.g. brute force search until a solution is found).

I wonder if anyone has a more elegant solution in their tricks bag.

+3
source share
8 answers
int nr = 0;    
for ( int i = 0; i < M; ++i )
{
  if ( bits[i] )
    ++nr;
  else
  {
    nr = 0; continue;
  }
  if ( nr == n ) return i - nr + 1; // start position    
}

What do you mean by brute force? O (M * N) or is it an O (M) solution? if you had this in mind then I'm not sure how much you can optimize things.

, , , . : , N .

for ( int i = 0; i < M; i += N )
  if ( bits[i] == 0 ) // if the first bit of a byte is 0, that byte alone cannot be a solution. Neither can it be a solution in conjunction with the previous byte, so skip it.
    continue;
  else // if the first bit is 1, then either the current byte is a solution on its own or it is a solution in conjunction with the previous byte
  {
    // search the bits in the previous byte.
    int nrprev = 0;
    while ( i - nrprev >= 0 && bits[i - nrprev] ) ++nrprev;

    // search the bits in the current byte;
    int nrcurr = 0;
    while ( bits[i + nrcurr + 1] && nrcurr + nrprev <= N ) ++nrcurr;

    if ( nrcurr + nrprev >= N ) // solution starting at i - nrprev + 1.
      return i - nrprev + 1;   
  }

. , , .

+4

.

:

00000001 - // Bytes ending with one or more 1's.  These start a run.
11111111 - // All 1's.  These continue a run.
10000000 - // Bytes starting with 1 but ending with 0's.  These end a run.
10111000 - // All the rest.  These can be enders or short runs.

, . .

. , , 256 , :

Number of bits set.
Number of bits set before first zero.
Number of bits set after last zero.

, , .

+1

- , MIPS. MIPS CLZ ( "Count Leading Zeroes" ), . , CLZ.

, , C CLZ :

unsigned numbits = 0, totalbits = 0;
while (data != 0 && numbits != N) {
    numbits = CLZ(data);  // count leading zeroes
    data <<= numbits;     // shift off leading zeroes
    totalbits += numbits; // keep track of how many bits we've shifted off
    numbits = CLZ(~data); // count leading ones
    data <<= numbits;     // shift off leading ones
    totalbits += numbits; // keep track of how many bits we've shifted off
}

totalbits ( ) N . ​​ ( , ).

, MIPS, .

+1

SWAR answer:

V, , N M - . N N N V >> n.

bitwise AND(all N) M-wide. , , .

, M -bit-wide, .

+1

, count-zeroes.

y = x ^ x-1

1 1- x.

y + 1

- , 1 0,

x ^ x-(y+1)

1 1-.

(y + 1) recurse...

... ...

... ... , . n, ≥2n-1 1 . , 4 , - 32 . , :

const unsigned int word_starts = 0x11111111;
unsigned int word = whatever;
unsigned int flips = word + word_starts;
if ( carry bit from previous addition ) return true;
return ~ ( word ^ flips ) & word_starts;

, ( ) flips, 1- word_starts ( )

word ^ carry_from_right ^ 1

xor , ANDing. , 1- .

, , C , .

+1

, Intel, asm BSF (Bit Scan Forward) BSR (Bit Scan Reverse) . , .

+1

, , - . N < 32, .

For backward compatibility, the most significant bit of the first word is considered bit 0.

Note that the algorithm uses the sentinel word (all zeros) at the end to stop any search, rather than constantly checking the end of the array. Also note that the algorithm allows you to start the search from any position in the bitmap (usually at the end of the last successful distribution), and not always start from the beginning of the bitmap.

Put your own msbit32 () function of your compiler.

#define leftMask(x) (((int32_t)(0x80000000)) >> ((x) - 1))     // cast so that sign extended (arithmetic) shift used
#define rightMask(x) (1 << ((x) - 1))


/* Given a multi-word bitmap array find a run of consecutive set bits and clear them.
 *
 * Returns 0 if bitrun not found.
 *         1  if bitrun found, foundIndex contains the bit index of the first bit in the run (bit index 0 is the most significant bit of the word at lowest address).
 */

static int findBitRun(int runLen, uint32_t *pBegin, uint32_t *pStartMap, uint32_t *pEndMap, uint32_t *foundIndex)
{
    uint32_t *p = pBegin;
    unsigned int bit;

    if (runLen == 1)
    {    // optimise the simple & hopefully common case
        do {
            if (*p)
            {
                bit = msbit32(*p);
                *p &= ~(1 << bit);
                *foundIndex = ((p - pStartMap) * 32ul) + (31 - bit);
                return 1;
            }
            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }

    else if (runLen < 32)
    {
        uint32_t rmask = (1 << runLen) - 1;
        do {
            uint32_t map = *p;
            if (map)
            {
                // We want to find a run of at least runLen consecutive ones within the word.
                // We do this by ANDing each bit with the runLen-1 bits to the right
                // if there are any ones remaining then this word must have a suitable run.

                // The single bit case is handled above so can assume a minimum run of 2 required

                uint32_t w = map & (map << 1); // clobber any 1 bit followed by 0 bit
                int todo = runLen - 2;  // -2 as clobbered 1 bit and want to leave 1 bit

                if (todo > 2)
                {
                    w &= w << 2;      // clobber 2 bits
                    todo -= 2;

                    if (todo > 4)
                    {
                        w &= w << 4;      // clobber 4 bits
                        todo -= 4;
                        if (todo > 8)
                        {
                            w &= w << 8;      // clobber 8 bits
                            todo -= 8;
                        }
                    }
                }

                w &= w << todo;     // clobber any not accounted for

                if (w)              // had run >= runLen within word
                {
                    bit = msbit32(w); // must be start of left most run
                    *p &= ~(rmask << ((bit + 1) - runLen));
                    *foundIndex = ((p - pStartMap) * 32ul) + (31 - bit);
                    return 1;
                }
                else if ((map & 1) && (p[1] & 0x80000000ul))    // assumes sentinel at end of map
                {
                    // possibly have a run overlapping two words
                    // calculate number of bits at right of current word
                    int rbits = msbit32((map + 1) ^ map);
                    int lmask = rmask << ((32 + rbits) - runLen);
                    if ((p[1] | lmask) == p[1])
                    {
                        p[0] &= ~((1 << rbits) - 1);
                        p[1] &= ~lmask;
                        *foundIndex = ((p - pStartMap) * 32ul) + (32 - rbits);
                        return 1;
                    }
                }
            }
            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }
    else    // bit run spans multiple words
    {
        pEndMap -= (runLen - 1)/32;    // don't run off end
        if (pBegin > pEndMap)
        {
            pBegin = pStartMap;
        }

        do {
            if ((p[0] & 1) && ((p[0] | p[1]) == 0xfffffffful))   // may be first word of run
            {
                uint32_t map = *p;
                uint32_t *ps = p;      // set an anchor
                uint32_t bitsNeeded;
                int sbits;

                if (map == 0xfffffffful)
                {
                    if (runLen == 32)        // easy case
                    {
                        *ps = 0;
                        *foundIndex = (ps - pStartMap) * 32ul;
                        return 1;
                    }
                    sbits = 32;
                }
                else
                {
                    sbits = msbit32((map + 1) ^ map);
                }

                bitsNeeded = runLen - sbits;

                while (p[1] == 0xfffffffful)
                {
                    if (bitsNeeded <= 32)
                    {
                        p[1] = ~(0xfffffffful << (32 - bitsNeeded));
                        while (p != ps)
                        {
                            *p = 0;
                            --p;
                        }
                        *ps &= ~rightMask(sbits);
                        *foundIndex = ((p - pStartMap) * 32ul) + (32 - sbits);
                        return 1;
                    }
                    bitsNeeded -= 32;
                    if (++p == pBegin) 
                    {
                        ++pBegin;   // ensure we terminate
                    }
                }

                if ((bitsNeeded < 32) & (p[1] & 0x80000000ul))
                {
                    uint32_t lmask = leftMask(bitsNeeded);

                    if ((p[1] | lmask) == p[1])
                    {
                        p[1] &= ~lmask;
                        while (p != ps)
                        {
                            *p = 0;
                            --p;
                        }
                        *ps &= ~rightMask(sbits);
                        *foundIndex = ((p - pStartMap) * 32ul) + (32 - sbits);
                        return 1;
                    }
                }
            }

            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }

    return 0;
}
+1
source

Source: https://habr.com/ru/post/1732419/


All Articles