How to search for "n bits" in an array of bytes?

I have an array of bytes. Now I need to know the number of occurrences of a bitmap whose length is N.

For example, my byte array is "00100100 10010010" and the pattern is "001". here N = 3, and the number is 5.

Work with bits is always weak.

+3
source share
4 answers

You can always XOR the first N bits, and if you get 0, you end up with a match. Then shift the desired bit β€œstream” one bit to the left and repeat. This assumes that you want to get matches if these sub-patterns overlap. Otherwise, you must shift along the length of the pattern by coincidence.

+7
source

, unsigned int:

int main () {
    unsigned int curnum;
    unsigned int num = 0x2492;
    unsigned int pattern = 0x1;
    unsigned int i;
    unsigned int mask = 0;
    unsigned int n = 3;
    unsigned int count = 0;

    for (i = 0; i < n; i++) {
        mask |= 1 << i;
    }

    for (i = 8 * sizeof(num) - n; i >= 0; i--) {
        curnum = (num >> i) & mask;
        if (! (curnum ^ pattern)) {
            count++;
        }
    }
}
0

std::vector<bool>, std::search(source.begin(), source.end(), pattern.begin(), pattern.end());. vector<bool> idiosyncracies, .

0

N ,

vector<unsigned char> pattern;

(N + 7) / 8

, . , , , N == 19, :

|<-    v[0]   ->|<-    v[1]   ->|<-    v[2]   ->|
 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1
|         |<-             pattern             ->|

, , , , .

, , . window

vector<unsigned char> window;

N , 8, window . :

unsigned char mask = (1 << (N % 8)) - 1;

, window , , window ==,

window[0] &= mask;
bool isMatch = (window == pattern);

. N , , , , N + 1:

vector<int> shifts;

, , , window.

0001001100. window . missmatch, 1, 1 2, 0, . , , , , , window, 2. , ( 2, 0), window 7, 3 . 4, window 8 . sifts i , window, i. , window , shifts[N]. 8.

, , window ( ), , , .

if(window[i] != pattern[i])
{
    int j = 0;
    unsigned char mismatches = window[i] ^ pattern[i];
    while((mismatches & 1) == 0)
    {
        mismatches >>= 1;
        ++j;
    }
    mismatch_position = 8 * (window.size() - i - 1) + j;
}

, , window. #, ++ . # , , , ++. unsigned char byte, vector<unsigned char> & byte [], size() Length , , . , , , , , , , , , , . .

public static void shiftBitsIntoWindow_MSbFirst(byte[] window, byte[] source,
                                                int startBitPosition, int numberOfBits)
{
    int nob = numberOfBits / 8;
    // number of full bytes from the source

    int ntsh = numberOfBits % 8;
    // number of bits, by which to shift the left part of the window,
    // in the case, when numberOfBits is not an integer multiple of 8

    int nfstbb = (8 - startBitPosition % 8);
    // number Of bits from the start to the first byte boundary
    // The value is from the range [1, 8], which comes handy,
    // when checking if the substring of ntsh first bits
    // crosses the byte boundary in the source, by evaluating
    // the expression ntsh <= nfstbb.

    int nfbbte = (startBitPosition + numberOfBits) % 8;
    // number of bits from the last byte boundary to the end

    int sbtci;
    // index of the first byte in the source, from which to start
    // copying nob bytes from the source
    // The way in which the (sbtci) index is calculated depends on,
    // whether nob < window.Length

    if(nob < window.Length)// part of the window will be replaced
    // with bits from the source, but some part will remain in the
    // window, only moved to the beginning and possibly shifted
    {
        sbtci = (startBitPosition + ntsh) / 8;

        //Loop below moves bits form the end of the window to the front
        //making room for new bits that will come form the source

        // In the corner case, when the number by which to shift (ntsh)
        // is zero the expression (window[i + nob + 1] >> (8 - ntsh)) is
        // zero and the loop just moves whole bytes
        for(int i = 0; i < window.Length - nob - 1; ++i)
        {
            window[i] = (byte)((window[i + nob] << ntsh)
                | (window[i + nob + 1] >> (8 - ntsh)));
        }

        // At this point, the left part of the window contains all the
        // bytes that could be constructed solely from the bytes
        // contained in the right part of the window. Next byte in the
        // window may contain bits from up to 3 different bytes. One byte
        // form the right edge of the window and one or two bytes form
        // the source. If the substring of ntsh first bits crosses the
        // byte boundary in the source it two.

        int si = startBitPosition / 8; // index of the byte in the source
        // where the bit stream starts

        byte byteSecondPart; // Temporary variable to store the bits,
        // that come from the source, to combine them later with the bits
        // form the right edge of the window

        int mask = (1 << ntsh) - 1;
        // the mask of the form 0 0 1 1 1 1 1 1
        //                         |<-  ntsh ->|

        if(ntsh <= nfstbb)// the substring of ntsh first bits
        // doesn't cross the byte boundary in the source
        {
            byteSecondPart = (byte)((source[si] >> (nfstbb - ntsh)) & mask);
        }
        else// the substring of ntsh first bits crosses the byte boundary
        // in the source
        {
            byteSecondPart = (byte)(((source[si] << (ntsh - nfstbb))
                                   | (source[si + 1] >> (8 - ntsh + nfstbb))) & mask);
        }

        // The bits that go into one byte, but come form two sources
        // -the right edge of the window and the source, are combined below
        window[window.Length - nob - 1] = (byte)((window[window.Length - 1] << ntsh)
                                                | byteSecondPart);

        // At this point nob whole bytes in the window need to be filled
        // with remaining bits form the source. It done by a common loop
        // for both cases (nob < window.Length) and (nob >= window.Length)

    }
    else// !(nob < window.Length) - all bits of the window will be replaced
    // with the bits from the source. In this case, only the appropriate
    // variables are set and the copying is done by the loop common for both
    // cases
    {
        sbtci = (startBitPosition + numberOfBits) / 8 - window.Length;
        nob = window.Length;
    }


    if(nfbbte > 0)// The bit substring coppied into one byte in the
    // window crosses byte boundary in the source, so it has to be
    // combined form the bits, commming form two consecutive bytes
    // in the source
    {
        for(int i = 0; i < nob; ++i)
        {
            window[window.Length - nob + i] = (byte)((source[sbtci + i] << nfbbte)
                | (source[sbtci + 1 + i] >> (8 - nfbbte)));
        }
    }
    else// The bit substring coppied into one byte in the window
    // doesn't cross byte boundary in the source, so whole bytes
    // are simply coppied
    {
        for(int i = 0; i < nob; ++i)
        {
            window[window.Length - nob + i] = source[sbtci + i];
        }
    }
}
0

Source: https://habr.com/ru/post/1747843/


All Articles