C++ performance: checking a block of memory for the presence of certain values in certain cells

I am researching 2D bin-packing algorithms. I asked a similar question about PHP performance - it was too slow at packing - and the code has now been converted to C++.

It is still pretty slow. What my program does is allocate blocks of dynamic memory and fill them with the character "o":

// Allocate the bin as one flat char buffer of `area` bytes. The nothrow form
// of new yields a null pointer on failure instead of throwing, which is why
// the result is checked right after.
char* bin;
bin = new (nothrow) char[area];
if (bin == 0) {
    cout << "Error: " << area << " bytes could not be allocated";
    return false;
}
// Initialise every cell to 'o' (presumably the "free" marker, since the
// functions below test for 'x').
for (int i=0; i<area; i++) {
    bin[i]='o';
}

(their size is from 1 to 30 kilobytes for my data sets)

The program then checks for different combinations of characters "x" inside the current memory block.

// Marks a rectangle of the bin as occupied by writing 'x' into each covered cell.
//   bin   - grid stored row by row; cell (i, j) is bin[i*width + j]
//   best  - four ints: best[0] = first row, best[1] = number of rows,
//           best[2] = first column, best[3] = number of columns
//   width - row stride used to index bin
void place(char* bin, int* best, int width)
{   
    for (int i=best[0]; i<best[0]+best[1]; i++)
        for (int j=best[2]; j<best[2]+best[3]; j++)
            bin[i*width+j] = 'x';
}

One of the functions, which checks that a placement does not overlap anything, gets called millions of times at runtime.

// Reports whether a rectangle can be placed without touching an occupied cell.
//   bin   - grid stored row by row; cell (i, j) is bin[i*width + j]
//   pos   - four ints: pos[0] = first row, pos[1] = number of rows,
//           pos[2] = first column, pos[3] = number of columns
//   width - row stride used to index bin
// Returns false as soon as an 'x' is found inside the rectangle, true otherwise.
bool fits(char* bin, int* pos, int width)
{   
    for (int i=pos[0]; i<pos[0]+pos[1]; i++)
        for (int j=pos[2]; j<pos[2]+pos[3]; j++)
            if (bin[i*width+j] == 'x')
                return false;
    return true;
}

, ( ) . ?

: "x" "o" , , char. , , ?

!

: int* pos rect pos ( best), MSalters. , , , . .

: memset memchr . "x" "o" "\ 1" "\ 0" . __restrict . , , . -02 (-03)... .

+3
8

.

. SSE ~ 16 , , , (- , , , ).

+2

[: !]

, , .

, 4 8 32- 64- (, ), "oooo" "oooooooo" . .

, , , , - 64 . .

+2

, , , . , place best , , bin best. restrict, . place memset, fits memchr; , .

+1

, , ?

..

, , , , , 32 64 .

, , , , -, , . , , , . , , - alloc/free, ; , - .

:

// Writes 'x' into every cell of the rectangle described by best
// (best[0]/best[1] = first row / row count, best[2]/best[3] = first column /
// column count). Both loop conditions are written as sums of best[...]
// elements, and i*width is part of the index expression of every cell.
void place(char* bin, int* best, int width)
{   
    for (int i=best[0]; i<best[0]+best[1]; i++)
        for (int j=best[2]; j<best[2]+best[3]; j++)
            bin[i*width+j] = 'x';
}

- , , , best[0] .

, :

// Fills the rectangle described by best with 'x'.
//   bin   - grid stored row by row; cell (row, col) is bin[row*width + col]
//   best  - best[0] = first row, best[1] = number of rows,
//           best[2] = first column, best[3] = number of columns
//   width - row stride used to index bin
// The exclusive end bounds are computed once, before any cell is written.
void place(char* bin, int const* best, int const width)
{
    int const rowEnd = best[0] + best[1];
    int const colEnd = best[2] + best[3];

    int row = best[0];
    while( row < rowEnd )
    {
        int col = best[2];
        while( col < colEnd )
        {
            bin[row*width + col] = 'x';
            ++col;
        }
        ++row;
    }
}

, y*width , , :

// Fills the rectangle described by best with 'x', computing the offset of
// each row (y*width) once per row instead of once per cell.
//   bin   - grid stored row by row; cell (y, x) is bin[y*width + x]
//   best  - best[0] = first row, best[1] = number of rows,
//           best[2] = first column, best[3] = number of columns.
//           Only read here, so it is taken as pointer-to-const; callers
//           holding a plain int* still convert implicitly.
//   width - row stride used to index bin
void place(char* bin, int const* best, int const width)
{
    // Exclusive end bounds, evaluated once up front.
    int const maxY = best[0]+best[1];
    int const maxX = best[2]+best[3];

    for( int y = best[0]; y < maxY; ++y )
    {
        int const startOfRow  = y*width;

        for( int x = best[2]; x < maxX; ++x )
        {
            bin[startOfRow + x] = 'x';
        }
    }
}

( ) , , .

, , std::fill ( memset), .

, .

, , , ( blitter). , - . , , – .; -)

hth.,

+1

, , :

// changed pos to class rect for cleaner syntax
//
// Reports whether the rectangle pos is free of 'x' cells.
//   bin   - grid stored row by row; cell (i, j) is bin[i*width + j]
//   pos   - rectangle; top()/bottom()/left()/right() are treated as
//           inclusive bounds, matching the <= comparisons in the scan below
//   width - row stride used to index bin
// The four corner cells are probed first as a cheap early exit; the full scan
// that follows decides the result, so the probes never change the answer.
bool fits(char* bin, rect pos, int width)
{
    // Corner probes: top-left, bottom-right, bottom-left, top-right.
    if (bin[pos.top()*width + pos.left()] == 'x')
        return false;
    if (bin[pos.bottom()*width + pos.right()] == 'x')
        return false;
    if (bin[pos.bottom()*width + pos.left()] == 'x')
        return false;
    if (bin[pos.top()*width + pos.right()] == 'x')
        return false;

    for (int i=pos.top(); i<=pos.bottom(); i++)
        for (int j=pos.left(); j<=pos.right(); j++)
            if (bin[i*width+j] == 'x')
                return false;
    return true;
}

, bin[(pos.bottom()-1*width+pos.right()]. . , , . , , . .

+1

, , , - . .

// Element-by-element initialisation: writes 'o' into each of the area cells.
for (int i=0; i<area; i++) {
    bin[i]='o';
}

// Single library call that sets the first area bytes of bin to 'o'.
memset(bin, 'o', area);

memset , .

// Cell-by-cell version: writes 'x' into each cell of the rectangle
// (best[0]/best[1] = first row / row count, best[2]/best[3] = first column /
// column count), one assignment per cell.
void place(char* bin, int* best, int width)
{   
    for (int i=best[0]; i<best[0]+best[1]; i++)
        for (int j=best[2]; j<best[2]+best[3]; j++)
            bin[i*width+j] = 'x';
}

.

// Fills the rectangle described by best with 'x', one memset call per row.
//   bin   - grid stored row by row; cell (i, j) is bin[i*width + j]
//   best  - best[0] = first row, best[1] = number of rows,
//           best[2] = first column, best[3] = number of columns
//   width - row stride used to index bin
void place(char* bin, int* best, int width)
{
    // A non-positive column count means there is nothing to mark; returning
    // early also keeps a negative value from being converted to a huge size_t.
    int const cols = best[3];
    if (cols <= 0)
        return;

    int const lastRow = best[0] + best[1];   // exclusive
    for (int i = best[0]; i < lastRow; i++)
        // Each row's run starts at column best[2] and is exactly cols cells long.
        memset(bin + i * width + best[2], 'x', static_cast<size_t>(cols));
}

.

- . '\ 0' "o" "\ 1" "x". .

.

// Tests one cell by truthiness instead of comparing against a character.
// NOTE(review): assumes cells hold 0 for 'o' and a non-zero value for 'x' — confirm.
if (best[1])
{
    // Is a 'x'
}
else
{
    // Is a 'o'
}

. - :)

"" "".

// Adds up 12 consecutive entries of best and prints the total as the number of 'x'.
// NOTE(review): the total is only a count if every entry is 0 or 1 — confirm.
int sum = 0;
for (int i = 0; i < 12; i++)
{
    sum += best[i];
}

cout << "There are " << sum << "'x in the range" << endl;

.

0

2 , bool. , 2 . Appart add const, (, fits (bool const *,...)).

0

. - , , . , , . , , . , . , , [2 ^ i, 2 ^ i], { , , , }.

1) .. [X, Y], X * Y, [x, y] (x, y) :

(y * X + x):

// Maps a cell coordinate (rx, ry) to its index in a Z-order (bit-interleaved)
// layout: bit k of rx lands in bit 2k of the result, bit k of ry in bit 2k+1.
// Coordinates wider than half the bits of unsigned cannot be represented; the
// excess bits are lost through unsigned wrap-around.
unsigned position( unsigned rx, unsigned ry )
{
  unsigned x = rx;
  unsigned y = ry;
  unsigned part = 1;   // weight of the current 2-bit group: 1, 4, 16, ...
  unsigned pos = 0;
  // Keep going while EITHER coordinate still has bits left; stopping when one
  // of them reaches zero would drop the remaining bits of the other.
  while( ( x != 0 ) || ( y != 0 ) ) {
    unsigned const lowest_bit_x = ( x % 2 );
    unsigned const lowest_bit_y = ( y % 2 );
    pos += ( ((2*lowest_bit_y) + lowest_bit_x) * part );
    x /= 2; //throw away lowest bit
    y /= 2;
    part *= 4; //size grows by square(2)
  }
  return pos;
}

, , . , .

, X * Y, (2 ^ (2 * k)), , X Y . , sqaures.

benfits (x, y).

2) Then try to find the best way to traverse the submatrix elements in fits() and place(). I am not sure what that is; it is not necessarily the way it is done now. Basically, a submatrix of size [x, y] lies in no more than y * log(x) * log(y) blocks that are contiguous in the array representation, but they all fit in no more than 4 blocks of size 4 * x * y. So, for matrices that are smaller than a memory cache page, you will get no more than 4 cache misses, while your original code may miss y times.

0
source

Source: https://habr.com/ru/post/1791406/


All Articles