Slow heap array performance

I am having a strange memory access performance problem, any ideas?

int* pixel_ptr = somewhereFromHeap;

int local_ptr[307200]; //local

//this is very slow
for(int i=0;i<307200;i++){
  pixel_ptr[i] = someCalculatedVal ;
}

//this is very slow
for(int i=0;i<307200;i++){
  pixel_ptr[i] = 1 ; //constant
}

//this is fast
for(int i=0;i<307200;i++){
  int val = pixel_ptr[i];
  local_ptr[i] = val;
}

//this is fast
for(int i=0;i<307200;i++){
  local_ptr[i] = someCalculatedVal ;
}

I tried to combine the values into a local scanline buffer first:

int scanline[640]; // local

//this is very slow
for(int i=xMin;i<xMax;i++){
  int screen_pos = sy*screen_width+i;
  int val = scanline[i];
  pixel_ptr[screen_pos] = val ;
}

//this is fast
for(int i=xMin;i<xMax;i++){
  int screen_pos = sy*screen_width+i;
  int val = scanline[i];
  pixel_ptr[screen_pos] = 1 ; //constant
}

//this is fast
for(int i=xMin;i<xMax;i++){
  int screen_pos = sy*screen_width+i;
  int val = i; //or a constant
  pixel_ptr[screen_pos] = val ;
}

//this is slow
for(int i=xMin;i<xMax;i++){
  int screen_pos = sy*screen_width+i;
  int val = scanline[0];
  pixel_ptr[screen_pos] = val ;
}

Any ideas? I use MinGW with the compiler flags -O1 -std=c++11 -fpermissive.

Update 4: I should mention that these are fragments from my program, and that heavy code/functions run before and after them. The scanline block is executed at the end of the function, just before it returns.

Now with a proper test program. Thanks to @Iwillnotexist.

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>

#define SIZE 307200
#define SAMPLES 1000

/*
 * Times a single pass that stores i into element i of a SIZE-element
 * int array declared as a local (automatic) variable of this function.
 *
 * Returns the elapsed time in milliseconds between two gettimeofday()
 * readings taken immediately before and after the fill loop.
 */
double local_test(){
    int local_array[SIZE];
    timeval t_begin, t_end;

    gettimeofday(&t_begin, NULL);
    int idx = 0;
    while(idx < SIZE){
        local_array[idx] = idx;
        ++idx;
    }
    gettimeofday(&t_end, NULL);

    // Seconds and microseconds are differenced separately; a negative
    // microsecond difference is compensated by the seconds term.
    long delta_sec = t_end.tv_sec - t_begin.tv_sec;
    long delta_usec = t_end.tv_usec - t_begin.tv_usec;

    // seconds -> ms (integer), microseconds -> ms (fractional)
    return delta_sec*1000 + delta_usec/1000.0;
}

/*
 * Times a single pass that stores i into element i of a SIZE-element
 * int array obtained with new[] on every call and released with
 * delete[] before returning.
 *
 * Only the fill loop sits between the two gettimeofday() readings;
 * the allocation and the release are outside the timed region.
 * Returns the elapsed time in milliseconds.
 */
double heap_test(){
    timeval t_begin, t_end;
    int* heap_array = new int[SIZE];

    gettimeofday(&t_begin, NULL);
    int idx = 0;
    while(idx < SIZE){
        heap_array[idx] = idx;
        ++idx;
    }
    gettimeofday(&t_end, NULL);

    // Seconds and microseconds are differenced separately; a negative
    // microsecond difference is compensated by the seconds term.
    long delta_sec = t_end.tv_sec - t_begin.tv_sec;
    long delta_usec = t_end.tv_usec - t_begin.tv_usec;
    double elapsed_ms = delta_sec*1000 + delta_usec/1000.0;

    delete[] heap_array;
    return elapsed_ms;
}


/*
 * Times a single pass that stores i into element i of a SIZE-element
 * int array allocated with new[].
 *
 * The array is allocated on the first call only and kept in a
 * function-local static pointer, so later calls write to the same
 * buffer. This function never releases it.
 * Returns the elapsed time in milliseconds between two gettimeofday()
 * readings taken around the fill loop.
 */
double heap_test2(){
    static int* heap_array = NULL;

    // Allocate once; subsequent calls reuse the same buffer.
    if(!heap_array){
        heap_array = new int[SIZE];
    }

    timeval t_begin, t_end;

    gettimeofday(&t_begin, NULL);
    int idx = 0;
    while(idx < SIZE){
        heap_array[idx] = idx;
        ++idx;
    }
    gettimeofday(&t_end, NULL);

    // Seconds and microseconds are differenced separately; a negative
    // microsecond difference is compensated by the seconds term.
    long delta_sec = t_end.tv_sec - t_begin.tv_sec;
    long delta_usec = t_end.tv_usec - t_begin.tv_usec;

    return delta_sec*1000 + delta_usec/1000.0;
}


/*
 * Benchmark driver: runs local_test, heap_test and heap_test2 in that
 * order, SAMPLES times each, and prints the sum of the per-call times
 * (milliseconds) for each variant. argc/argv are not used.
 */
int main (int argc, char** argv){
    double total_ms;
    int run;

    // Array declared as a local variable of the test function.
    total_ms = 0;
    run = 0;
    while(run < SAMPLES){
        total_ms += local_test();
        ++run;
    }
    printf("local: %f ms\n",total_ms);

    // Array allocated with new[] and freed with delete[] on every call.
    total_ms = 0;
    run = 0;
    while(run < SAMPLES){
        total_ms += heap_test();
        ++run;
    }
    printf("heap_: %f ms\n",total_ms);

    // Array allocated with new[] once and reused across calls.
    total_ms = 0;
    run = 0;
    while(run < SAMPLES){
        total_ms += heap_test2();
        ++run;
    }
    printf("heap2: %f ms\n",total_ms);

}

Compiled without optimization:

local: 577.201000 ms

heap_: 826.802000 ms

heap2: 686.401000 ms

The first heap test, the one using new and delete, is 2x slower. (Paging, as suggested?)

1,2 . , , , , , . , pixel_ptr, , prograim.

- / , !

, , . , , /.

?:

, , , . , , complier , , , .

+4
2

, , , .

, , , , ( , ).

, . , , (, ). , , . . , , , 4096 ( 1024 int), 8192, 4096 . , 4096 , 8192 , , . , .

, , , " ". , , , .

Windows MS x64.

EDIT:. , , . delete , delete , , , ( new, ).

+3

. , :

//this is fast
for(int i=0;i<307200;i++){
  int val = pixel_ptr[i];
  local_ptr[i] = val;
}

//this is fast
for(int i=0;i<307200;i++){
  local_ptr[i] = pixel_ptr[i];
}

.

0

Source: https://habr.com/ru/post/1570411/


All Articles