Original version, with the loops nested in i, j, k order:
// Matrix-multiplication benchmark, loop order i-j-k.
// Accumulates the product of A and B into C; reads no input and prints nothing,
// so it is only useful when run under a timer.
#include <stdio.h>
// Dimension of the square matrices (rows == columns == SIZE).
#define SIZE 1024
// File-scope arrays: static storage duration, so all three start zero-filled
// and do not live on the stack (SIZE * SIZE doubles each).
double A[SIZE][SIZE], B[SIZE][SIZE], C[SIZE][SIZE];
// Runs the triple loop once and returns 0. C is never read afterwards.
int main ()
{
int i, j, k;
// i selects the row of C (and of A).
for (i = 0; i < SIZE; i++) {
// j selects the column of C (and of B).
for (j = 0; j < SIZE; j++) {
// k varies fastest: A[i][k] walks along row i with unit stride, but
// B[k][j] steps down column j, skipping a whole row (SIZE doubles)
// between consecutive accesses because C arrays are stored row by row.
for (k = 0; k < SIZE; k++) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
return 0;
}
Run time:
real 0m35.137s
user 0m34.996s
sys 0m0.067s
The same program with the j and k loops swapped:
// Matrix-multiplication benchmark, loop order i-k-j.
// Accumulates the product of A and B into C; reads no input and prints nothing,
// so it is only useful when run under a timer.
#include <stdio.h>
// Dimension of the square matrices (rows == columns == SIZE).
#define SIZE 1024
// File-scope arrays: static storage duration, so all three start zero-filled
// and do not live on the stack (SIZE * SIZE doubles each).
double A[SIZE][SIZE], B[SIZE][SIZE], C[SIZE][SIZE];
// Runs the triple loop once and returns 0. C is never read afterwards.
int main ()
{
int i, j, k;
// i selects the row of C (and of A).
for (i = 0; i < SIZE; i++) {
// k is the middle loop here and j the innermost one; swapping these two
// loop headers is the only difference from the i-j-k ordering.
for (k = 0; k < SIZE; k++) { // this is the only change
// j varies fastest: C[i][j] and B[k][j] both walk along a row with unit
// stride, and A[i][k] is the same element for the whole inner loop.
for (j = 0; j < SIZE; j++) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
return 0;
}
Run time:
real 0m5.489s
user 0m5.436s
sys 0m0.040s
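This is much faster because the order of the loop iterations now matches the way the arrays are laid out in memory. C stores two-dimensional arrays row by row, so with j as the innermost loop both C[i][j] and B[k][j] are accessed sequentially along a row. Sequentially accessed data is far more likely to already be in the cache, so far fewer accesses have to wait for main memory.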
source
share