, . .
-, , , , .
:
for (i = 0; i < iLen; i++) {
for (i = iLen-1; i >= 0; i--) {
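On many processors, a comparison against 0 is cheaper than a comparison against a variable, so the loop that counts down to zero is faster.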
, , ( ).
. (), , .
for (p = rgiFilter; p <= rgiFilter+8; ) {
iPred += (I32) (*p) + *rgiPreval++;
*p++ += *rgiUpdate++;
....
}
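Loads and stores can often be combined as well. In the loop above, rgiFilter is an array of 16-bit values, so processing two elements per iteration gives the compiler the opportunity to replace each pair of 16-bit accesses with a single 32-bit load and a single 32-bit store.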
for (p = rgiFilter; p <= rgiFilter+8; ) {
I16 x = *p;
I16 y = *(p+1); // Hope that the compiler can combine these loads
iPred += (I32) x + *rgiPreval++;
iPred += (I32) y + *rgiPreval++;
*p++ += *rgiUpdate++;
*p++ += *rgiUpdate++; // Hope that the compiler can combine these stores
....
}
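If your target supports data prefetch/preload instructions, check whether your compiler exposes them. For example, gcc provides: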
__builtin_prefetch (const void * addr)
__builtin_prefetch (const void * addr, int rw)
__builtin_prefetch (const void * addr, int rw, int locality)
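Here addr is the address of the memory to prefetch, so that the data is moved into the cache before it is accessed. The optional rw argument states whether the prefetch is preparing for a read (0, the default) or a write (1). Prefetching data that is not subsequently used, or issuing the prefetch too early or too late, can reduce performance rather than improve it, so measure the effect on your target.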
In addition, since the __builtin_ functions are special, the normal rules for variable number of arguments do not actually apply - this is a hint to the compiler, not a function call.
You should also examine any vector operations that your target supports, as well as any general or platform-specific functions, built-in functions, or pragmas that your compiler supports to perform vector operations.