Initially, the problem arose when I tried to hand-optimize an algorithm with NEON, and one part of it accounted for 80% of the run time according to the profiler. To see what could be done to improve it, I created an array of function pointers to different versions of my optimized function and then ran them in a loop to see in the profiler which one works better:
// Pointer type shared by every implementation under comparison: takes an
// 8x4 and a 4x4 table of uint16_t values and returns an unsigned result.
using CalcMaxFunc = unsigned (*)(const uint16_t a[8][4], const uint16_t b[4][4]);

// Table of the candidate implementations (by their names: four NEON variants
// followed by one plain C variant).
CalcMaxFunc CalcMaxFuncs[] = {
    CalcMaxFunc_NEON_0,
    CalcMaxFunc_NEON_1,
    CalcMaxFunc_NEON_2,
    CalcMaxFunc_NEON_3,
    CalcMaxFunc_C_0,
};
int N = sizeof(CalcMaxFunc) / sizeof(CalcMaxFunc[0]);
for (int i = 0; i < 10 * N; ++i)
{
auto f = CalcMaxFunc[i % N];
unsigned retI = f(a, b);
if (retI > 1000000)
break;
ret |= retI;
}
: CalcMaxFuncs. , CalcMaxFunc_NEON_3, 3-4 , , , , .
, , ? , iPhone6 xcode, .
soem , , , . , , ?