How to quickly find the maximum element of the sum of vectors?

I have the following code in the innermost loop of my program

struct V {
  float val [200]; // 0 <= val[i] <= 1
};

V a[600];
V b[250];
V c[250];
V d[350];
V e[350];

// ... init values in a,b,c,d,e ...

int findmax(int ai, int bi, int ci, int di, int ei) {
  float best_val = 0.0;
  int best_ii = -1;

  for (int ii = 0; ii < 200; ii++) {
    float act_val =
      a[ai].val[ii] +
      b[bi].val[ii] +
      c[ci].val[ii] +
      d[ci].val[ii] +
      e[ci].val[ii];

    if (act_val > best_val) {
      best_val = act_val;
      best_ii = ii;
    }
  }

  return best_ii;
}

I don’t care if this is some kind of smart algorithm (but it would be very interesting) or some C ++ tricks, either built-in or assembler. But I need to make the findmax function more efficient.

Thank you very much in advance.

Edit: It seems that the branch is the slowest operation (incorrect prediction?).

+3
source share
7 answers

, . , , , . , , 200 .

, , , Assembler MMX SSE x86 , , ( ) ++ , , .

+2

, :

int findmax(int ai, int bi, int ci, int di, int ei) {
  float best_val = 0.0;
  int best_ii = -1;

  float* a_it = &a[ai].val[0]
  float* b_it = &b[bi].val[0]
  float* c_it = &c[ci].val[0]
  float* d_it = &d[di].val[0] // assume typo ci->di
  float* e_it = &e[ei].val[0] // assume typo ci->ei

  for (int ii = 0; ii < 200; ii++) {
    float act_val = *(a_it++) + *(b_it++) + *(c_it++) + *(d_it++) + *(e_it++);
    best_val =  (act_val <= best_val) ? best_val : act_val; // becomes _fsel
    best_ii  =  (act_val <= best_val) ? best_ii : ii; // becomes _fsel
  }

  return best_ii;
}

. :

int findmax(int ai, int bi, int ci, int di, int ei) {
  float best_val = 0.0;
  int best_ii = -1;

  float* its[] = {&a[ai].val[0], &a[bi].val[0], &a[ci].val[0], &a[di].val[0], &a[ei].val[0] };

  V sums;
  for (int ii = 0; ii < 200; ii++) {
    sums.val[ii] = * (++its[0]);
  }

  for (int iter = 1 ; iter < 5; ++iter)  {
      for (int ii = 0; ii < 200; ii++) {
        sums.val[ii] += * (++its[iter]);
      }
    }
  }
  for (int ii = 0; ii < 200; ii++) {
    best_val =  (sums.val[ii] <= best_val) ? best_val : sums.val[ii]; // becomes _fsel
    best_ii  =  (sums.val[ii] <= best_val) ? best_ii : ii; // becomes _fsel
  } 
  return best_ii;
}
+4

, , O (n). , Intel/AMD MMX SSE. . Microsoft intrinsics:

http://msdn.microsoft.com/en-us/library/y0dh78ez(VS.71).aspx

+2

, a[ai] .. ( ), , findmax. - :

int findmax(int ai, int bi, int ci, int di, int ei) {
    float    best_val = std::numeric_limits<float>::min();
    int      best_ii = 0;
    const V& a(a[ai]);
    const V& b(b[bi]);
    const V& c(c[ci]);
    const V& d(d[di]);
    const V& e(e[ei]);

    for (int ii = 0; ii < 200; ++ii) {
        float act_val = a.val[ii] + b.val[ii] + c.val[ii] +
                        d.val[ii] + e.val[ii];

        if (act_val > best_val) {
            best_val = act_val;
            best_ii = ii;
        }
    }

    return best_ii;
}

, ( ) findmax.

+2

. :

for (float *ap = a[ai].val, *bp = b[bi].val; ap - a[ai].val < 200; ap++, bp ++) {
    float act_val = *ap + *bp;
    // check for max and return if necessary
}
+1
+1

You can not get much more than that, without more information about the data (values) stored in the a, b, c, dand e. You must check each amount to determine which is the largest.

A bit worse for the Nth element, but, fortunately, you did not ask about it.

0
source

Source: https://habr.com/ru/post/1716808/


All Articles