Clang provides very different performance for expressions that intuitively need to be equivalent

Question

Clang provides very different performance for expressions that intuitively need to be equivalent

Can anyone explain to me these significant differences in performance between these expressions, which I would expect to get similar performance. I am going with Apple LLVM version 5.1 (clang-503.0.38) (based on LLVM 3.4svn) in release mode.

Here is my test code (just change CASE to 1, 2, 3 or 4 to test yourself):

#include <iostream>
#include <chrono>

#define CASE 1

inline int foo(int n) {
    return
#if CASE == 1
    (n % 2) ? 9 : 6

#elif CASE == 2
    (n % 2) == true ? 9 : 6

#elif CASE == 3
    6 + (n % 2) * 3

#elif CASE == 4
    6 + bool(n % 2) * 3

#endif
    ;
}

int main(int argc, const char* argv[])
{
    std::chrono::time_point<std::chrono::system_clock> start, end;
    start = std::chrono::system_clock::now();

    int n = argc;
    for (int i = 0; i < 100000000; ++i) {
        n += foo(n);
    }

    end = std::chrono::system_clock::now();
    std::chrono::duration<double> elapsed_seconds = end-start;

    std::cout << "elapsed time: " << elapsed_seconds.count() << "\n";
    std::cout << "value: " << n << "\n";

    return 0;
}

And here is the time that I get:

CASE   EXPRESSION                TIME
1      (n % 2) ? 9 : 6           0.1585
2      (n % 2) == true ? 9 : 6   0.3491
3      6 + (n % 2) * 3           0.2559
4      6 + bool(n % 2) * 3       0.1906

Here is the build difference between CASE 1 and CASE 2:

CASE 1:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    andl    $1, %ecx
    leal    (%rcx,%rcx,2), %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

CASE 2:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    shrl    $31, %ecx
    addl    %ebx, %ecx
    andl    $-2, %ecx
    movl    %ebx, %edx
    subl    %ecx, %edx
    cmpl    $1, %edx
    sete    %cl
    movzbl  %cl, %ecx
    leal    (%rcx,%rcx,2), %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

And here is the build difference between CASE 3 and CASE 4:

CASE 3:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    shrl    $31, %ecx
    addl    %ebx, %ecx
    andl    $-2, %ecx
    movl    %ebx, %edx
    subl    %ecx, %edx
    leal    (%rdx,%rdx,2), %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

CASE 4:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    andl    $1, %ecx
    negl    %ecx
    andl    $3, %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

+4

c ++ performance performance-testing clang clang ++

user3490189 Apr 2 '14 at 17:53

source share

1 answer

dyp · Accepted Answer · 2014-04-03T16:14:29+0000

.

(n % 2)? , 0 1, ?

. 0, 1 -1. n - , % .

(n % 2) ? 6 : 9 n % 2 bool. true IFF . , (n % 2) != 0.

In, (n % 2) == true ? 6 : 9, (n % 2) == true ( , bool ). true int 1. , (n % 2) == 1.

(n % 2) != 0 (n % 2) == 1 n: n = -1. n % 2 == -1, -1 != 0 - true, -1 == 1 false.

, .

, n (, n % 2 != false).

, , :

shrl    $31, %eax

, , .

Clang provides very different performance for expressions that intuitively need to be equivalent

More articles: