Clang produces very different performance for expressions that intuitively should be equivalent

Can anyone explain the significant performance differences between these expressions, which I would expect to perform similarly? I am using Apple LLVM version 5.1 (clang-503.0.38) (based on LLVM 3.4svn) in release mode.

Here is my test code (just change CASE to 1, 2, 3 or 4 to test yourself):

#include <iostream>
#include <chrono>

#define CASE 1

// Returns the increment used by the benchmark loop, computed by one of four
// formulations selected at compile time through the CASE macro.
//
// The variants agree for non-negative n (6 for even, 9 for odd) but NOT for
// negative odd n: in C++ the result of % takes the sign of the dividend, so
// n % 2 can be -1 as well as 0 or 1. The per-case notes below spell out what
// each expression yields in that situation.
//
// If CASE is not 1, 2, 3 or 4, no expression is selected and the bare
// `return ;` fails to compile in this int-returning function.
inline int foo(int n) {
    return
#if CASE == 1
    // Implicit int -> bool conversion: any non-zero remainder (1 or -1) is
    // true, so every odd n yields 9. Equivalent to (n % 2) != 0.
    (n % 2) ? 9 : 6

#elif CASE == 2
    // `true` is promoted to int 1, so this tests (n % 2) == 1. A negative odd
    // n has remainder -1 and therefore yields 6, unlike CASE 1.
    (n % 2) == true ? 9 : 6

#elif CASE == 3
    // Pure integer arithmetic on the signed remainder: a negative odd n gives
    // 6 + (-1) * 3 == 3.
    6 + (n % 2) * 3

#elif CASE == 4
    // bool(...) collapses the remainder to 0 or 1 before the multiply, so the
    // result is always 6 or 9, matching CASE 1.
    6 + bool(n % 2) * 3

#endif
    ;
}

/// Benchmark driver: runs 100,000,000 dependent calls to foo() and prints the
/// elapsed wall time in seconds followed by the final accumulated value.
///
/// @param argc  Used as the initial value of the accumulator, so the starting
///              value is not known at compile time.
/// @param argv  Unused.
/// @return 0 always.
int main(int argc, const char* argv[])
{
    // steady_clock is monotonic; system_clock can be adjusted (NTP, manual
    // changes) while the loop is running, which would corrupt the interval.
    const auto start = std::chrono::steady_clock::now();

    // Each iteration depends on the previous result, so the calls form a
    // serial dependency chain.
    int n = argc;
    for (int i = 0; i < 100000000; ++i) {
        n += foo(n);
    }

    const auto end = std::chrono::steady_clock::now();
    const std::chrono::duration<double> elapsed_seconds = end - start;

    std::cout << "elapsed time: " << elapsed_seconds.count() << "\n";
    // The accumulated value is printed, so the loop's result is observable.
    std::cout << "value: " << n << "\n";

    return 0;
}

And here is the time that I get:

CASE   EXPRESSION                TIME
1      (n % 2) ? 9 : 6           0.1585
2      (n % 2) == true ? 9 : 6   0.3491
3      6 + (n % 2) * 3           0.2559
4      6 + bool(n % 2) * 3       0.1906

Here is the difference in the generated assembly between CASE 1 and CASE 2:

CASE 1:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    andl    $1, %ecx
    leal    (%rcx,%rcx,2), %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

CASE 2:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    shrl    $31, %ecx
    addl    %ebx, %ecx
    andl    $-2, %ecx
    movl    %ebx, %edx
    subl    %ecx, %edx
    cmpl    $1, %edx
    sete    %cl
    movzbl  %cl, %ecx
    leal    (%rcx,%rcx,2), %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

And here is the difference in the generated assembly between CASE 3 and CASE 4:

CASE 3:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    shrl    $31, %ecx
    addl    %ebx, %ecx
    andl    $-2, %ecx
    movl    %ebx, %edx
    subl    %ecx, %edx
    leal    (%rdx,%rdx,2), %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx

CASE 4:

Ltmp12:
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    ##DEBUG_VALUE: main:argv <- RSI
    ##DEBUG_VALUE: i <- 0
    .loc    1 24 0                  ## /Test/main.cpp:24:0
    movl    %ebx, %ecx
    andl    $1, %ecx
    negl    %ecx
    andl    $3, %ecx
Ltmp13:
    .loc    1 48 14                 ## /Test/main.cpp:48:14
    leal    6(%rbx,%rcx), %ebx
+4
source share
1 answer

.


What values can (n % 2) take? Surely only 0 and 1, right?

Wrong. It can be 0, 1, or -1. Since n is a signed integer, the result of % can be negative.

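(n % 2) ? 9 : 6 implicitly converts the expression n % 2 to bool. That conversion yields true if and only if the value is non-zero. So it is equivalent to (n % 2) != 0.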

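In (n % 2) == true ? 9 : 6, the usual arithmetic conversions are applied to both sides of the comparison (n % 2) == true (note that bool is an arithmetic type). true is promoted to an int with value 1. So it is equivalent to (n % 2) == 1.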

(n % 2) != 0 and (n % 2) == 1 give different results for negative n: let n = -1. Then n % 2 == -1, and -1 != 0 is true, but -1 == 1 is false.

Therefore, the compiler has to emit extra instructions to handle the sign.

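The performance difference vanishes if you make n an unsigned integer, or eliminate the sign issue some other way (for example, by comparing n % 2 != false).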


I got this idea by looking at the assembly, in particular the following line:

shrl    $31, %eax

Using the highest bit made no sense to me until I realized that the highest bit is the sign bit.

+4

Source: https://habr.com/ru/post/1534700/


All Articles