Is GCC's C++ compiler failing to optimize atomic operations for current x86-64 processors?

Given the following test program:

#include <atomic>
#include <iostream>

// Baseline case: copies the constant 42 through a plain (non-atomic) int64_t
// local and returns that value.
int64_t process_one() {
        int64_t a;
        // Should be atomic on my Haswell
        int64_t assign = 42;
        a = assign;  // ordinary, non-atomic store to the local
        return a;
}

// Atomic counterpart of process_one: the local is a std::atomic<int64_t>,
// so the assignment and the return go through the atomic's store and load.
int64_t process_two() {
        std::atomic<int64_t> a;
        int64_t assign = 42;
        // std::atomic::operator= performs an atomic store; no memory order is
        // given, so the default (std::memory_order_seq_cst) applies.
        a = assign;
        // Implicit conversion to int64_t performs an atomic load of the value.
        return a;
}

// Driver: calls both variants and prints each result on its own line.
int main() {
        auto res_one = process_one();
        auto res_two = process_two();
        std::cout << res_one << std::endl;
        std::cout << res_two << std::endl;
}

Compiled with

g++ --std=c++17 -O3 -march=native main.cpp

The code generates the following asm for two functions:

00000000004007c0 <_Z11process_onev>:
  4007c0:       b8 2a 00 00 00          mov    $0x2a,%eax
  4007c5:       c3                      retq
  4007c6:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  4007cd:       00 00 00

00000000004007d0 <_Z11process_twov>:
  4007d0:       48 c7 44 24 f8 2a 00    movq   $0x2a,-0x8(%rsp)
  4007d7:       00 00
  4007d9:       0f ae f0                mfence
  4007dc:       48 8b 44 24 f8          mov    -0x8(%rsp),%rax
  4007e1:       c3                      retq
  4007e2:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  4007e9:       00 00 00
  4007ec:       0f 1f 40 00             nopl   0x0(%rax)

Personally, I don't know much about assembler, but (and I could be wrong here) it seems that process_two has been compiled to include all process_one and then some.

However, as far as I know, “modern” x86-64 processors (for example, Haswell, on which I compiled this) will perform the assignment atomically without the need for any additional operations (in this case, I believe that the additional operation is the mfence in process_two).

gcc , , ? , .

, , , , 8 .

+4
2

The reason is that, by default, operations on std::atomic use the following memory order:

std::memory_order order = std::memory_order_seq_cst

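Providing that sequentially consistent ordering across threads requires a memory fence, hence the mfence. Replacing the first statement below with the second (a relaxed store) removes it, as the before/after assembly shows: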

    a = assign;

    a.store(assign, std::memory_order_relaxed);

process_two():
        mov     QWORD PTR [rsp-8], 42
        mfence
        mov     rax, QWORD PTR [rsp-8]
        ret

process_two():
        mov     QWORD PTR [rsp-8], 42
        mov     rax, QWORD PTR [rsp-8]
        ret

.

+10

This is a missed optimization. For example, clang compiles the function down to just mov eax, 42.

gcc, , , gcc , . clang, icc gcc, -, , , clang ( ), , , . icc - . two_reads , : , - rax , mov .

, , .

+2

Source: https://habr.com/ru/post/1681852/


All Articles