// Plain aggregate wrapping a fixed array of eight ints.
struct Big
{
    int a[8];
};
// Producer: returns a Big by value (defined elsewhere).
Big getStuff();

// Consumer: takes a Big by value (defined elsewhere).
void foo(Big a);
// Forwards the temporary returned by getStuff() straight into foo()
// as its by-value argument; no named local is involved.
void test1()
{
    foo(getStuff());
}
compiles (using clang 6.0.0 for x86_64 on Linux, therefore System V ABI, flags: -O3 -march=broadwell) to
# test1 as emitted by clang (Intel syntax): getStuff() writes its result into
# one stack slot, and that result is then copied into a second slot at the
# bottom of the frame before foo(Big) is called.
test1(): # @test1()
sub rsp, 72                              # 72-byte frame; with the 8-byte return address rsp stays 16-byte aligned at each call
lea rdi, [rsp + 40]                      # rdi = address of the 32-byte slot [rsp+40, rsp+72) that receives getStuff's result
call getStuff()
vmovups ymm0, ymmword ptr [rsp + 40]     # load the 32-byte result ...
vmovups ymmword ptr [rsp], ymm0          # ... and store it to [rsp, rsp+32), where foo's by-value argument lives
vzeroupper                               # zero the upper YMM halves before calling out (presumably to avoid AVX/SSE transition penalties)
call foo(Big)
add rsp, 72                              # release the frame
ret
If I read this correctly, this is what happens:
- getStuff is passed a pointer into test1's stack frame (rsp + 40) to use for its return value, so after getStuff returns, rsp + 40 through rsp + 71 contains the result of getStuff.
- This result is then immediately copied to a lower stack address, rsp through rsp + 31.
- foo is then called, which will read its argument from rsp.
Why is the following code not completely equivalent (and why does the compiler not generate it)?
# Hypothetical copy-free test1 (Intel syntax, System V AMD64 ABI):
# getStuff() writes its result directly into the stack slot that foo(Big)
# reads as its by-value argument, so no intermediate copy is needed.
#
# Frame size: on entry rsp % 16 == 8 (return address pushed), and rsp must be
# 16-byte aligned at every call. A 32-byte frame would leave rsp misaligned,
# so the frame is 32 bytes for Big plus 8 bytes of padding = 40.
test1(): # @test1()
sub rsp, 40                              # Big at [rsp, rsp+32) + 8 bytes alignment padding
mov rdi, rsp                             # rdi = pointer to the slot getStuff fills with its result
call getStuff()
call foo(Big)                            # argument is already in place at [rsp]
add rsp, 40                              # release the frame
ret
: getStuff , foo .
:
Interestingly (with 12 ints instead of 8), VC++ for x64, which uses the Windows x64 ABI, still emits the copy!
; MSVC x64 listing for bar: calls getStuff, copies the 48-byte result
; (three 16-byte moves) into the argument temporary, then calls foo.
_TEXT SEGMENT
; Both compiler temporaries are assigned the same frame offset (rsp+32),
; i.e. the return-value slot and the argument slot are the same memory.
$T3 = 32
$T1 = 32
?bar@@YAHXZ PROC ; bar, COMDAT
$LN4:
; 88-byte frame; together with the 8-byte return address this keeps rsp
; 16-byte aligned at each call. Offsets 0..31 are presumably the shadow
; space the Microsoft x64 convention requires the caller to reserve.
sub rsp, 88 ; 00000058H
; rcx (first argument register) = address of the slot for getStuff's result.
lea rcx, QWORD PTR $T1[rsp]
call ?getStuff@@YA?AUBig@@XZ ; getStuff
; rcx = address of the temporary handed to foo.
lea rcx, QWORD PTR $T3[rsp]
; Copy 48 bytes from [rax] to $T3[rsp], 16 bytes at a time.
; NOTE(review): if rax holds the same pointer that was passed to getStuff
; in rcx (rsp+32), source and destination are identical and this copies
; the object onto itself -- confirm against the ABI documentation.
movups xmm0, XMMWORD PTR [rax]
movaps XMMWORD PTR $T3[rsp], xmm0
movups xmm1, XMMWORD PTR [rax+16]
movaps XMMWORD PTR $T3[rsp+16], xmm1
movups xmm0, XMMWORD PTR [rax+32]
movaps XMMWORD PTR $T3[rsp+32], xmm0
call ?foo@@YAHUBig@@@Z ; foo
; Release the frame and return.
add rsp, 88 ; 00000058H
ret 0