One option, if you can afford a constant in memory, is a 64-bit imul:
; Copy the low 32 bits of rax into the high 32 bits with one multiply:
; rax * 0x100000001 == rax + (rax << 32), truncated to 64 bits.
; mov eax,eax ; if eax isn't already zero-extended into rax
imul rax, [rel broadcast_low32_multiplier] ; result is eax:eax only when bits 63:32 of rax were zero
section .rodata
broadcast_low32_multiplier: dq 0x100000001 ; 2^32 + 1, stored as a 64-bit constant
1-- ( 3- ) Intel Sandybridge AMD Ryzen. 4 . (http://agner.org/optimize/)
The immediate form imul r64, r64, imm32 cannot be used here, because the constant 0x100000001 needs 33 bits.
, , , .
; Same multiply as a stand-alone fragment: the 8-byte constant sits in the
; instruction stream and is jumped over instead of living in another section.
; mov eax,eax ; if eax isn't already zero-extended into rax
imul rax, [rel broadcast_low32_multiplier] ; rax = rax + (rax << 32)
jmp after_constant ; skip the data bytes below so they are never executed as code
broadcast_low32_multiplier: dq 0x100000001 ; 2^32 + 1, read RIP-relative by the imul above
after_constant: ; execution continues here with the broadcast value in rax
x86-64, EAX RAX. POV . NASM:
1 00000000 480FAF0502000000 imul rax, [rel broadcast_low32_multiplier]
2 00000008 EB08 jmp after_constant
3 0000000A 0100000001000000 broadcast_low32_multiplier: dq 0x100000001
4 after_constant:
, , AFAIK , . L1D dTLB, L1I iTLB.
Jester 2- /1 , , Atom (pre Silvermont):
; Broadcast eax into both halves of rax by bouncing the value through the stack.
push rax ; store all 8 bytes of rax at the new [rsp]
mov [rsp+4], eax ; overwrite high bytes: the upper dword of the saved copy becomes eax
pop rax ; reload the 8 bytes as eax:eax; store-forwarding stall when a wide load covers 2 narrow stores
, , rbp+disp8, [rbp-12], 1 rsp - ( SIB ).
, , , , .
. / 15 , , . , , . clobber, , -, .
; Register-only version: build eax:0 in a scratch register and OR it into rax.
; rax = garbage:eax
; NOTE(review): with garbage in the upper half of rax, the commented-out
; "mov eax, eax" below is required; without it the final OR leaves that
; garbage merged into bits 63:32 of the result.
push rbx ; save/restore if needed
mov ebx, eax ; rbx = eax zero-extended (a 32-bit register write clears bits 63:32)
;mov eax, eax ; zero-extend eax into rax if needed
shl rbx, 32 ; rbx = eax:0
or rax, rbx ; high half of rax |= eax; low half is unchanged
pop rbx ; restore the caller's rbx
2 ( rax) MOV (Ivybridge + Ryzen) . / RBX, , , .
BMI2, rorx rcx, rax, 32/or rax, rcx ( 32- RAX , + ). ( 2c , BMI2. mov regs, ).
With SSE2 (baseline for x86-64), if the value can live in xmm0 instead of rax:
; Vector-register version: duplicate the low dword inside xmm0.
; movd xmm0, eax ; instead of whatever was setting rax
punpckldq xmm0, xmm0 ; [dcba] -> [bbaa]: interleaving xmm0 with itself makes the low qword a:a
; movq rax, xmm0 ; only if the result is needed back in an integer register
1 ( SlowShuffle Core2/K8, pshuflw imm8, ) xmm0 rax.
RAX / XMM0 - , . clobber xmm0, , .
, xmm0 . xmm ( , ). paddd/paddq, pslldq, pmuludq/pand. , , , 32 , , , .