Copy EAX to RAX higher bits?

I am wondering if there is any sequence of instructions, without using any other register, to copy the lower 32 bits of RAX to the higher 32 bits. Of course, I want EAX to be preserved as well.

Thanks in advance.

+4
source share
3 answers

My attempt ... I got a headache from the music competition taking place here at a demo party (or more likely from traveling here), so I abandoned the idea of using imul rax,rax,imm32 to copy 31 bits (saving 1 bit per byte and then fixing up the intermediate result), because it turned out to have several problems that I did not foresee.

So instead I did a 2x 16-bit copy and then put the bytes in order (using xchg al,ah and rotates).

    ; Replicate the low 32 bits of rax into the high 32 bits using only rax.
    ; Trace notation: every digit in the comments stands for one whole byte,
    ; most significant byte on the left. The trace starts with the upper
    ; four bytes equal to zero. Flags are modified.
    ;
    ; Stage 1 (ror/imul/shr/imul): multiplying by 0x00010001 adds the value
    ; shifted left by 16 bits to itself; the non-zero bytes never overlap,
    ; so this duplicates 16-bit words without carries.
    ; Stage 2 (rotates + xchg al, ah): swap adjacent bytes and rotate until
    ; both halves read 1234.
    ; rax =                           00001234 (bytes, not hexa digits)
    ror     rax, 16                 ; 34000012
    imul    rax, rax, 0x00010001    ; 34001212
    shr     rax, 16                 ; 00340012
    imul    rax, rax, 0x00010001    ; 34341212
    ror     rax, 24                 ; 21234341
    xchg    al, ah                  ; 21234314
    ror     rax, 8                  ; 42123431
    xchg    al, ah                  ; 42123413
    rol     rax, 16                 ; 12341342
    xchg    al, ah                  ; 12341324
    ror     rax, 8                  ; 41234132
    xchg    al, ah                  ; 41234123
    rol     rax, 8                  ; 12341234

( ) ( rol ...,8 5-):

    ; Variant with a single multiply: replicate the low 32 bits of rax into
    ; the high 32 bits using only rax. Every digit in the trace comments is
    ; one whole byte, most significant byte on the left; the trace starts
    ; with the upper four bytes equal to zero. Flags are modified.
    ;
    ; Multiplying by 0x01000001 adds the value shifted left by 24 bits
    ; (3 bytes) to itself, which copies three of the four bytes at once.
    ; The remaining byte is filled in by mov al, ah, and the rol rax, 8 /
    ; rol ax, 8 pairs (rol ax, 8 swaps al and ah) put the low bytes in order.
    ; eax =                           00001234 (bytes, not hexa digits)
    ror     rax, 8                  ; 40000123
    imul    rax, rax, 0x01000001    ; 40123123
    rol     rax, 16                 ; 12312340
    mov     al, ah                  ; 12312344
    rol     rax, 8                  ; 23123441
    rol     ax, 8                   ; 23123414
    rol     rax, 8                  ; 31234142
    rol     ax, 8                   ; 31234124
    rol     rax, 8                  ; 12341243
    rol     ax, 8                   ; 12341234
+7

: -D

; Replicate the low 32 bits of rax into the high 32 bits using only rax and
; no multiply: just shifts, rotates, bswap and al/ah moves and exchanges.
; Every digit in the trace comments is one whole byte, most significant
; byte on the left. The trace implies rax = 00001234 on entry (upper four
; bytes zero). Flags are modified.
;
; Each source byte is first duplicated (mov al, ah / mov ah, al produce the
; pairs 44, 11, 22, 33), then rotates and xchg ah, al sort the eight bytes
; into 12341234.
shl rax, 8   ; 00012340
mov al, ah   ; 00012344
bswap rax    ; 44321000
shr rax, 16  ; 00443210
mov al, ah   ; 00443211
ror rax, 8   ; 10044321
xchg ah, al  ; 10044312
rol rax, 8   ; 00443121
xchg ah, al  ; 00443112
shl rax, 8   ; 04431120
mov al, ah   ; 04431122
ror rax, 32  ; 11220443
xchg ah, al  ; 11220434
ror rax, 8   ; 41122043
xchg ah, al  ; 41122034
ror rax, 8   ; 44112203
mov ah, al   ; 44112233
ror rax, 8   ; 34411223
xchg ah, al  ; 34411232
rol rax, 16  ; 41123234
xchg ah, al  ; 41123243
ror rax, 8   ; 34112324
xchg ah, al  ; 34112342
rol rax, 24  ; 12342341
xchg ah, al  ; 12342314
ror rax, 8   ; 41234231
xchg ah, al  ; 41234213
ror rax, 8   ; 34123421
xchg ah, al  ; 34123412
ror rax, 16  ; 12341234
+5

If a constant in memory is acceptable, a single 64-bit imul with a memory operand does it:

; Broadcast the low 32 bits of rax into both halves with one multiply:
; 0x100000001 = 2^32 + 1, so rax * 0x100000001 = rax + (rax << 32) mod 2^64.
; Requires the upper 32 bits of rax to be zero beforehand; otherwise they
; are added into the upper half of the result. Flags are modified.
; mov    eax,eax     ; if eax isn't already zero-extended into rax

imul      rax, [rel broadcast_low32_multiplier]

; 64-bit multiplier constant, loaded RIP-relative by the imul above.
section .rodata
broadcast_low32_multiplier:   dq  0x100000001

1-- ( 3- ) Intel Sandybridge AMD Ryzen. 4 . (http://agner.org/optimize/)

imul r64, r64, imm32 cannot be used here, because the multiplier 0x100000001 needs 33 bits.

, , , .

; Same multiply by 0x100000001 (rax + (rax << 32)), but the 8-byte constant
; is placed inline directly after the code and jumped over, so no separate
; data section is needed. Requires the upper 32 bits of rax to be zero
; beforehand. Flags are modified.
; mov eax,eax      ; if eax isn't already zero-extended into rax

imul      rax, [rel broadcast_low32_multiplier]
jmp       after_constant     ; skip the data bytes that follow
 broadcast_low32_multiplier:   dq  0x100000001
after_constant:

x86-64, EAX RAX. POV . NASM:

 1 00000000 480FAF0502000000          imul    rax, [rel broadcast_low32_multiplier]
 2 00000008 EB08                      jmp       after_constant
 3 0000000A 0100000001000000          broadcast_low32_multiplier:   dq  0x100000001
 4                                    after_constant:

, , AFAIK , . L1D dTLB, L1I iTLB.


Jester 2- /1 , , Atom (pre Silvermont):

; Build the result in memory on the stack: push stores all 8 bytes of rax
; at [rsp], the 4-byte store then replaces the upper dword (bytes 4..7,
; little-endian) with eax, and pop reloads the combined qword into rax.
; No other register is modified, rsp ends where it started, and none of
; these three instructions modifies flags.
push rax
mov  [rsp+4], eax     ; overwrite high bytes
pop  rax              ; store-forwarding stall when a wide load covers 2 narrow stores

, , rbp+disp8, [rbp-12], 1 rsp - ( SIB ).


, , , , .

. / 15 , , . , , . clobber, , -, .

; Broadcast eax into both halves of rax, using rbx as a scratch register.
; In:    rax = garbage:eax  (the upper 32 bits may hold anything)
; Out:   rax = eax:eax
; Clobb: flags (rbx is saved and restored around the sequence)
;
; Shifting rax itself discards the garbage upper half, so no separate
; zero-extension of eax is needed before the or.
push   rbx            ; save/restore if needed

mov    ebx, eax       ; rbx = 0:eax (a 32-bit write zero-extends into rbx)
shl    rax, 32        ; rax = eax:0 (old upper half is shifted out)
or     rax, rbx       ; rax = eax:eax

pop    rbx

2 ( rax) MOV (Ivybridge + Ryzen) . / RBX, , , .

BMI2, rorx rcx, rax, 32/or rax, rcx ( 32- RAX , + ). ( 2c , BMI2. mov regs, ).


SSE2 ( x86-64). xmm0 rax:

; SSE2 variant for when the value is kept in xmm0 rather than in rax.
; punpckldq with the same register as both operands interleaves the two low
; dwords with themselves, so the low qword of xmm0 ends up holding the
; lowest dword twice. The commented-out movd / movq show the optional
; transfers from and back to rax.
; movd       xmm0, eax      ; instead of whatever was setting rax

punpckldq  xmm0, xmm0        ; [dcba] -> [bbaa]

; movq       rax, xmm0

1 ( SlowShuffle Core2/K8, pshuflw imm8, ) xmm0 rax.

RAX / XMM0 - , . clobber xmm0, , .

, xmm0 . xmm ( , ). paddd/paddq, pslldq, pmuludq/pand. , , , 32 , , , .

+2

Source: https://habr.com/ru/post/1693088/


All Articles