Mixing inline assembly with C code - how to protect registers and minimize memory access

I have a routine that I would like to write mostly in assembly, but I need to call C functions to get some of the data I have to process. In some cases I can pre-digest the data and load a register with a pointer to it, but in other cases I have to call the full function, because the possible data set is too large. These functions cannot be changed: they are another user's code, and their interface must stay the same for other pieces of code. Some of them live in shared libraries, while others are inline functions defined in header files (which I cannot change).

I can assign local variables to registers using the asm construct:

/* GNU C explicit register variable: asks the compiler to bind myReg to %r13.
 * NOTE(review): GCC documents that a local register variable is only
 * guaranteed to live in the named register while it is used as an operand of
 * an asm statement; elsewhere the register may be reused. Confirm against the
 * GCC version in use. */
register int myReg asm( "%r13" );

I am afraid that if I then manipulate %r13 directly in assembly, call a C function and return, the register will have to be reloaded from memory or, even worse, will simply have been overwritten. With some ABIs it is also not safe for me to just push/pop the registers myself, right? I am working on x86-64 Linux.

What I'm doing right now seems to work with -g -O0, but I'm afraid that once I turn on optimization for the C code, it will start touching registers that I hoped would be protected.

In general, my code flow looks like this:

/* Sketch of the intended flow (not compilable as shown: parts are elided). */

/* Load myVariablePointer from memory into %r13. r13 is listed as clobbered,
 * which covers this one asm statement only.
 * NOTE(review): nothing visible here tells the compiler that %r13 must keep
 * this value for the later asm statements -- confirm how that is ensured. */
asm( "movq %[src], %%r13" : : [src] "m" (myVariablePointer) : "r13" );

/* Ordinary C call made between the asm statements. */
localVariable1 = callSomeCfunction( stuff );

/* Record the result.
 * NOTE(review): storageLocation is a byte offset (index * sizeof item) but is
 * then used as an array index; whether that is right depends on the element
 * type of longTermStorage, which is not shown -- confirm. */
storageLocation = index * sizeof( longTermStorageItem );
longTermStorage[storageLocation] = localVariable1;
// some intermediate registers need to be used here for dereferences and math

/* Dispatch on the value returned by the C function. */
switch ( localVariable1 )
{
   case CONSTANT_VAL_A:
     /* Loop opener/closer come from macros defined elsewhere. */
     C_MACRO_LOOP_CONSTRUCT
     {
       /* Loads 16 bytes from the address held in %r13 into %xmm0; this relies
        * on %r13 still holding the pointer loaded at the top. */
       asm( "movdqa (%r13), %xmm0\n"
            // ... do some more stuff
     } C_MACRO_LOOP_CONSTRUCT_ENDING
   break;
   case CONSTANT_VAL_B:
     // ... and so forth
}

"C_MACRO_LOOP_CONSTRUCT" is a set of #defines from an external header file that expand to a loop; the loop has to dereference some pointers and do a few other things along the way, and it keeps its iterator in a local variable.

, , , % r13 . , , , . . , , , . .

/?

. :

#include <emmintrin.h>
#include <stdio.h>

__m128d buffer[100];   /* target of the asm stores below; read back through valPtr */

int main( void )   /* demo: pcmpeqd on val, store it to the start of buffer, print the raw words */
{
  unsigned long long *valPtr;

  register __m128d val;   /* never assigned in C before it is used as an asm input */
  register __m128d *regPtr;   /* declared but not used in this snippet */
#ifdef FORCED  /* variant 1: address buffer through a hard-coded %r13 */
  asm( "movq %[src], %%r13" :   /* r13 = address of buffer; r13 is not declared as output or clobber */
       :
       [src] "r"  (buffer) );
  asm( "pcmpeqd %[src], %[dst]" :   /* input and output operands both name val */
       [dst] "=x" (val) :
       [src] "x" (val) );
  asm( "movdqa %[src], (%%r13)" : :   /* store val at (r13); no memory output or clobber is declared */
       [src] "x" (val) );
  asm( "movdqa %[src], 16(%%r13)" : :   /* store val 16 bytes further on */
       [src] "x" (val) );   
#else  /* variant 2: pass the destinations as asm output operands */
  asm( "pcmpeqd %[src], %[dst]" :   /* same compare as in variant 1 */
       [dst] "=x" (val) :
       [src] "x" (val) );
  asm( "movdqa %[src], %[dst]" :
       [dst] "=X" (buffer) :   /* whole array given as the output operand */
       [src] "x" (val) );
  asm( "movdqa %[src], %[dst]" :
       [dst] "=X" (buffer+1) :   /* buffer+1 is a pointer value, not an lvalue */
       [src] "x" (val) );
#endif  /* FORCED */

  valPtr = (unsigned long long *)buffer;   /* view buffer as unsigned long long words for printing */
  printf( "OUTPUT: [0] %016llx%016llx, [1] %016llx%016llx\n",
   valPtr[0], valPtr[1], valPtr[2], valPtr[3] );   /* first four words of buffer */

  return 0;
}

"FORCED", . , "% r13" ( , ). , 16(%%r13). .

"FORCED", gcc :

y.c: In function \u2018main\u2019:
y.c:32: error: invalid lvalue in asm statement
y.c:30: error: invalid lvalue in asm output 0

, , , ? "m", "X" "o". . , :

asm( "movdqa %[src], 16(%[dst])" :
 [dst] "=m" (buffer) :
 [src] "x" (val) );

GCC then reports:

/tmp/ccoNwyco.s: Assembler messages:
/tmp/ccoNwyco.s:28: Error: junk `(buffer(%rip))' after expression

, ?

+4
1

, . #if:

__m128d buffer[100];   

/*
 * Hard-coded-register variant: put the address of buffer in %r13, run pcmpeqd
 * on val, then store val at (%r13) and 16(%r13) using four separate asm
 * statements.
 *
 * NOTE(review): %r13 is written by the first statement and read by the last
 * two, but no statement declares it as an output, input or clobber, so the
 * compiler is never told the register is in use. The stores also declare no
 * memory output or "memory" clobber. Treat this as an example of what not to
 * rely on.
 */
int main( void )
{
  register __m128d val;   /* not assigned in C before its first use as an asm input */

  /* r13 = address of buffer (the source register is chosen by the "r" constraint). */
  asm( "movq %[src], %%r13" :
       :
       [src] "r"  (buffer) );
  /* Packed 32-bit compare; both the input and the output operand name val. */
  asm( "pcmpeqd %[src], %[dst]" :
       [dst] "=x" (val) :
       [src] "x" (val) );
  /* Store val at the address in r13 (offset 0). */
  asm( "movdqa %[src], (%%r13)" : :
       [src] "x" (val) );
  /* Store val 16 bytes past the address in r13. */
  asm( "movdqa %[src], 16(%%r13)" : :
       [src] "x" (val) );   
}

r13, . . asm ( "r13" ) , asm, . , asms. , , .

, asm, , . gcc . :

__m128d buffer[100];   

/*
 * Sets val to all-ones with pcmpeqd and stores it into buffer[0] and
 * buffer[1], all inside a single asm statement so the three instructions
 * stay together.
 */
int main( void )
{
  register __m128d val;

  asm("# val: %0" : "=x" (val)); /* fix "is used uninitialized" warning */

  /*
   * pcmpeqd reads its destination as well as writing it, so val is passed as
   * one read-write ("+x") operand.  Declaring it as a write-only output plus
   * a separate input would let the compiler pick two different registers and
   * compare against an undefined one.  Comparing a register with itself
   * always yields all-ones.
   *
   * buffer[0] and buffer[1] are memory outputs, so the compiler knows exactly
   * which objects the asm writes and chooses the addressing itself.
   */
  asm( "pcmpeqd %[val], %[val]\n\t"
       "movdqa %[val], %[buffer]\n\t"
       "movdqa %[val], %[buffer1]" :

       [val] "+x" (val), [buffer] "=m" (buffer[0]), [buffer1] "=m" (buffer[1]) );
}

#else:

__m128d buffer[100];   

/*
 * Operand-based variant: run pcmpeqd on val, then try to store val to buffer
 * and to buffer+1 by passing them as asm output operands with the "X"
 * constraint.
 *
 * NOTE(review): an asm output operand must be an lvalue; buffer+1 is a
 * pointer value, not an lvalue, and the destinations are given as the
 * array/pointer rather than as the elements buffer[0] and buffer[1].
 */
int main( void )
{
  register __m128d val;   /* not assigned in C before its first use as an asm input */

  /* Packed 32-bit compare; both the input and the output operand name val. */
  asm( "pcmpeqd %[src], %[dst]" :
       [dst] "=x" (val) :
       [src] "x" (val) );
  /* Store val; destination operand is the whole array. */
  asm( "movdqa %[src], %[dst]" :
       [dst] "=X" (buffer) :
       [src] "x" (val) );
  /* Store val; destination operand is the expression buffer+1. */
  asm( "movdqa %[src], %[dst]" :
       [dst] "=X" (buffer+1) :
       [src] "x" (val) );
}

:

__m128d buffer[100];   

/*
 * Sets val to all-ones with pcmpeqd and stores it at buffer and buffer + 16
 * bytes, addressing the stores through a pointer register that the compiler
 * picks ([sbuffer]).
 */
int main( void )
{
  register __m128d val;

  asm("# val: %0" : "=x" (val)); /* fix "is used uninitialized" warning */

  /*
   * pcmpeqd reads its destination as well as writing it, so val is passed as
   * one read-write ("+x") operand; a write-only output plus a separate input
   * could be given two different registers.  Comparing a register with itself
   * always yields all-ones.
   *
   * [buffer] and [buffer1] are not referenced by the template.  They are
   * listed as memory outputs only so the compiler knows which memory the
   * stores through [sbuffer] modify.
   */
  asm( "pcmpeqd %[val], %[val]\n\t"
       "movdqa %[val], (%[sbuffer])\n\t"
       "movdqa %[val], 16(%[sbuffer])" :

       [val] "+x" (val), [buffer] "=m"  (buffer), [buffer1] "=m" (buffer[1]) :
       [sbuffer] "r"  (buffer));
}

.

  • asm val . val , . .
  • asm asm, gcc .
  • sbuffer, buffer buffer1, buffer1? sbuffer . "buffer" "buffer1" , gcc, . "" clobber , . ( gcc docs re extended asm):

{"m"( ({ struct { char x[10]; } *p = (void *)ptr ; *p; }) )}.

gcc, 10 , ptr. , , , . , asm ( ), gcc.

What does this get us? Here is the asm the compiler generates (from -Os):

pcmpeqd %xmm0, %xmm0
movdqa %xmm0, (%rax)
movdqa %xmm0, 16(%rax)

, , r13, , , , , . , , rax, , , ? ! , :

__m128d buffer[100];   

/*
 * Runs the pcmpeqd/store sequence ten times with a printf call after each
 * pass, so the pointer register the compiler picks for [sbuffer] has to
 * survive a function call.
 */
int main( void )
{
  register __m128d val;

  for (int x=0; x < 10; x++)
  {
    asm("# val: %0" : "=x" (val)); /* fix "is used uninitialized" */

    /*
     * val is a single read-write ("+x") operand: pcmpeqd reads its
     * destination, and the stores must write the compare result rather than
     * the stale input.  Comparing a register with itself always yields
     * all-ones.  [buffer]/[buffer1] are not used in the template; they only
     * tell the compiler which memory the stores through [sbuffer] modify.
     */
    asm( "pcmpeqd %[val], %[val]\n\t"
         "movdqa %[val], (%[sbuffer])\n\t" /* buffer[0] */
         "movdqa %[val], 16(%[sbuffer])" : /* buffer[1] */

         [val] "+x" (val), [buffer] "=m"  (buffer), [buffer1] "=m" (buffer[1]) :
         [sbuffer] "r"  (buffer));

     /* %d needs an int: pass the low 32 bits of val instead of the vector. */
     printf("%d\n", _mm_cvtsi128_si32(_mm_castpd_si128(val)));
   }
}

Asm , printf (, ). ASM ? :

.L2:
    leaq    .LC0(%rip), %rcx
    movq    %rdi, %rdx
    pcmpeqd %xmm6, %xmm0
    movdqa %xmm6, (%rbx)
    movdqa %xmm6, 16(%rbx)
    movapd  %xmm0, 32(%rsp)
    call    printf
    subl    $1, %esi
    jne .L2

, rax rbx. ? , . c, , (ABI). , , , ( ), , , (.. ). wikipedia. , rbx ( x86-64).

, asm, , , rbx ( ). Gcc , - muck rbx, , . , , rbx, , , , , /.

"" - , , (. Global Reg Vars - ffixed-reg), , . x86 - . , - .

:

  • . , "" , , . Gcc () , .
  • gcc , asm, ( ). .

, (, , ). , , , .

+3

Source: https://habr.com/ru/post/1537619/


All Articles