Hardcoded byte array ends up as data when it exceeds 0xF in length

My goal is for my .rdata sections to contain as little as possible and for the compiler to use the text/code sections whenever it can. Now I have a small problem, and I hope someone can help me. In Clang and GCC, when you compile the following C++ code (note that the array is 15 bytes long):

#include <windows.h>

// Demo function for the 15-byte case: fills a local byte array with 15
// hardcoded values and passes it to MessageBoxA as the message text.
void _start() {
  // Exactly 15 initializers (0x1..0xF); the array length is the variable under test.
  unsigned char bytes[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF};
  // NOTE(review): bytes contains no zero element, yet it is cast to char* and
  // used as the text argument -- confirm that the missing terminator is acceptable
  // for this demo.
  MessageBoxA(nullptr, (char*)bytes, "Hi", MB_OK);
}

This compiles the way we want. All hardcoded data is embedded in the code itself (the compiler uses immediate movs), so nothing is added to any data section and no references are made to any data section. Here is the IDA Pro decompilation: [image: IDA Pro decompilation of the 15-byte case]

However, once the array grows past 15 bytes (here, 16 bytes), like so:

#include <windows.h>

// Demo function for the 16-byte case: identical to the 15-byte variant except
// that the local array carries one more hardcoded value (0x10).
void _start() {
  // 16 initializers (0x1..0x10); the array length is the variable under test.
  unsigned char bytes[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10};
  // NOTE(review): bytes contains no zero element, yet it is cast to char* and
  // used as the text argument -- confirm that the missing terminator is acceptable
  // for this demo.
  MessageBoxA(nullptr, (char*)bytes, "Hi", MB_OK);
}

the result is different: the data is now placed in a data section (unk_100402000, in .rdata):

enter image description here

. MSVC , , . Clang GCC, , .

, .rdata, , , text/code . 1000 /. - , / .

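Is there a way to make Clang/GCC keep arrays of 15+ bytes as immediate movs instead of placing them in .rdata? If so, how? I am mainly interested in Clang.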

, , , .

!

Clang: 4.0.1

. mov . , . ?

enter image description here

+4
3

, , clang; mov (gcc ):

// Stateless functor whose call operator returns the compile-time byte `Value`.
// The call operator carries Clang's `optnone` attribute, which turns off
// optimization for that function; presumably this keeps each byte from being
// folded into a constant array initializer (other compilers ignore the
// attribute).
template <unsigned char Value>
struct noopt {
    [[clang::optnone]] unsigned char operator()() const {
        return Value;
    }
};

// NODATA(v): instantiates noopt<v>, creates a temporary of it and calls it,
// so the expression evaluates to the byte v.
#define NODATA(v) (noopt<(v)>{}())

// Usage sketch: every element of the local array is produced by a NODATA(...)
// expression instead of a plain literal. The rest of the body is elided.
int main(){
    unsigned char bytes[] = { NODATA(0x1), NODATA(0x2), NODATA(0x3), NODATA(0x4), };

    // ...
}

( ).

+1

( ), :

 void Check( unsigned const char* x);

 // Defines a 128-byte constant table (8 rows of 16 bytes) and hands it to Check().
 // The section attribute explicitly requests that the table be emitted into the
 // ".text" section.
 void Do() {
   static const unsigned char bytes[] __attribute__((section(".text"))) =
   {
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
       0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10
   };
   // The static table itself (not a copy) is passed on.
   Check( bytes );
 }

 // Program entry point; its only action is to call Do().
 int main()
 {
     Do();
 }

:

Disassembly of section .text:
     08048580 <Do()>:
  8048580:  83 ec 18                sub    $0x18,%esp
  8048583:  68 a0 85 04 08          push   $0x80485a0
  8048588:  e8 e3 ff ff ff          call   8048570 <Check(unsigned char const*)>
  804858d:  83 c4 1c                add    $0x1c,%esp
  8048590:  c3                      ret

  ... much other disassembled code follows ...

 080485a0 <Do()::bytes>:
  80485a0:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  80485b0:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  80485c0:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  80485d0:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  80485e0:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  80485f0:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  8048600:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................
  8048610:  01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10     ................

As the disassembly shows, the array now resides in the .text section.

, , :

void Check( unsigned const char* x); 

// Byte-wise copy of one fixed-size array into another of the same element
// count. Element types may differ, but must have the same size (checked at
// compile time), so both arrays occupy the same number of bytes.
template< typename Src, typename Dst, unsigned int Count >
void Cpy( Src (&in)[Count], Dst (&out)[Count] )
{
    static_assert( sizeof( Src ) == sizeof( Dst ) );

    // sizeof( out ) is Count * sizeof( Dst ), i.e. the whole destination array.
    memcpy( out, in, sizeof( out ) );
}

// Keeps a 128-byte constant table (8 rows of 16 bytes) in the ".text" section,
// copies it into a local, writable array of the same size and passes that copy
// to Check().
void Do() {
  // The section attribute explicitly requests placement in ".text".
  static const unsigned char static_bytes[] __attribute__((section(".text"))) = 
  {   
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10,
      0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10
  };  

  // Local destination sized to match the table exactly, as Cpy() requires
  // both arrays to have the same element count.
  unsigned char bytes[ sizeof( static_bytes )]; 
  Cpy( static_bytes, bytes );


  // The local copy, not the static table, is passed on.
  Check( bytes );
}   

// Program entry point; its only action is to call Do().
int main()
{   
    Do();
}

- .text, . .

+2

I tested the following in several versions of gcc and clang using the compiler explorer, and it seems to work.

The `std::cout` call is only there to keep the compiler from optimizing the whole function away.

#include <iostream>
#include <cstring>

// Assembles a 44-byte buffer out of three small local arrays (15 + 15 + 14
// bytes) and streams it to std::cout as a C string. Each source array is kept
// at 15 bytes or fewer.
// NOTE(review): the parameter `num` is never used in the body.
void f(int num) {
    char r0[15] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    char r1[15] = {1,2,9,4,5,6,7,3,9,10,11,12,13,14,15};
    // Declared with 14 elements but only 13 initializers, so r2[13] is
    // zero-initialized.
    char r2[14] = {3,9,2,3,4,5,6,7,8,9,10,11,12};

    char r[44];

    // Concatenate the pieces back to back: offsets 0, 15 and 30.
    std::memcpy(r, r0, 15);
    std::memcpy(r + 15, r1, 15);
    std::memcpy(r + 30, r2, 14);

    // r2's trailing zero ends up in r[43], which terminates the string
    // streamed here.
    std::cout << r;
}
0
source

Source: https://habr.com/ru/post/1687892/


All Articles