When I handle IPv6 addresses in pure binary form, I use something like this:
// Compile with gcc or clang: relies on the __int128 and __builtin_bswap64
// compiler extensions.
#if defined(__x86_64__) || defined(__i386__)
# include <x86intrin.h>   // only exists on x86 targets
#endif
#include <stdint.h>
#include <arpa/inet.h>

// unsigned __int128 is a GCC/Clang extension (it is not part of ISO C).
typedef unsigned __int128 uint128_t;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__

// Some platforms' headers already provide htonll; do not redefine it there.
# ifndef htonll
#  define htonll(v) __builtin_bswap64((v))
# endif

/*
 * Convert a 128-bit integer from host byte order to network (big-endian)
 * byte order, e.g. for an IPv6 address held as a single integer.
 *
 * val: value in host byte order.
 * Returns val with its 16 bytes reversed (this is the little-endian build;
 * the big-endian build below returns val unchanged).
 */
uint128_t hton128(uint128_t val)
{
# ifdef __SSSE3__
    // SSSE3 path: reverse all 16 bytes with one byte shuffle.
    // _mm_set_epi8 lists bytes from most to least significant, so this mask
    // is 0x000102030405060708090a0b0c0d0e0f: result byte i takes source
    // byte 15 - i.
    const __m128i reverse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                         8, 9, 10, 11, 12, 13, 14, 15);
    __m128i m = _mm_loadu_si128((const __m128i *)&val);

    _mm_storeu_si128((__m128i *)&val, _mm_shuffle_epi8(m, reverse));
    return val;
# else
    // Portable path: byte-swap each 64-bit half and exchange the halves.
    // Shifts are used instead of uint64_t pointer casts so the value is
    // never accessed through an incompatible pointer type.
    uint64_t hi = (uint64_t)(val >> 64);
    uint64_t lo = (uint64_t)val;

    return ((uint128_t)__builtin_bswap64(lo) << 64) | __builtin_bswap64(hi);
# endif
}

#else /* big-endian host */

/*
 * Convert a 128-bit integer from host byte order to network byte order.
 * On a big-endian host the two orders are the same, so val is returned as is.
 */
uint128_t hton128(uint128_t val)
{
    return val;
}

#endif
source share