#include <immintrin.h>

// Shift and rotate a whole 256-bit __m256i register left or right by n bits ( AVX2 ):
__m256i _mm256_sli_si256 ( __m256i, int );
__m256i _mm256_sri_si256 ( __m256i, int );
__m256i _mm256_rli_si256 ( __m256i, int );
__m256i _mm256_rri_si256 ( __m256i, int );
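// All of the helpers below are built from the same three ingredients: a per-word
// 64-bit shift ( _mm256_slli_epi64 / _mm256_srli_epi64 ), a lane-crossing permute
// of the four 64-bit words ( _mm256_permute4x64_epi64 ), and, in the shifts, a
// blend against _mm256_setzero_si256 ( ) to clear the words that fall off the end.
// There is one helper per 64-bit range of the count n; the dispatcher at the
// bottom of each group selects the helper and passes n % 64.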
__m256i left_shift_000_063 ( __m256i a, int n ) {
return _mm256_or_si256 ( _mm256_slli_epi64 ( a, n ), _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), _mm256_permute4x64_epi64 ( _mm256_srli_epi64 ( a, 64 - n ), _MM_SHUFFLE ( 2, 1, 0, 0 ) ), _MM_SHUFFLE ( 3, 3, 3, 0 ) ) );
}
__m256i left_shift_064_127 ( __m256i a, int n ) {
__m256i b = _mm256_slli_epi64 ( a, n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 2, 1, 0, 0 ) );
__m256i c = _mm256_srli_epi64 ( a, 64 - n );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 1, 0, 0, 0 ) );
__m256i f = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), d, _MM_SHUFFLE ( 3, 3, 3, 0 ) );
__m256i g = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), e, _MM_SHUFFLE ( 3, 3, 0, 0 ) );
return _mm256_or_si256 ( f, g );
}
__m256i left_shift_128_191 ( __m256i a, int n ) {
__m256i b = _mm256_slli_epi64 ( a, n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 1, 0, 0, 0 ) );
__m256i c = _mm256_srli_epi64 ( a, 64 - n );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 0, 0, 0, 0 ) );   // carry bits of word 0 go up to word 3
__m256i f = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), d, _MM_SHUFFLE ( 3, 3, 0, 0 ) );
__m256i g = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), e, _MM_SHUFFLE ( 3, 0, 0, 0 ) );
return _mm256_or_si256 ( f, g );
}
__m256i left_shift_192_255 ( __m256i a, int n ) {
return _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), _mm256_slli_epi64 ( _mm256_permute4x64_epi64 ( a, _MM_SHUFFLE ( 0, 0, 0, 0 ) ), n ), _MM_SHUFFLE ( 3, 0, 0, 0 ) );
}
__m256i _mm256_sli_si256 ( __m256i a, int n ) {
if ( n < 128 ) return n < 64 ? left_shift_000_063 ( a, n ) : left_shift_064_127 ( a, n % 64 );
else return n < 192 ? left_shift_128_191 ( a, n % 64 ) : left_shift_192_255 ( a, n % 64 );
}
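// A plain scalar reference for the 256-bit left shift, useful for checking the
// AVX2 helpers above. This function is an added illustration rather than part of
// the original set; it assumes the usual in-memory layout of __m256i, i.e. 64-bit
// word 0 is the least significant.
#include <stdint.h>   // uint64_t, used only by the reference functions
__m256i left_shift_reference ( __m256i a, int n ) {
    uint64_t w [ 4 ], r [ 4 ] = { 0, 0, 0, 0 };
    _mm256_storeu_si256 ( ( __m256i * ) w, a );
    int q = n / 64, s = n % 64;                      // whole-word part and bit part of the count
    for ( int i = 3; i >= q; --i ) {
        r [ i ] = w [ i - q ] << s;                  // bits from the aligned source word
        if ( s && i - q - 1 >= 0 )                   // carry from the next lower word ( skip s == 0 to avoid a shift by 64 )
            r [ i ] |= w [ i - q - 1 ] >> ( 64 - s );
    }
    return _mm256_loadu_si256 ( ( const __m256i * ) r );
}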
__m256i right_shift_000_063 ( __m256i a, int n ) {
return _mm256_or_si256 ( _mm256_srli_epi64 ( a, n ), _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), _mm256_permute4x64_epi64 ( _mm256_slli_epi64 ( a, 64 - n ), _MM_SHUFFLE ( 0, 3, 2, 1 ) ), _MM_SHUFFLE ( 0, 3, 3, 3 ) ) );
}
__m256i right_shift_064_127 ( __m256i a, int n ) {
__m256i b = _mm256_srli_epi64 ( a, n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 3, 3, 2, 1 ) );
__m256i c = _mm256_slli_epi64 ( a, 64 - n );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 3, 3, 3, 2 ) );
__m256i f = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), d, _MM_SHUFFLE ( 0, 3, 3, 3 ) );
__m256i g = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), e, _MM_SHUFFLE ( 0, 0, 3, 3 ) );
return _mm256_or_si256 ( f, g );
}
__m256i right_shift_128_191 ( __m256i a, int n ) {
__m256i b = _mm256_srli_epi64 ( a, n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 3, 2, 3, 2 ) );
__m256i c = _mm256_slli_epi64 ( a, 64 - n );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 3, 2, 1, 3 ) );
__m256i f = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), d, _MM_SHUFFLE ( 0, 0, 3, 3 ) );
__m256i g = _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), e, _MM_SHUFFLE ( 0, 0, 0, 3 ) );
return _mm256_or_si256 ( f, g );
}
__m256i right_shift_192_255 ( __m256i a, int n ) {
return _mm256_blend_epi32 ( _mm256_setzero_si256 ( ), _mm256_srli_epi64 ( _mm256_permute4x64_epi64 ( a, _MM_SHUFFLE ( 0, 0, 0, 3 ) ), n ), _MM_SHUFFLE ( 0, 0, 0, 3 ) );
}
__m256i _mm256_sri_si256 ( __m256i a, int n ) {
if ( n < 128 ) return n < 64 ? right_shift_000_063 ( a, n ) : right_shift_064_127 ( a, n % 64 );
else return n < 192 ? right_shift_128_191 ( a, n % 64 ) : right_shift_192_255 ( a, n % 64 );
}
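// The mirrored scalar reference for the 256-bit right shift ( again an added
// illustration, using the same word layout as left_shift_reference ).
__m256i right_shift_reference ( __m256i a, int n ) {
    uint64_t w [ 4 ], r [ 4 ] = { 0, 0, 0, 0 };
    _mm256_storeu_si256 ( ( __m256i * ) w, a );
    int q = n / 64, s = n % 64;
    for ( int i = 0; i + q <= 3; ++i ) {
        r [ i ] = w [ i + q ] >> s;                  // bits from the aligned source word
        if ( s && i + q + 1 <= 3 )                   // carry from the next higher word
            r [ i ] |= w [ i + q + 1 ] << ( 64 - s );
    }
    return _mm256_loadu_si256 ( ( const __m256i * ) r );
}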
__m256i left_rotate_000_063 ( __m256i a, int n ) {
return _mm256_or_si256 ( _mm256_slli_epi64 ( a, n ), _mm256_permute4x64_epi64 ( _mm256_srli_epi64 ( a, 64 - n ), _MM_SHUFFLE ( 2, 1, 0, 3 ) ) );
}
__m256i left_rotate_064_127 ( __m256i a, int n ) {
__m256i b = _mm256_slli_epi64 ( a, n );
__m256i c = _mm256_srli_epi64 ( a, 64 - n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 2, 1, 0, 3 ) );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 1, 0, 3, 2 ) );
return _mm256_or_si256 ( d, e );
}
__m256i left_rotate_128_191 ( __m256i a, int n ) {
__m256i b = _mm256_slli_epi64 ( a, n );
__m256i c = _mm256_srli_epi64 ( a, 64 - n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 1, 0, 3, 2 ) );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 0, 3, 2, 1 ) );
return _mm256_or_si256 ( d, e );
}
__m256i left_rotate_192_255 ( __m256i a, int n ) {
return _mm256_or_si256 ( _mm256_srli_epi64 ( a, 64 - n ), _mm256_permute4x64_epi64 ( _mm256_slli_epi64 ( a, n ), _MM_SHUFFLE ( 0, 3, 2, 1 ) ) );
}
__m256i _mm256_rli_si256 ( __m256i a, int n ) {
if ( n < 128 ) return n < 64 ? left_rotate_000_063 ( a, n ) : left_rotate_064_127 ( a, n % 64 );
else return n < 192 ? left_rotate_128_191 ( a, n % 64 ) : left_rotate_192_255 ( a, n % 64 );
}
__m256i right_rotate_000_063 ( __m256i a, int n ) {
return _mm256_or_si256 ( _mm256_srli_epi64 ( a, n ), _mm256_permute4x64_epi64 ( _mm256_slli_epi64 ( a, 64 - n ), _MM_SHUFFLE ( 0, 3, 2, 1 ) ) );
}
__m256i right_rotate_064_127 ( __m256i a, int n ) {
__m256i b = _mm256_srli_epi64 ( a, n );
__m256i c = _mm256_slli_epi64 ( a, 64 - n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 0, 3, 2, 1 ) );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 1, 0, 3, 2 ) );
return _mm256_or_si256 ( d, e );
}
__m256i right_rotate_128_191 ( __m256i a, int n ) {
__m256i b = _mm256_srli_epi64 ( a, n );
__m256i c = _mm256_slli_epi64 ( a, 64 - n );
__m256i d = _mm256_permute4x64_epi64 ( b, _MM_SHUFFLE ( 1, 0, 3, 2 ) );
__m256i e = _mm256_permute4x64_epi64 ( c, _MM_SHUFFLE ( 2, 1, 0, 3 ) );
return _mm256_or_si256 ( d, e );
}
__m256i right_rotate_192_255 ( __m256i a, int n ) {
return _mm256_or_si256 ( _mm256_slli_epi64 ( a, 64 - n ), _mm256_permute4x64_epi64 ( _mm256_srli_epi64 ( a, n ), _MM_SHUFFLE ( 2, 1, 0, 3 ) ) );
}
__m256i _mm256_rri_si256 ( __m256i a, int n ) {
if ( n < 128 ) return n < 64 ? right_rotate_000_063 ( a, n ) : right_rotate_064_127 ( a, n % 64 );
else return n < 192 ? right_rotate_128_191 ( a, n % 64 ) : right_rotate_192_255 ( a, n % 64 );
}
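// A quick self-check tying the four routines together, based on the identity
// rotl ( a, n ) == ( a << n ) | ( a >> ( 256 - n ) ) for 0 < n < 256. The test value
// is arbitrary and the function is an added sketch; it returns 0 on success or
// the first failing rotate count.
int check_rotate_identity ( void ) {
    __m256i a = _mm256_set_epi64x ( 0x0123456789ABCDEFull, 0x0F1E2D3C4B5A6978ull,
                                    0xFEDCBA9876543210ull, 0x1122334455667788ull );
    for ( int n = 1; n < 256; ++n ) {
        __m256i x = _mm256_rli_si256 ( a, n );
        __m256i y = _mm256_or_si256 ( _mm256_sli_si256 ( a, n ), _mm256_sri_si256 ( a, 256 - n ) );
        if ( _mm256_movemask_epi8 ( _mm256_cmpeq_epi8 ( x, y ) ) != -1 )
            return n;                                // mismatch at this rotate count
    }
    return 0;                                        // identity holds for every count
}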
// _mm256_permute4x64_epi64 ( ) is the key ingredient here: it picks its four
// 64-bit words from anywhere in the source register, so it can move data across
// the 128-bit lane boundary. Its control, like the blend masks, must be a
// compile-time constant, which is why every operation is split into four helpers,
// one per 64-bit range of the count. The dispatchers reduce the count modulo 64
// and choose the matching helper, e.g. a left shift by 100 bits ends up as
// left_shift_064_127 ( a, 36 ). Counts are expected to lie in the range 0..255.
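// For contrast, the built-in _mm256_slli_si256 shifts bytes within each 128-bit
// lane separately and never carries across the middle of the register, e.g.
// ( 64-bit words listed most significant first ):
//     a                            = [ w3, w2, w1, w0 ]
//     _mm256_slli_si256 ( a, 8 )   = [ w2,  0, w0,  0 ]   // per-lane byte shift
//     _mm256_sli_si256  ( a, 64 )  = [ w2, w1, w0,  0 ]   // full 256-bit shift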