try the following:
// some bit operations stuff const unsigned char de_brujin_bit_map_64 [] = { 0,1,2,7,3,13,8,19,4,25,14,28,9,34,20,40,5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, 63,6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 }; inline uint8_t trailing_zero_count(uint64_t x) { return x?de_brujin_bit_map_64[(lower_bit(x)*0x0218A392CD3D5DBFL) >> 58]:64; } inline uint8_t leading_zero_count(uint64_t x) { return x?(63-de_brujin_bit_map_64[(upper_bit(x)*0x0218A392CD3D5DBFL) >> 58]):64; } inline uint64_t lower_bit(uint64_t x) { return x & -(int64_t&)x; } inline uint64_t upper_bit(uint64_t x) { if(!x) return 0; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x |= x >> 32; return (x >> 1) + 1; } inline uint128_t upper_bit(const uint128_t x) { if(x.upper()) return uint128_t(upper_bit(x.upper()), 0); else return uint128_t(0, upper_bit(x.lower())); } inline uint128_t lower_bit(const uint128_t x) { if(x.lower()) return uint128_t(0, lower_bit(x.lower())); else return uint128_t(lower_bit(x.upper()), 0); } inline uint8_t trailing_zero_count(const uint128_t& x) { return x.lower()?trailing_zero_count(x.lower()):(64+trailing_zero_count(x.upper())); } inline uint8_t leading_zero_count(const uint128_t& x) { return x.upper()?leading_zero_count(x.upper()):(64+leading_zero_count(x.lower())); } // division operator uint128_t uint128_t::operator/(const uint128_t& rhs) const { if(rhs == 0) return uint128_t(0); // !!!! zero division if(rhs == rhs) return uint128_t(1); if(rhs > *this) return uint128_t(0); if(rhs == 1) return *this; if(!upper_ && !rhs.upper_) return uint128_t(0, lower_/rhs.lower_); if(lower_bit(rhs) == rhs) return *this >> trailing_zero_count(rhs); uint128_t result; uint128_t bit_mask = upper_bit(); uint128_t denom = 1; do { bit_mask >>= 1; denom <<= 1; if(*this & bit_mask) denom |= 1; result <<= 1; if(denom >= rhs) { denom -= rhs; result |= 1; } } while (bit_mask.lower_ != 1); return result; }