I am trying to serialize a float according to the BSON specification , which only supports 64bit double. so i need to cast my float to double.
On a system where sizeof(double) == 8 I would just do
float f = 3.14; serialize((double)f);
but since sizeof(double) == 4 in my target system I have to do something like
float f = 3.14; uint64_t d; float32_to_float64(f, &d); serialize(d);
I wrote some test code (on a machine where sizeof(double) == 8) trying to correctly convert float32 to float64 and save the result as uint64_t, but I do not get the expected result.
#include <stdio.h> #include <stdint.h> #define FLOAT_FRACTION_MSK 0xFFFFFF #define DOUBLE_FRACTION_S 52 // Fraction is 52 bits #define DOUBLE_EXPONENT_S 11 // Exponent is 11 bits #define FLOAT_FRACTION_S 23 // Fraction is 23 bits #define FLOAT_EXPONENT_S 8 // Exponent is 8 bits int main(void) { // float af = 3.14; float af = 0.15625; double bd = 0; //uint8_t buff[sizeof(int64_t)] = {0}; *(uint64_t*)&bd |= (*(uint32_t*)&af & (1UL << 31)) << 32; // check sign bit uint8_t exponent32 = (*(uint32_t*)&af & 0x7F800000) >> (FLOAT_FRACTION_S+1); if (exponent32 == 0xFF) return 1; // Error (infiniti if fraction is zero, // Nan ortherwise) printf("exponent32=%.4x\n", exponent32); int64_t temp = *(uint64_t*)&bd; *(uint64_t*)&bd |= ((uint64_t)exponent32 << (DOUBLE_FRACTION_S+4)); //& 0x7FF0000000000000; // (33); // 28 printf("exponent64=%llx, %d\n", *(uint64_t*)&bd, (DOUBLE_FRACTION_S+4)); // Do the fraction { printf("fraction64=%#.8llx\n", ( (uint64_t)( (*(uint32_t*)&af & FLOAT_FRACTION_MSK) // + ((exponent32 != 0) ? (1<<24) : 0) ) << (DOUBLE_FRACTION_S-FLOAT_FRACTION_S-4)//((52-22)-1) // 33 ) ); *(uint64_t*)&bd |= ( (uint64_t)( (*(uint32_t*)&af & FLOAT_FRACTION_MSK) // + ((exponent32 != 0) ? (1<<24) : 0) ) << (DOUBLE_FRACTION_S-FLOAT_FRACTION_S) ) ; } double expected = af; printf("Original float=%#.4x, converted double=%#.8llx expected=%.8llx,\n", *(uint32_t*)&af, *(uint64_t*)&bd, *(uint64_t*)&expected); printf("Original float=%f, converted double=%lf\n\n", af, bd); *(uint64_t*)&bd = temp; return 0; }
The result of this gives Original float=0x3e200000, converted double=0x3e04000000000000 expected=3fc4000000000000,
So it seems that I am missing something when converting the exponent, but I do not understand what it is.