Phil CK

Decompose A Float

Last updated on

Floats can be weird to get your head around; this is a little snippet that helped me understand what the binary data does.

Resources

https://www.h-schmidt.net/FloatConverter/IEEE754.html http://fabiensanglard.net/floating_point_visually_explained/index.php

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>


/*
* Decompose a float and rebuild it with the data.
*
* Splits `a` into its IEEE-754 single precision fields (1 sign bit,
* 8 exponent bits, 23 mantissa bits), prints them, then reassembles the
* value from those fields and prints the result.
*
* a - the value to inspect.
*
* NaN and infinity cannot be rebuilt with the formula, so they are only
* reported by name. Zero and subnormals (exponent field == 0) are rebuilt
* as 0.M * 2^(-126); everything else as 1.M * 2^(E-127).
*/
void
print_flt(float a) {

        /* the masks and shifts below only make sense for a 32 bit float */
        static_assert(sizeof(float) == sizeof(uint32_t),
                "print_flt expects a 32 bit float");

        /*
         * decompose the float bits - copy them into an integer with memcpy
         * (reading the float through a pointer to another type breaks the
         * strict aliasing rules) and mask the fields out, which does not
         * depend on how the compiler lays out bit-fields.
         */
        uint32_t bits = 0;
        std::memcpy(&bits, &a, sizeof(bits));

        const int mant = static_cast<int>(bits & 0x7FFFFFu);     /* low 23 bits */
        const int expo = static_cast<int>((bits >> 23) & 0xFFu); /* next 8 bits */
        const int sign = static_cast<int>(bits >> 31);           /* top bit */

        /* NAN - Special Case */
        if(expo == 255 && mant > 0) {
                printf("NAN\n--\n");
                return;
        }

        /* Infinity - Special Case */
        if(expo == 255 && mant == 0) {
                /* +/- infinity print the same text, the sign bit is not reported */
                printf("Infinity\n--\n");
                return;
        }

        /*
         * Zero and subnormals have an exponent field of 0: there is no
         * implicit leading 1 and the exponent is fixed at -126.
         */
        const bool subnormal = (expo == 0);

        /* Reassemble float into the number */

        printf("Float: %f - Sign: %d, Exponent: %d, Mantissa: %d\n",
                a,
                sign,
                expo,
                mant);

        if(subnormal) {
                printf("(-1)^s * 0.M * 2^(-126)\n");

                printf("(-1^%d) * (0 + (%d / 2^23)) * (2^(-126))\n",
                        sign,
                        mant);
        } else {
                printf("(-1)^s * 1.M * 2^(E-127)\n");

                printf("(-1^%d) * (1 + (%d / 2^23)) * (2^(%d - 127))\n",
                        sign,
                        mant,
                        expo);
        }

        /* 8388608 == 2^23; ldexp(1.0, n) is exactly 2^n */
        const double sign_val = sign ? -1.0 : 1.0;
        const double mant_val = (subnormal ? 0.0 : 1.0) + (mant / 8388608.0);
        const double expo_val = std::ldexp(1.0, subnormal ? -126 : expo - 127);

        printf("(%lf) * (%lf) * (%lf)\n",
                sign_val,
                mant_val,
                expo_val);

        const double result = sign_val * mant_val * expo_val;

        printf("result: %f\n--\n", static_cast<float>(result));
}


/*
* Run print_flt over a handful of sample values: ordinary positive and
* negative numbers, then +infinity, -infinity and NaN.
*/
int
main() {
        /*
         * INFINITY and NAN come from <cmath>; they give the special values
         * directly instead of dividing by zero, which the C++ standard
         * leaves undefined and compilers warn about.
         */
        const float samples[] = {
                +3.142f,
                -3.142f,
                1.234567,
                -12345.6789,
                1234567.8901,
                INFINITY,
                -INFINITY,
                NAN,
        };

        for(float sample : samples) {
                print_flt(sample);
        }

        return 0;
}

build and go

c++ float.cpp -std=c++14 && ./a.out

Output

Float: 3.142000 - Sign: 0, Exponent: 128, Mantissa: 4789895
(-1)^s * 1.M * 2^(E-127)
(-1^0) * (1 + (4789895 / 2^23)) * (2^(128 - 127))
(1.000000) * (1.571000) * (2.000000)
result: 3.142000
--
Float: -3.142000 - Sign: 1, Exponent: 128, Mantissa: 4789895
(-1)^s * 1.M * 2^(E-127)
(-1^1) * (1 + (4789895 / 2^23)) * (2^(128 - 127))
(-1.000000) * (1.571000) * (2.000000)
result: -3.142000
--
Float: 1.234567 - Sign: 0, Exponent: 127, Mantissa: 1967691
(-1)^s * 1.M * 2^(E-127)
(-1^0) * (1 + (1967691 / 2^23)) * (2^(127 - 127))
(1.000000) * (1.234567) * (1.000000)
result: 1.234567
--
Float: -12345.678711 - Sign: 1, Exponent: 140, Mantissa: 4253367
(-1)^s * 1.M * 2^(E-127)
(-1^1) * (1 + (4253367 / 2^23)) * (2^(140 - 127))
(-1.000000) * (1.507041) * (8192.000000)
result: -12345.678711
--
Float: 1234567.875000 - Sign: 0, Exponent: 147, Mantissa: 1487935
(-1)^s * 1.M * 2^(E-127)
(-1^0) * (1 + (1487935 / 2^23)) * (2^(147 - 127))
(1.000000) * (1.177376) * (1048576.000000)
result: 1234567.875000
--
Infinity
--
Infinity
--
NAN
--

I don’t fully understand the maths yet, but at least I understand the data better.