Float division functions

2022-06-12 09:34:36 +11:00 · 2022-06-12 09:34:36 +11:00 · f533aa779c
parent 105251088f
commit f533aa779c
6 changed files with 374 additions and 14 deletions
--- a/bake.cmd
+++ b/bake.cmd
@ -13,7 +13,7 @@ if "%~1" neq "_start_" (
 )
 shift /1
-set CIABATTA_OPTIONS=-Iinc -Wall -g -gcodeview -nodefaultlibs -D_CRT_SECURE_NO_WARNINGS
+set CIABATTA_OPTIONS=-Iinc -Wall -g -gcodeview -nodefaultlibs -D_CRT_SECURE_NO_WARNINGS -mfma
 set PLATFORM=win
 if "%1"=="test" (
--- a/code/math/_util.h
+++ b/code/math/_util.h
@ -0,0 +1,10 @@
 #define asuint64(x) ((union {double f; uint64_t i;}){x}).i
 #define asdouble(x) ((union {double f; uint64_t i;}){x}).f
 #include <_compiler.h>
 #if defined(_compiler_clang) || defined(_compiler_gnu)
    #define just_do_it(t) __attribute__((unused)) volatile t
 #else
    #define just_do_it(t) volatile t
 #endif
--- a/code/math/basic.c
+++ b/code/math/basic.c
@ -0,0 +1,112 @@
 #include <math.h>
 #include <stdint.h>
 #if !defined(__FMA__)
    #error "Get a better CPU (the kind that supports FMA) or enable -mfma"
 #endif
 #include <immintrin.h>
 double fabs(double x) {
    union {double f; uint64_t i;} u = {x};
    u.i &= -1ULL/2;
    return u.f;
 }
 float fabsf(float x) {
    union {float f; uint32_t i;} u = {x};
    u.i &= 0x7fffffff;
    return u.f;
 }
 long double fabsl(long double x) {
    return fabs(x);
 }
 double fdim(double x, double y) {
    if (isnan(x)) return x;
    if (isnan(y)) return y;
    return x > y ? x - y : 0;
 }
 float fdimf(float x, float y) {
    if (isnan(x)) return x;
    if (isnan(y)) return y;
    return x > y ? x - y : 0;
 }
 long double fdiml(long double x, long double y) {
    return fdim(x, y);
 }
 double fmax(double x, double y) {
    if (isnan(x)) return y;
    if (isnan(y)) return x;
    /* handle signed zeros, see C99 Annex F.9.9.2 */
    if (signbit(x) != signbit(y))
        return signbit(x) ? y : x;
    return x < y ? y : x;
 }
 float fmaxf(float x, float y) {
    if (isnan(x)) return y;
    if (isnan(y)) return x;
    /* handle signed zeroes, see C99 Annex F.9.9.2 */
    if (signbit(x) != signbit(y))
        return signbit(x) ? y : x;
    return x < y ? y : x;
 }
 long double fmaxl(long double x, long double y) {
    return fmax(x, y);
 }
 double fmin(double x, double y) {
    if (isnan(x)) return y;
    if (isnan(y)) return x;
    /* handle signed zeros, see C99 Annex F.9.9.2 */
    if (signbit(x) != signbit(y))
        return signbit(x) ? x : y;
    return x < y ? x : y;
 }
 float fminf(float x, float y) {
    if (isnan(x)) return y;
    if (isnan(y)) return x;
    /* handle signed zeros, see C99 Annex F.9.9.2 */
    if (signbit(x) != signbit(y))
        return signbit(x) ? x : y;
    return x < y ? x : y;
 }
 long double fminl(long double x, long double y) {
    return fmin(x, y);
 }
 double fma(double x, double y, double z) {
    __m128d xd = _mm_set_sd(x);
    __m128d yd = _mm_set_sd(y);
    __m128d zd = _mm_set_sd(z);
    __m128d rd = _mm_fmadd_sd(xd, yd, zd);
    double res = _mm_cvtsd_f64(rd);
    return res;
 }
 float fmaf(float x, float y, float z) {
    __m128 xd = _mm_set_ss(x);
    __m128 yd = _mm_set_ss(y);
    __m128 zd = _mm_set_ss(z);
    __m128 rd = _mm_fmadd_sd(xd, yd, zd);
    float res = _mm_cvtsd_f64(rd);
    return res;
 }
 long double fmal(long double x, long double y, long double z) {
    return fma(x, y, z);
 }
 float remainderf(float x, float y) {
    int q;
    return remquof(x, y, &q);
 }
--- a/code/math/division.c
+++ b/code/math/division.c
@ -0,0 +1,250 @@
 #include <math.h>
 #include <stdint.h>
 double remquo(double x, double y, int *quo) {
    union {double f; uint64_t i;} ux = {x}, uy = {y};
    int ex = ux.i>>52 & 0x7ff;
    int ey = uy.i>>52 & 0x7ff;
    int sx = ux.i>>63;
    int sy = uy.i>>63;
    uint32_t q;
    uint64_t i;
    uint64_t uxi = ux.i;
    *quo = 0;
    if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
        return (x*y)/(x*y);
    if (ux.i<<1 == 0)
        return x;
    /* normalize x and y */
    if (!ex) {
        for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
        uxi <<= -ex + 1;
    } else {
        uxi &= -1ULL >> 12;
        uxi |= 1ULL << 52;
    }
    if (!ey) {
        for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
        uy.i <<= -ey + 1;
    } else {
        uy.i &= -1ULL >> 12;
        uy.i |= 1ULL << 52;
    }
    q = 0;
    if (ex < ey) {
        if (ex+1 == ey)
            goto end;
        return x;
    }
    /* x mod y */
    for (; ex > ey; ex--) {
        i = uxi - uy.i;
        if (i >> 63 == 0) {
            uxi = i;
            q++;
        }
        uxi <<= 1;
        q <<= 1;
    }
    i = uxi - uy.i;
    if (i >> 63 == 0) {
        uxi = i;
        q++;
    }
    if (uxi == 0)
        ex = -60;
    else
        for (; uxi>>52 == 0; uxi <<= 1, ex--);
 end:
    /* scale result and decide between |x| and |x|-|y| */
    if (ex > 0) {
        uxi -= 1ULL << 52;
        uxi |= (uint64_t)ex << 52;
    } else {
        uxi >>= -ex + 1;
    }
    ux.i = uxi;
    x = ux.f;
    if (sy)
        y = -y;
    if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
        x -= y;
        q++;
    }
    q &= 0x7fffffff;
    *quo = sx^sy ? -(int)q : (int)q;
    return sx ? -x : x;
 }
 float remquof(float x, float y, int *quo) {
    union {float f; uint32_t i;} ux = {x}, uy = {y};
    int ex = ux.i>>23 & 0xff;
    int ey = uy.i>>23 & 0xff;
    int sx = ux.i>>31;
    int sy = uy.i>>31;
    uint32_t q;
    uint32_t i;
    uint32_t uxi = ux.i;
    *quo = 0;
    if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
        return (x*y)/(x*y);
    if (ux.i<<1 == 0)
        return x;
    /* normalize x and y */
    if (!ex) {
        for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
        uxi <<= -ex + 1;
    } else {
        uxi &= -1U >> 9;
        uxi |= 1U << 23;
    }
    if (!ey) {
        for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
        uy.i <<= -ey + 1;
    } else {
        uy.i &= -1U >> 9;
        uy.i |= 1U << 23;
    }
    q = 0;
    if (ex < ey) {
        if (ex+1 == ey)
            goto end;
        return x;
    }
    /* x mod y */
    for (; ex > ey; ex--) {
        i = uxi - uy.i;
        if (i >> 31 == 0) {
            uxi = i;
            q++;
        }
        uxi <<= 1;
        q <<= 1;
    }
    i = uxi - uy.i;
    if (i >> 31 == 0) {
        uxi = i;
        q++;
    }
    if (uxi == 0)
        ex = -30;
    else
        for (; uxi>>23 == 0; uxi <<= 1, ex--);
 end:
    /* scale result and decide between |x| and |x|-|y| */
    if (ex > 0) {
        uxi -= 1U << 23;
        uxi |= (uint32_t)ex << 23;
    } else {
        uxi >>= -ex + 1;
    }
    ux.i = uxi;
    x = ux.f;
    if (sy)
        y = -y;
    if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
        x -= y;
        q++;
    }
    q &= 0x7fffffff;
    *quo = sx^sy ? -(int)q : (int)q;
    return sx ? -x : x;
 }
 long double remquol(long double x, long double y, int *quo) {
    return remquo(x, y, quo);
 }
 double remainder(double x, double y) {
    int q;
    return remquo(x, y, &q);
 }
 float remainderf(float x, float y) {
    int q;
    return remquof(x, y, &q);
 }
 long double remainderl(long double x, long double y) {
    return remainder(x, y);
 }
 double modf(double x, double *iptr) {
    union {double f; uint64_t i;} u = {x};
    uint64_t mask;
    int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
    /* no fractional part */
    if (e >= 52) {
        *iptr = x;
        if (e == 0x400 && u.i<<12 != 0) /* nan */
            return x;
        u.i &= 1ULL<<63;
        return u.f;
    }
    /* no integral part*/
    if (e < 0) {
        u.i &= 1ULL<<63;
        *iptr = u.f;
        return x;
    }
    mask = -1ULL>>12>>e;
    if ((u.i & mask) == 0) {
        *iptr = x;
        u.i &= 1ULL<<63;
        return u.f;
    }
    u.i &= ~mask;
    *iptr = u.f;
    return x - u.f;
 }
 float modff(float x, float *iptr) {
    union {float f; uint32_t i;} u = {x};
    uint32_t mask;
    int e = (int)(u.i>>23 & 0xff) - 0x7f;
    /* no fractional part */
    if (e >= 23) {
        *iptr = x;
        if (e == 0x80 && u.i<<9 != 0) { /* nan */
            return x;
        }
        u.i &= 0x80000000;
        return u.f;
    }
    /* no integral part */
    if (e < 0) {
        u.i &= 0x80000000;
        *iptr = u.f;
        return x;
    }
    mask = 0x007fffff>>e;
    if ((u.i & mask) == 0) {
        *iptr = x;
        u.i &= 0x80000000;
        return u.f;
    }
    u.i &= ~mask;
    *iptr = u.f;
    return x - u.f;
 }
 long double modfl(long double x, long double *iptr) {
    double d;
    long double r = modf(x, &d);
    *iptr = d;
    return r;
 }
--- a/code/math/ieee754.c
+++ b/code/math/ieee754.c
@ -4,12 +4,6 @@
 #include <stdint.h>
 #include <float.h>
 #include <_compiler.h>
 #if defined(_compiler_clang) || defined(_compiler_gnu)
    #define just_do_it(t) __attribute__((unused)) volatile t
 #endif
 int _fpclassify(double x) {
    union {double f; uint64_t i;} u = {x};
    int e = u.i>>52 & 0x7ff;
--- a/code/math/round.c
+++ b/code/math/round.c
@ -5,13 +5,7 @@
 #include <float.h>
 #include <limits.h>
-#include <_compiler.h>
+#include "_util.h"
 #if defined(_compiler_clang) || defined(_compiler_gnu)
    #define just_do_it(t) __attribute__((unused)) volatile t
 #endif
 #define asuint64(x) ((union {double f; uint64_t i;}){x}).i
 #define asdouble(x) ((union {double f; uint64_t i;}){x}).f
 double nearbyint(double x) {
    #pragma STDC FENV_ACCESS ON