math.h rounding functions

2022-06-11 17:17:24 +11:00 · 2022-06-11 17:17:24 +11:00 · effd0acb63
parent 2320a22706
commit effd0acb63
3 changed files with 450 additions and 191 deletions
--- a/code/math/ieee754.c
+++ b/code/math/ieee754.c
@ -81,184 +81,3 @@ long double nanl(const char *s) {
    return NAN;
 }

-
-double rint(double x) {
-    static const double_t toint = 1/DBL_EPSILON;
-    union {double f; uint64_t i;} u = {x};
-    int e = u.i>>52 & 0x7ff;
-    int s = u.i>>63;
-    double y;
-    if (e >= 0x3ff+52) return x;
-    if (s) y = x - toint + toint;
-    else   y = x + toint - toint;
-    if (y == 0) return s ? -0.0 : +0.0;
-    return y;
-}
-
-float rintf(float x) {
-    static const float toint = 1/FLT_EPSILON;
-    union {float f; uint32_t i;} u = {x};
-    int e = u.i>>23 & 0xff;
-    int s = u.i>>31;
-    float y;
-    if (e >= 0x7f+23) return x;
-    if (s) y = x - toint + toint;
-    else y = x + toint - toint;
-    if (y == 0) return s ? -0.0f : 0.0f;
-    return y;
-}
-
-long double rintl(long double x) {
-    return rint(x);
-}
-
-
-double nearbyint(double x) {
-    #pragma STDC FENV_ACCESS ON
-    int e = fetestexcept(FE_INEXACT);
-    x = rint(x);
-    if (!e) feclearexcept(FE_INEXACT);
-    return x;
-}
-
-float nearbyintf(float x) {
-    #pragma STDC FENV_ACCESS ON
-    int e = fetestexcept(FE_INEXACT);
-    x = rintf(x);
-    if (!e) feclearexcept(FE_INEXACT);
-    return x;
-}
-
-long double nearbyintl(long double x) {
-    return nearbyint(x);
-}
-
-
-double nextafter(double x, double y) {
-    union {double f; uint64_t i;} ux={x}, uy={y};
-    uint64_t ax, ay;
-    int e;
-    if (isnan(x) || isnan(y)) return x + y;
-    if (ux.i == uy.i) return y;
-    ax = ux.i & -1ULL/2;
-    ay = uy.i & -1ULL/2;
-    if (ax == 0) {
-        if (ay == 0) return y;
-        ux.i = (uy.i & 1ULL<<63) | 1;
-    } else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63)) {
-        ux.i--;
-    }
-    else {
-        ux.i++;
-    }
-    e = ux.i >> 52 & 0x7ff;
-    /* raise overflow if ux.f is infinite and x is finite */
-    if (e == 0x7ff) just_do_it(float) _x = x+x;
-    /* raise underflow if ux.f is subnormal or zero */
-    if (e == 0) just_do_it(float) _x = x*x + ux.f*ux.f;
-    return ux.f;
-}
-
-float nextafterf(float x, float y) {
-    union {float f; uint32_t i;} ux={x}, uy={y};
-    uint32_t ax, ay, e;
-
-    if (isnan(x) || isnan(y)) return x + y;
-    if (ux.i == uy.i) return y;
-    ax = ux.i & 0x7fffffff;
-    ay = uy.i & 0x7fffffff;
-    if (ax == 0) {
-        if (ay == 0) return y;
-        ux.i = (uy.i & 0x80000000) | 1;
-    } else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000)) {
-        ux.i--;
-    }
-    else {
-        ux.i++;
-    }
-    e = ux.i & 0x7f800000;
-    /* raise overflow if ux.f is infinite and x is finite */
-    if (e == 0x7f800000) just_do_it(float) _x = x+x;
-    /* raise underflow if ux.f is subnormal or zero */
-    if (e == 0) just_do_it(float) _x = x*x + ux.f*ux.f;
-    return ux.f;
-}
-
-long double nextafterl(long double x, long double y) {
-    return nextafter(x, y);
-}
-
-double nexttoward(double x, long double y) {
-    return nextafter(x, y);
-}
-
-float nexttowardf(float x, long double y) {
-    union {float f; uint32_t i;} ux = {x};
-    uint32_t e;
-    if (isnan(x) || isnan(y)) return x + y;
-    if (x == y) return y;
-    if (x == 0) {
-        ux.i = 1;
-        if (signbit(y)) ux.i |= 0x80000000;
-    } else if (x < y) {
-        if (signbit(x)) ux.i--;
-        else            ux.i++;
-    } else {
-        if (signbit(x)) ux.i++;
-        else            ux.i--;
-    }
-    e = ux.i & 0x7f800000;
-    /* raise overflow if ux.f is infinite and x is finite */
-    if (e == 0x7f800000) just_do_it(float) _x = x+x;
-    /* raise underflow if ux.f is subnormal or zero */
-    if (e == 0) just_do_it(float) _x = x*x + ux.f*ux.f;
-    return ux.f;
-}
-
-long double nexttowardl(long double x, long double y) {
-    return nextafterl(x, y);
-}
-
-
-double round(double x) {
-    static const double_t toint = 1/DBL_EPSILON;
-    union {double f; uint64_t i;} u = {x};
-    int e = u.i >> 52 & 0x7ff;
-    double_t y;
-    if (e >= 0x3ff+52) return x;
-    if (u.i >> 63) x = -x;
-    if (e < 0x3ff-1) {
-        /* raise inexact if x!=0 */
-        just_do_it(float) _x = x + toint;
-        return 0*u.f;
-    }
-    y = x + toint - toint - x;
-    if (y > 0.5)        y = y + x - 1;
-    else if (y <= -0.5) y = y + x + 1;
-    else                y = y + x;
-    if (u.i >> 63) y = -y;
-    return y;
-}
-
-float roundf(float x) {
-    static const double_t toint = 1/FLT_EPSILON;
-    union {float f; uint32_t i;} u = {x};
-    int e = u.i >> 23 & 0xff;
-    float_t y;
-    if (e >= 0x7f+23) return x;
-    if (u.i >> 31) x = -x;
-    if (e < 0x7f-1) {
-        just_do_it(float) _x = x + toint;
-        return 0*u.f;
-    }
-    y = x + toint - toint - x;
-    if (y > 0.5f)        y = y + x - 1;
-    else if (y <= -0.5f) y = y + x + 1;
-    else                 y = y + x;
-    if (u.i >> 31) y = -y;
-    return y;
-}
-
-long double roundl(long double x) {
-    return round(x);
-}
--- a/code/math/round.c
+++ b/code/math/round.c
@ -0,0 +1,409 @@
+
+#include <math.h>
+#include <stdint.h>
+#include <fenv.h>
+#include <float.h>
+#include <limits.h>
+
+#include <_compiler.h>
+#if defined(_compiler_clang) || defined(_compiler_gnu)
+    #define just_do_it(t) __attribute__((unused)) volatile t
+#endif
+
+/* Bit-level reinterpretation between a double and its 64-bit encoding.
+ * A brace initializer without a designator sets the FIRST union member, so
+ * each macro names the member it writes; without that, asdouble() would do a
+ * value conversion (uint64_t -> double) instead of a bit cast. */
+#define asuint64(x) (((union {double f; uint64_t i;}){.f = (x)}).i)
+#define asdouble(x) (((union {double f; uint64_t i;}){.i = (x)}).f)
+
+/* Round x with rint() while leaving FE_INEXACT as it was on entry: if the
+ * flag was clear before the call, it is cleared again afterwards. */
+double nearbyint(double x) {
+    #pragma STDC FENV_ACCESS ON
+    const int inexact_was_set = fetestexcept(FE_INEXACT);
+    const double rounded = rint(x);
+    if (inexact_was_set == 0) {
+        feclearexcept(FE_INEXACT);
+    }
+    return rounded;
+}
+
+/* Float variant: round x with rintf() and clear FE_INEXACT afterwards if it
+ * was not already set when the function was entered. */
+float nearbyintf(float x) {
+    #pragma STDC FENV_ACCESS ON
+    const int inexact_was_set = fetestexcept(FE_INEXACT);
+    const float rounded = rintf(x);
+    if (inexact_was_set == 0) {
+        feclearexcept(FE_INEXACT);
+    }
+    return rounded;
+}
+
+/* long double variant: forwards to nearbyint(), so the argument is narrowed
+ * to double on the way in. */
+long double nearbyintl(long double x) {
+    const double narrowed = (double)x;
+    return (long double)nearbyint(narrowed);
+}
+
+/* Return the double adjacent to x in the direction of y, stepping the 64-bit
+ * encoding by one. A NaN argument yields x + y; identical encodings return y. */
+double nextafter(double x, double y) {
+    union {double f; uint64_t i;} ux={x}, uy={y};
+    uint64_t ax, ay;
+    int e;
+    if (isnan(x) || isnan(y)) return x + y;
+    if (ux.i == uy.i) return y;
+    /* encodings with the sign bit masked off */
+    ax = ux.i & -1ULL/2;
+    ay = uy.i & -1ULL/2;
+    if (ax == 0) {
+        /* x is +-0: result is the smallest-magnitude value with y's sign */
+        if (ay == 0) return y;
+        ux.i = (uy.i & 1ULL<<63) | 1;
+    } else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63)) {
+        /* |x| > |y| or the signs differ: shrink the magnitude */
+        ux.i--;
+    } else {
+        ux.i++;
+    }
+    e = ux.i >> 52 & 0x7ff;
+    /* The forced evaluations are declarations, which ISO C does not accept as
+     * the unbraced body of an if, so each one gets its own block. */
+    /* raise overflow if ux.f is infinite and x is finite */
+    if (e == 0x7ff) {
+        just_do_it(float) _x = x+x;
+    }
+    /* raise underflow if ux.f is subnormal or zero */
+    if (e == 0) {
+        just_do_it(float) _x = x*x + ux.f*ux.f;
+    }
+    return ux.f;
+}
+
+/* Return the float adjacent to x in the direction of y, stepping the 32-bit
+ * encoding by one. A NaN argument yields x + y; identical encodings return y. */
+float nextafterf(float x, float y) {
+    union {float f; uint32_t i;} ux={x}, uy={y};
+    uint32_t ax, ay, e;
+
+    if (isnan(x) || isnan(y)) return x + y;
+    if (ux.i == uy.i) return y;
+    /* encodings with the sign bit masked off */
+    ax = ux.i & 0x7fffffff;
+    ay = uy.i & 0x7fffffff;
+    if (ax == 0) {
+        /* x is +-0: result is the smallest-magnitude value with y's sign */
+        if (ay == 0) return y;
+        ux.i = (uy.i & 0x80000000) | 1;
+    } else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000)) {
+        /* |x| > |y| or the signs differ: shrink the magnitude */
+        ux.i--;
+    } else {
+        ux.i++;
+    }
+    e = ux.i & 0x7f800000;
+    /* The forced evaluations are declarations, which ISO C does not accept as
+     * the unbraced body of an if, so each one gets its own block. */
+    /* raise overflow if ux.f is infinite and x is finite */
+    if (e == 0x7f800000) {
+        just_do_it(float) _x = x+x;
+    }
+    /* raise underflow if ux.f is subnormal or zero */
+    if (e == 0) {
+        just_do_it(float) _x = x*x + ux.f*ux.f;
+    }
+    return ux.f;
+}
+
+/* long double variant: forwards to nextafter(), so both arguments are
+ * narrowed to double and the step is taken in double precision. */
+long double nextafterl(long double x, long double y) {
+    const double from = (double)x;
+    const double toward = (double)y;
+    return (long double)nextafter(from, toward);
+}
+
+/* Forwards to nextafter(); the long double direction argument is narrowed to
+ * double before the call. */
+double nexttoward(double x, long double y) {
+    const double toward = (double)y;
+    return nextafter(x, toward);
+}
+
+/* Return the float adjacent to x in the direction of the long double y.
+ * A NaN argument yields x + y; x == y returns y (converted to float). */
+float nexttowardf(float x, long double y) {
+    union {float f; uint32_t i;} ux = {x};
+    uint32_t e;
+    if (isnan(x) || isnan(y)) return x + y;
+    if (x == y) return y;
+    if (x == 0) {
+        /* smallest-magnitude encoding, carrying the sign of y */
+        ux.i = 1;
+        if (signbit(y)) ux.i |= 0x80000000;
+    } else if (x < y) {
+        /* moving up: negative values shrink in magnitude, positive grow */
+        if (signbit(x)) ux.i--;
+        else            ux.i++;
+    } else {
+        /* moving down: negative values grow in magnitude, positive shrink */
+        if (signbit(x)) ux.i++;
+        else            ux.i--;
+    }
+    e = ux.i & 0x7f800000;
+    /* The forced evaluations are declarations, which ISO C does not accept as
+     * the unbraced body of an if, so each one gets its own block. */
+    /* raise overflow if ux.f is infinite and x is finite */
+    if (e == 0x7f800000) {
+        just_do_it(float) _x = x+x;
+    }
+    /* raise underflow if ux.f is subnormal or zero */
+    if (e == 0) {
+        just_do_it(float) _x = x*x + ux.f*ux.f;
+    }
+    return ux.f;
+}
+
+/* Both arguments are already long double, so this simply forwards to
+ * nextafterl(). */
+long double nexttowardl(long double x, long double y) {
+    const long double stepped = nextafterl(x, y);
+    return stepped;
+}
+
+/* Round x to an integral value by adding and then subtracting 1/DBL_EPSILON
+ * (with the sign of x), so the floating-point addition itself does the
+ * rounding. Values whose biased exponent is at least 0x3ff+52 (including
+ * infinities and NaNs) are returned unchanged. A zero result keeps x's sign. */
+double rint(double x) {
+    static const double_t toint = 1/DBL_EPSILON;
+    union {double f; uint64_t i;} bits = {x};
+    const int biased_exp = bits.i>>52 & 0x7ff;
+    const int negative = bits.i>>63;
+    double rounded;
+
+    if (biased_exp >= 0x3ff+52) {
+        return x;
+    }
+    if (negative) {
+        rounded = x - toint + toint;
+    } else {
+        rounded = x + toint - toint;
+    }
+    if (rounded == 0) {
+        return negative ? -0.0 : +0.0;
+    }
+    return rounded;
+}
+
+/* Float counterpart of rint(): adds and subtracts 1/FLT_EPSILON (with the
+ * sign of x) so the addition performs the rounding. Values whose biased
+ * exponent is at least 0x7f+23 are returned unchanged; a zero result keeps
+ * the sign of x. */
+float rintf(float x) {
+    static const float toint = 1/FLT_EPSILON;
+    union {float f; uint32_t i;} bits = {x};
+    const int biased_exp = bits.i>>23 & 0xff;
+    const int negative = bits.i>>31;
+    float rounded;
+
+    if (biased_exp >= 0x7f+23) {
+        return x;
+    }
+    if (negative) {
+        rounded = x - toint + toint;
+    } else {
+        rounded = x + toint - toint;
+    }
+    if (rounded == 0) {
+        return negative ? -0.0f : 0.0f;
+    }
+    return rounded;
+}
+
+/* long double variant: forwards to rint(), so the argument is narrowed to
+ * double on the way in. */
+long double rintl(long double x) {
+    const double narrowed = (double)x;
+    return (long double)rint(narrowed);
+}
+
+#if LONG_MAX < 1U<<53 && defined(FE_INEXACT)
+    /* Slow path of lrint(): rounds with rint() and, when the rounded value
+     * lies outside [LONG_MIN, LONG_MAX] and FE_INEXACT was clear on entry,
+     * clears FE_INEXACT again before the double -> long conversion. */
+    static long lrint_slow(double x)
+    {
+        #pragma STDC FENV_ACCESS ON
+        const int inexact_was_set = fetestexcept(FE_INEXACT);
+        const double rounded = rint(x);
+        if (inexact_was_set == 0 && (rounded > LONG_MAX || rounded < LONG_MIN)) {
+            feclearexcept(FE_INEXACT);
+        }
+        /* conversion */
+        return rounded;
+    }
+
+    /* lrint() for the configuration selected by the #if above. Small
+     * magnitudes are rounded inline by adding and subtracting a toint
+     * constant built from asuint64(1/DBL_EPSILON) OR'd with x's sign bit;
+     * everything else goes through lrint_slow(). */
+    long lrint(double x)
+    {
+        const uint64_t encoding = asuint64(x);
+        const uint32_t abstop = encoding>>32 & 0x7fffffff;
+        const uint64_t sign = encoding & (1ULL << 63);
+
+        if (abstop >= 0x41dfffff) {
+            return lrint_slow(x);
+        }
+        /* |x| < 0x7ffffc00, no overflow */
+        const double_t toint = asdouble(asuint64(1/DBL_EPSILON) | sign);
+        const double_t rounded = x + toint - toint;
+        return (long)rounded;
+    }
+#else
+    /* Otherwise: round with rint() and convert the result to long. */
+    long lrint(double x) {
+        const double rounded = rint(x);
+        return (long)rounded;
+    }
+#endif
+
+/* Round with rintf() and convert the result to long. */
+long lrintf(float x) {
+    const float rounded = rintf(x);
+    return (long)rounded;
+}
+
+/* long double variant: forwards to lrint(), narrowing the argument to double. */
+long lrintl(long double x) {
+    const double narrowed = (double)x;
+    return lrint(narrowed);
+}
+
+/* Round with rint() and convert the result to long long. */
+long long llrint(double x) {
+    const double rounded = rint(x);
+    return (long long)rounded;
+}
+
+/* Round with rintf() and convert the result to long long. */
+long long llrintf(float x) {
+    const float rounded = rintf(x);
+    return (long long)rounded;
+}
+
+/* long double variant: forwards to llrint(), narrowing the argument to double. */
+long long llrintl(long double x) {
+    const double narrowed = (double)x;
+    return llrint(narrowed);
+}
+
+/* Round x to the nearest integral value, with halfway cases going away from
+ * zero. Works on |x| and restores the sign at the end. Values whose biased
+ * exponent is at least 0x3ff+52 are returned unchanged; values whose biased
+ * exponent is below 0x3ff-1 give a zero carrying the sign of x. */
+double round(double x) {
+    static const double_t toint = 1/DBL_EPSILON;
+    union {double f; uint64_t i;} bits = {x};
+    const int biased_exp = bits.i >> 52 & 0x7ff;
+    const int negative = (bits.i >> 63) != 0;
+    double_t delta;
+    double_t result;
+
+    if (biased_exp >= 0x3ff+52) {
+        return x;
+    }
+    if (negative) {
+        x = -x;
+    }
+    if (biased_exp < 0x3ff-1) {
+        /* raise inexact if x!=0 */
+        just_do_it(float) _x = x + toint;
+        return 0*bits.f;
+    }
+    /* delta = (x rounded by the add/subtract trick) - x */
+    delta = x + toint - toint - x;
+    if (delta > 0.5) {
+        result = delta + x - 1;
+    } else if (delta <= -0.5) {
+        result = delta + x + 1;
+    } else {
+        result = delta + x;
+    }
+    if (negative) {
+        result = -result;
+    }
+    return result;
+}
+
+/* Round x to the nearest integral value, with halfway cases going away from
+ * zero. Works on |x| and restores the sign at the end. Values whose biased
+ * exponent is at least 0x7f+23 are returned unchanged; values whose biased
+ * exponent is below 0x7f-1 give a zero carrying the sign of x. */
+float roundf(float x) {
+    /* toint must be 1/epsilon of the type the arithmetic below is evaluated
+     * in (float_t). With a wider constant, x + toint is exact, the computed
+     * difference is always 0 and x would be returned unrounded. */
+#if FLT_EVAL_METHOD == 1
+    static const float_t toint = 1/DBL_EPSILON;
+#elif FLT_EVAL_METHOD == 2
+    static const float_t toint = 1/LDBL_EPSILON;
+#else
+    static const float_t toint = 1/FLT_EPSILON;
+#endif
+    union {float f; uint32_t i;} u = {x};
+    int e = u.i >> 23 & 0xff;
+    float_t y;
+    if (e >= 0x7f+23) return x;
+    if (u.i >> 31) x = -x;
+    if (e < 0x7f-1) {
+        /* raise inexact if x!=0 */
+        just_do_it(float) _x = x + toint;
+        return 0*u.f;
+    }
+    /* y = (x rounded by the add/subtract trick) - x */
+    y = x + toint - toint - x;
+    if (y > 0.5f)        y = y + x - 1;
+    else if (y <= -0.5f) y = y + x + 1;
+    else                 y = y + x;
+    if (u.i >> 31) y = -y;
+    return y;
+}
+
+/* long double variant: forwards to round(), so the argument is narrowed to
+ * double on the way in. */
+long double roundl(long double x) {
+    const double narrowed = (double)x;
+    return (long double)round(narrowed);
+}
+
+/* Round with round() and convert the result to long. */
+long lround(double x) {
+    const double rounded = round(x);
+    return (long)rounded;
+}
+
+/* Round with roundf() and convert the result to long. */
+long lroundf(float x) {
+    const float rounded = roundf(x);
+    return (long)rounded;
+}
+
+/* Round with roundl() and convert the result to long. */
+long lroundl(long double x) {
+    const long double rounded = roundl(x);
+    return (long)rounded;
+}
+
+/* Round with round() and convert the result to long long. */
+long long llround(double x) {
+    const double rounded = round(x);
+    return (long long)rounded;
+}
+
+/* Round with roundf() and convert the result to long long. */
+long long llroundf(float x) {
+    const float rounded = roundf(x);
+    return (long long)rounded;
+}
+
+/* Round with roundl() and convert the result to long long. */
+long long llroundl(long double x) {
+    const long double rounded = roundl(x);
+    return (long long)rounded;
+}
+
+/* Smallest integral value not less than x. Values whose biased exponent is
+ * at least 0x3ff+52, and zeros, are returned unchanged. For a biased exponent
+ * of at most 0x3ff-1 the result is -0.0 for negative x and 1 otherwise. */
+double ceil(double x) {
+    static const double_t toint = 1/DBL_EPSILON;
+    union {double f; uint64_t i;} bits = {x};
+    const int biased_exp = bits.i >> 52 & 0x7ff;
+    const int negative = (bits.i >> 63) != 0;
+    double_t delta;
+
+    if (biased_exp >= 0x3ff+52 || x == 0) {
+        return x;
+    }
+    /* delta = int(x) - x, where int(x) is an integer neighbor of x */
+    if (negative) {
+        delta = x - toint + toint - x;
+    } else {
+        delta = x + toint - toint - x;
+    }
+    /* special case because of non-nearest rounding modes */
+    if (biased_exp <= 0x3ff-1) {
+        just_do_it(double) _x = delta;
+        return negative ? -0.0 : 1;
+    }
+    /* the neighbor was below x: step up by one */
+    if (delta < 0) {
+        return x + delta + 1;
+    }
+    return x + delta;
+}
+
+/* Smallest integral float not less than x, computed on the 32-bit encoding.
+ * With an unbiased exponent of 23 or more x is returned unchanged; otherwise
+ * the fraction bits below the integer part are cleared, after bumping
+ * non-negative values up past the next integer. */
+float ceilf(float x) {
+    union {float f; uint32_t i;} bits = {x};
+    const int unbiased_exp = (int)(bits.i >> 23 & 0xff) - 0x7f;
+    uint32_t frac_mask;
+
+    if (unbiased_exp >= 23) {
+        return x;
+    }
+    if (unbiased_exp < 0) {
+        /* exponent below zero: result is -0, +0 or 1 */
+        just_do_it(float) _x = (x + 0x1p120f);
+        if (bits.i >> 31) {
+            bits.f = -0.0;
+        } else if (bits.i << 1) {
+            bits.f = 1.0;
+        }
+        return bits.f;
+    }
+    frac_mask = 0x007fffff >> unbiased_exp;
+    if ((bits.i & frac_mask) == 0) {
+        /* no fraction bits set: already integral */
+        return x;
+    }
+    just_do_it(float) _x = (x + 0x1p120f);
+    if (bits.i >> 31 == 0) {
+        bits.i += frac_mask;
+    }
+    bits.i &= ~frac_mask;
+    return bits.f;
+}
+
+/* long double variant: forwards to ceil(), so the argument is narrowed to
+ * double on the way in. */
+long double ceill(long double x) {
+    const double narrowed = (double)x;
+    return (long double)ceil(narrowed);
+}
+
+/* Largest integral value not greater than x. Values whose biased exponent is
+ * at least 0x3ff+52, and zeros, are returned unchanged. For a biased exponent
+ * of at most 0x3ff-1 the result is -1 for negative x and 0 otherwise. */
+double floor(double x) {
+    static const double_t toint = 1/DBL_EPSILON;
+    union {double f; uint64_t i;} bits = {x};
+    const int biased_exp = bits.i >> 52 & 0x7ff;
+    const int negative = (bits.i >> 63) != 0;
+    double_t delta;
+
+    if (biased_exp >= 0x3ff+52 || x == 0) {
+        return x;
+    }
+    /* delta = int(x) - x, where int(x) is an integer neighbor of x */
+    if (negative) {
+        delta = x - toint + toint - x;
+    } else {
+        delta = x + toint - toint - x;
+    }
+    /* special case because of non-nearest rounding modes */
+    if (biased_exp <= 0x3ff-1) {
+        just_do_it(double) _x = (delta);
+        return negative ? -1 : 0;
+    }
+    /* the neighbor was above x: step down by one */
+    if (delta > 0) {
+        return x + delta - 1;
+    }
+    return x + delta;
+}
+
+/* Largest integral float not greater than x, computed on the 32-bit encoding.
+ * With an unbiased exponent of 23 or more x is returned unchanged; otherwise
+ * the fraction bits below the integer part are cleared, after bumping
+ * negative values past the next integer in magnitude. */
+float floorf(float x) {
+    union {float f; uint32_t i;} bits = {x};
+    const int unbiased_exp = (int)(bits.i >> 23 & 0xff) - 0x7f;
+    uint32_t frac_mask;
+
+    if (unbiased_exp >= 23) {
+        return x;
+    }
+    if (unbiased_exp < 0) {
+        /* exponent below zero: result is +0, -0 or -1 */
+        just_do_it(float) _x = (x + 0x1p120f);
+        if (bits.i >> 31 == 0) {
+            bits.i = 0;
+        } else if (bits.i << 1) {
+            bits.f = -1.0;
+        }
+        return bits.f;
+    }
+    frac_mask = 0x007fffff >> unbiased_exp;
+    if ((bits.i & frac_mask) == 0) {
+        /* no fraction bits set: already integral */
+        return x;
+    }
+    just_do_it(float) _x = (x + 0x1p120f);
+    if (bits.i >> 31) {
+        bits.i += frac_mask;
+    }
+    bits.i &= ~frac_mask;
+    return bits.f;
+}
+
+/* long double variant: forwards to floor(), so the argument is narrowed to
+ * double on the way in. */
+long double floorl(long double x) {
+    const double narrowed = (double)x;
+    return (long double)floor(narrowed);
+}
+
+/* Drop the fractional part of x by clearing the low bits of its 64-bit
+ * encoding. The shift count is the unbiased exponent plus 12; a count of 64
+ * or more returns x unchanged, and a count below 12 is replaced by 1 so that
+ * everything except the sign bit is cleared. */
+double trunc(double x) {
+    union {double f; uint64_t i;} bits = {x};
+    const int raw_shift = (int)(bits.i >> 52 & 0x7ff) - 0x3ff + 12;
+    uint64_t frac_mask;
+
+    if (raw_shift >= 52 + 12) {
+        return x;
+    }
+    frac_mask = -1ULL >> (raw_shift < 12 ? 1 : raw_shift);
+    if ((bits.i & frac_mask) == 0) {
+        /* nothing to clear: already integral */
+        return x;
+    }
+    just_do_it(double) _x = (x + 0x1p120f);
+    bits.i &= ~frac_mask;
+    return bits.f;
+}
+
+/* Drop the fractional part of x by clearing the low bits of its 32-bit
+ * encoding. The shift count is the unbiased exponent plus 9; a count of 32
+ * or more returns x unchanged, and a count below 9 is replaced by 1 so that
+ * everything except the sign bit is cleared. */
+float truncf(float x) {
+    union {float f; uint32_t i;} bits = {x};
+    const int raw_shift = (int)(bits.i >> 23 & 0xff) - 0x7f + 9;
+    uint32_t frac_mask;
+
+    if (raw_shift >= 23 + 9) {
+        return x;
+    }
+    frac_mask = -1U >> (raw_shift < 9 ? 1 : raw_shift);
+    if ((bits.i & frac_mask) == 0) {
+        /* nothing to clear: already integral */
+        return x;
+    }
+    just_do_it(float) _x = (x + 0x1p120f);
+    bits.i &= ~frac_mask;
+    return bits.f;
+}
+
+/* long double variant: forwards to trunc(), so the argument is narrowed to
+ * double on the way in. */
+long double truncl(long double x) {
+    const double narrowed = (double)x;
+    return (long double)trunc(narrowed);
+}
--- a/test/test_math.c
+++ b/test/test_math.c
@ -3,6 +3,7 @@
 #include <math.h>
 #include <float.h>
 #include <fenv.h>
+#include <inttypes.h>

 const char *show_classification(double x) {
    switch(fpclassify(x)) {
@ -55,22 +56,52 @@ int main() {
    printf("rint(1.1) = %f\n", rint(1.1));
    if(fetestexcept(FE_INEXACT)) printf("    FE_INEXACT was raised\n");

-
+    printf("\n\n=== nextafter === \n");
    float from1 = 0, to1 = nextafterf(from1, 1);
    printf("The next representable float after %f is %f\n", from1, to1);
    float from2 = 1, to2 = nextafterf(from2, 2);
    printf("The next representable float after %f is %f\n", from2, to2);
-    {
-        #pragma STDC FENV_ACCESS ON
-        feclearexcept(FE_ALL_EXCEPT);
-        double from4 = DBL_MAX, to4 = nextafter(from4, INFINITY);
-        printf("The next representable double after %f is %f\n",
-               from4, to4);
-        if(fetestexcept(FE_OVERFLOW)) printf("   raised FE_OVERFLOW\n");
-        if(fetestexcept(FE_INEXACT))  printf("   raised FE_INEXACT\n");
-    }
    float from5 = 0.0, to5 = nextafter(from5, -0.0);
    printf("nextafter(+0.0, -0.0) gives %f (%f)\n", to5, to5);

+    printf("\n\n=== ceil === \n");
+    printf("ceil(+2.4) = %f\n", ceil(2.4));
+    printf("ceil(-2.4) = %f\n", ceil(-2.4));
+    printf("ceil(-0.0) = %f\n", ceil(-0.0));
+    printf("ceil(-Inf) = %f\n", ceil(-INFINITY));
+
+    printf("\n\n=== floor === \n");
+    printf("floor(+2.7) = %f\n", floor(2.7));
+    printf("floor(-2.7) = %f\n", floor(-2.7));
+    printf("floor(-0.0) = %f\n", floor(-0.0));
+    printf("floor(-Inf) = %f\n", floor(-INFINITY));
+
+    printf("\n\n=== trunk === \n");
+    printf("trunc(+2.7) = %f\n", trunc(2.7));
+    printf("trunc(-2.7) = %f\n", trunc(-2.7));
+    printf("trunc(-0.0) = %f\n", trunc(-0.0));
+    printf("trunc(-Inf) = %f\n", trunc(-INFINITY));
+
+    printf("\n\n=== round === \n");
+    printf("round(+2.3)  = %f  ",  round(2.3));
+    printf("round(+2.5)  = %f  ",  round(2.5));
+    printf("round(+2.7)  = %f\n",  round(2.7));
+    printf("round(-2.3)  = %f  ",  round(-2.3));
+    printf("round(-2.5)  = %f  ",  round(-2.5));
+    printf("round(-2.7)  = %f\n",  round(-2.7));
+    printf("round(-0.0)  = %f\n",  round(-0.0));
+    printf("round(-Inf)  = %f\n",  round(-INFINITY));
+    printf("lround(+2.3) = %ld  ", lround(2.3));
+    printf("lround(+2.5) = %ld  ", lround(2.5));
+    printf("lround(+2.7) = %ld\n", lround(2.7));
+    printf("lround(-2.3) = %ld  ", lround(-2.3));
+    printf("lround(-2.5) = %ld  ", lround(-2.5));
+    printf("lround(-2.7) = %ld\n", lround(-2.7));
+    printf("lround(-0.0) = %ld\n", lround(-0.0));
+    printf("lround(-Inf) = %ld\n", lround(-INFINITY));
+    feclearexcept(FE_ALL_EXCEPT);
+    printf("lround(LONG_MAX+1.5) = %ld\n", lround(LONG_MAX+1.5));
+    if(fetestexcept(FE_INVALID)) printf("    FE_INVALID was raised\n");
+
    return 0;
 }