math.h rounding functions

This commit is contained in:
bumbread 2022-06-11 17:17:24 +11:00
parent 2320a22706
commit effd0acb63
3 changed files with 450 additions and 191 deletions

View File

@ -81,184 +81,3 @@ long double nanl(const char *s) {
return NAN;
}
double rint(double x) {
    /* 1/DBL_EPSILON: adding and then removing this value pushes the
       fractional bits of x out of the significand. */
    static const double_t magic = 1/DBL_EPSILON;
    union {double f; uint64_t i;} bits = {x};
    const int biased_exp = bits.i>>52 & 0x7ff;
    const int negative = bits.i>>63;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (biased_exp >= 0x3ff+52) {
        return x;
    }

    double rounded;
    if (negative) {
        rounded = x - magic + magic;
    } else {
        rounded = x + magic - magic;
    }

    /* Give a zero result the sign of the input. */
    if (rounded == 0) {
        return negative ? -0.0 : +0.0;
    }
    return rounded;
}
float rintf(float x) {
    /* 1/FLT_EPSILON: adding and then removing this value pushes the
       fractional bits of x out of the significand. */
    static const float magic = 1/FLT_EPSILON;
    union {float f; uint32_t i;} bits = {x};
    const int biased_exp = bits.i>>23 & 0xff;
    const int negative = bits.i>>31;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (biased_exp >= 0x7f+23) {
        return x;
    }

    float rounded;
    if (negative) {
        rounded = x - magic + magic;
    } else {
        rounded = x + magic - magic;
    }

    /* Give a zero result the sign of the input. */
    if (rounded == 0) {
        return negative ? -0.0f : 0.0f;
    }
    return rounded;
}
long double rintl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double rounded = rint(x);
    return rounded;
}
double nearbyint(double x) {
#pragma STDC FENV_ACCESS ON
    /* Record whether FE_INEXACT was already set before rounding. */
    const int inexact_before = fetestexcept(FE_INEXACT);
    const double rounded = rint(x);

    /* If it was not set on entry, clear whatever rint() may have raised. */
    if (inexact_before == 0) {
        feclearexcept(FE_INEXACT);
    }
    return rounded;
}
float nearbyintf(float x) {
#pragma STDC FENV_ACCESS ON
    /* Record whether FE_INEXACT was already set before rounding. */
    const int inexact_before = fetestexcept(FE_INEXACT);
    const float rounded = rintf(x);

    /* If it was not set on entry, clear whatever rintf() may have raised. */
    if (inexact_before == 0) {
        feclearexcept(FE_INEXACT);
    }
    return rounded;
}
long double nearbyintl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double rounded = nearbyint(x);
    return rounded;
}
/*
 * Returns the representable double adjacent to x in the direction of y.
 *
 *  - If either argument is NaN, x + y (a NaN) is returned.
 *  - If x and y have identical bit patterns, or both are zeros, y is returned.
 *  - Otherwise the bit pattern of x is stepped by one unit towards y.
 *
 * When the result is infinite, or is subnormal/zero, an extra arithmetic
 * expression is evaluated into a volatile temporary purely so that the
 * corresponding floating-point exception gets raised.
 */
double nextafter(double x, double y) {
    union {double f; uint64_t i;} ux={x}, uy={y};
    uint64_t ax, ay;
    int e;

    if (isnan(x) || isnan(y)) {
        return x + y;
    }
    if (ux.i == uy.i) {
        return y;
    }
    /* Magnitudes: the bit patterns with the sign bit masked off. */
    ax = ux.i & -1ULL/2;
    ay = uy.i & -1ULL/2;
    if (ax == 0) {
        if (ay == 0) {
            return y;
        }
        /* Step off zero: smallest subnormal carrying the sign of y. */
        ux.i = (uy.i & 1ULL<<63) | 1;
    } else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63)) {
        /* y is closer to zero than x, or on the other side of it. */
        ux.i--;
    } else {
        ux.i++;
    }
    e = ux.i >> 52 & 0x7ff;
    /* raise overflow if ux.f is infinite and x is finite */
    if (e == 0x7ff) {
        /* A declaration may not be the direct body of an `if` in ISO C,
           so the forced evaluation lives in its own block. */
        volatile double force = x+x;
        (void)force;
    }
    /* raise underflow if ux.f is subnormal or zero */
    if (e == 0) {
        volatile double force = x*x + ux.f*ux.f;
        (void)force;
    }
    return ux.f;
}
/*
 * Returns the representable float adjacent to x in the direction of y.
 *
 *  - If either argument is NaN, x + y (a NaN) is returned.
 *  - If x and y have identical bit patterns, or both are zeros, y is returned.
 *  - Otherwise the bit pattern of x is stepped by one unit towards y.
 *
 * When the result is infinite, or is subnormal/zero, an extra arithmetic
 * expression is evaluated into a volatile temporary purely so that the
 * corresponding floating-point exception gets raised.
 */
float nextafterf(float x, float y) {
    union {float f; uint32_t i;} ux={x}, uy={y};
    uint32_t ax, ay, e;

    if (isnan(x) || isnan(y)) {
        return x + y;
    }
    if (ux.i == uy.i) {
        return y;
    }
    /* Magnitudes: the bit patterns with the sign bit masked off. */
    ax = ux.i & 0x7fffffff;
    ay = uy.i & 0x7fffffff;
    if (ax == 0) {
        if (ay == 0) {
            return y;
        }
        /* Step off zero: smallest subnormal carrying the sign of y. */
        ux.i = (uy.i & 0x80000000) | 1;
    } else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000)) {
        /* y is closer to zero than x, or on the other side of it. */
        ux.i--;
    } else {
        ux.i++;
    }
    e = ux.i & 0x7f800000;
    /* raise overflow if ux.f is infinite and x is finite */
    if (e == 0x7f800000) {
        /* A declaration may not be the direct body of an `if` in ISO C,
           so the forced evaluation lives in its own block. */
        volatile float force = x+x;
        (void)force;
    }
    /* raise underflow if ux.f is subnormal or zero */
    if (e == 0) {
        volatile float force = x*x + ux.f*ux.f;
        (void)force;
    }
    return ux.f;
}
long double nextafterl(long double x, long double y) {
    /* Delegates to the double version: both arguments are narrowed to
       double before the call. */
    const double from = x;
    const double toward = y;
    return nextafter(from, toward);
}
double nexttoward(double x, long double y) {
    /* The direction argument is narrowed to double and the work is
       handed to nextafter(). */
    const double toward = y;
    return nextafter(x, toward);
}
/*
 * Returns the representable float adjacent to x in the direction of the
 * long double y.
 *
 *  - If either argument is NaN, x + y (a NaN) is returned.
 *  - If x compares equal to y, y (converted to float) is returned.
 *  - Otherwise the bit pattern of x is stepped by one unit towards y.
 *
 * When the result is infinite, or is subnormal/zero, an extra arithmetic
 * expression is evaluated into a volatile temporary purely so that the
 * corresponding floating-point exception gets raised.
 */
float nexttowardf(float x, long double y) {
    union {float f; uint32_t i;} ux = {x};
    uint32_t e;

    if (isnan(x) || isnan(y)) {
        return x + y;
    }
    if (x == y) {
        return y;
    }
    if (x == 0) {
        /* Step off zero: smallest subnormal carrying the sign of y. */
        ux.i = 1;
        if (signbit(y)) {
            ux.i |= 0x80000000;
        }
    } else if (x < y) {
        /* Moving up: magnitude shrinks for negative x, grows otherwise. */
        if (signbit(x)) {
            ux.i--;
        } else {
            ux.i++;
        }
    } else {
        /* Moving down: magnitude grows for negative x, shrinks otherwise. */
        if (signbit(x)) {
            ux.i++;
        } else {
            ux.i--;
        }
    }
    e = ux.i & 0x7f800000;
    /* raise overflow if ux.f is infinite and x is finite */
    if (e == 0x7f800000) {
        /* A declaration may not be the direct body of an `if` in ISO C,
           so the forced evaluation lives in its own block. */
        volatile float force = x+x;
        (void)force;
    }
    /* raise underflow if ux.f is subnormal or zero */
    if (e == 0) {
        volatile float force = x*x + ux.f*ux.f;
        (void)force;
    }
    return ux.f;
}
long double nexttowardl(long double x, long double y) {
    /* Both arguments are already long double, so this is nextafterl(). */
    const long double neighbour = nextafterl(x, y);
    return neighbour;
}
double round(double x) {
    /* 1/DBL_EPSILON: adding and then removing it rounds to an integer. */
    static const double_t toint = 1/DBL_EPSILON;
    union {double f; uint64_t i;} orig = {x};
    const int biased_exp = orig.i >> 52 & 0x7ff;
    const int negative = (orig.i >> 63) != 0;
    double_t delta;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (biased_exp >= 0x3ff+52) {
        return x;
    }
    /* Work on the magnitude; the sign is restored at the end. */
    if (negative) {
        x = -x;
    }
    if (biased_exp < 0x3ff-1) {
        /* |x| < 0.5: result is a zero with the sign of the input.
           The sum is evaluated only to raise inexact if x!=0. */
        just_do_it(float) _x = x + toint;
        return 0*orig.f;
    }
    /* delta = (integer neighbour of x) - x */
    delta = x + toint - toint - x;
    if (delta > 0.5) {
        delta = delta + x - 1;
    } else if (delta <= -0.5) {
        delta = delta + x + 1;
    } else {
        delta = delta + x;
    }
    return negative ? -delta : delta;
}
/*
 * Rounds x to the nearest integer value, with halfway cases rounded away
 * from zero.
 *
 * The rounding trick below needs `x + toint` to be evaluated in a format
 * whose epsilon is 1/toint, so that the addition discards exactly the
 * fractional bits. toint therefore has type float_t and is built from
 * the epsilon matching the evaluation format selected by FLT_EVAL_METHOD.
 * (With a double-typed toint of 1/FLT_EPSILON the sum is exact in double,
 * `x + toint - toint - x` is always 0, and x would be returned unrounded.)
 */
float roundf(float x) {
#if FLT_EVAL_METHOD == 0
    static const float_t toint = 1/FLT_EPSILON;
#elif FLT_EVAL_METHOD == 1
    static const float_t toint = 1/DBL_EPSILON;
#else
    static const float_t toint = 1/LDBL_EPSILON;
#endif
    union {float f; uint32_t i;} u = {x};
    int e = u.i >> 23 & 0xff;
    float_t y;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (e >= 0x7f+23) {
        return x;
    }
    /* Work on the magnitude; the sign is restored at the end. */
    if (u.i >> 31) {
        x = -x;
    }
    if (e < 0x7f-1) {
        /* |x| < 0.5: result is a zero with the sign of the input.
           The sum is evaluated only to raise inexact if x != 0. */
        volatile float force = x + toint;
        (void)force;
        return 0*u.f;
    }
    /* y = (integer neighbour of x) - x */
    y = x + toint - toint - x;
    if (y > 0.5f) {
        y = y + x - 1;
    } else if (y <= -0.5f) {
        y = y + x + 1;
    } else {
        y = y + x;
    }
    if (u.i >> 31) {
        y = -y;
    }
    return y;
}
long double roundl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double rounded = round(x);
    return rounded;
}

409
code/math/round.c Normal file
View File

@ -0,0 +1,409 @@
#include <math.h>
#include <stdint.h>
#include <fenv.h>
#include <float.h>
#include <limits.h>
#include <_compiler.h>
/*
 * just_do_it(t) declares a volatile temporary of type t. Assigning an
 * expression to it forces the expression to be evaluated, so that the
 * floating-point exceptions it raises are not optimised away.
 */
#if defined(_compiler_clang) || defined(_compiler_gnu)
#define just_do_it(t) __attribute__((unused)) volatile t
#else
/* Portable fallback for compilers without GNU attributes. */
#define just_do_it(t) volatile t
#endif
/*
 * Bit-level reinterpretation between double and uint64_t.
 *
 * Designated initialisers are required: without a designator a union
 * initialiser always sets the FIRST member, so an integer argument would
 * be value-converted to double instead of being reinterpreted.
 */
#define asuint64(x) ((union {double f; uint64_t i;}){.f = (x)}).i
#define asdouble(x) ((union {double f; uint64_t i;}){.i = (x)}).f
double nearbyint(double x) {
#pragma STDC FENV_ACCESS ON
    /* Record whether FE_INEXACT was already set before rounding. */
    const int inexact_before = fetestexcept(FE_INEXACT);
    const double rounded = rint(x);

    /* If it was not set on entry, clear whatever rint() may have raised. */
    if (inexact_before == 0) {
        feclearexcept(FE_INEXACT);
    }
    return rounded;
}
float nearbyintf(float x) {
#pragma STDC FENV_ACCESS ON
    /* Record whether FE_INEXACT was already set before rounding. */
    const int inexact_before = fetestexcept(FE_INEXACT);
    const float rounded = rintf(x);

    /* If it was not set on entry, clear whatever rintf() may have raised. */
    if (inexact_before == 0) {
        feclearexcept(FE_INEXACT);
    }
    return rounded;
}
long double nearbyintl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double rounded = nearbyint(x);
    return rounded;
}
/*
 * Returns the representable double adjacent to x in the direction of y.
 *
 *  - If either argument is NaN, x + y (a NaN) is returned.
 *  - If x and y have identical bit patterns, or both are zeros, y is returned.
 *  - Otherwise the bit pattern of x is stepped by one unit towards y.
 *
 * When the result is infinite, or is subnormal/zero, an extra arithmetic
 * expression is evaluated into a volatile temporary purely so that the
 * corresponding floating-point exception gets raised.
 */
double nextafter(double x, double y) {
    union {double f; uint64_t i;} ux={x}, uy={y};
    uint64_t ax, ay;
    int e;

    if (isnan(x) || isnan(y)) {
        return x + y;
    }
    if (ux.i == uy.i) {
        return y;
    }
    /* Magnitudes: the bit patterns with the sign bit masked off. */
    ax = ux.i & -1ULL/2;
    ay = uy.i & -1ULL/2;
    if (ax == 0) {
        if (ay == 0) {
            return y;
        }
        /* Step off zero: smallest subnormal carrying the sign of y. */
        ux.i = (uy.i & 1ULL<<63) | 1;
    } else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63)) {
        /* y is closer to zero than x, or on the other side of it. */
        ux.i--;
    } else {
        ux.i++;
    }
    e = ux.i >> 52 & 0x7ff;
    /* raise overflow if ux.f is infinite and x is finite */
    if (e == 0x7ff) {
        /* A declaration may not be the direct body of an `if` in ISO C,
           so the forced evaluation lives in its own block. */
        volatile double force = x+x;
        (void)force;
    }
    /* raise underflow if ux.f is subnormal or zero */
    if (e == 0) {
        volatile double force = x*x + ux.f*ux.f;
        (void)force;
    }
    return ux.f;
}
/*
 * Returns the representable float adjacent to x in the direction of y.
 *
 *  - If either argument is NaN, x + y (a NaN) is returned.
 *  - If x and y have identical bit patterns, or both are zeros, y is returned.
 *  - Otherwise the bit pattern of x is stepped by one unit towards y.
 *
 * When the result is infinite, or is subnormal/zero, an extra arithmetic
 * expression is evaluated into a volatile temporary purely so that the
 * corresponding floating-point exception gets raised.
 */
float nextafterf(float x, float y) {
    union {float f; uint32_t i;} ux={x}, uy={y};
    uint32_t ax, ay, e;

    if (isnan(x) || isnan(y)) {
        return x + y;
    }
    if (ux.i == uy.i) {
        return y;
    }
    /* Magnitudes: the bit patterns with the sign bit masked off. */
    ax = ux.i & 0x7fffffff;
    ay = uy.i & 0x7fffffff;
    if (ax == 0) {
        if (ay == 0) {
            return y;
        }
        /* Step off zero: smallest subnormal carrying the sign of y. */
        ux.i = (uy.i & 0x80000000) | 1;
    } else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000)) {
        /* y is closer to zero than x, or on the other side of it. */
        ux.i--;
    } else {
        ux.i++;
    }
    e = ux.i & 0x7f800000;
    /* raise overflow if ux.f is infinite and x is finite */
    if (e == 0x7f800000) {
        /* A declaration may not be the direct body of an `if` in ISO C,
           so the forced evaluation lives in its own block. */
        volatile float force = x+x;
        (void)force;
    }
    /* raise underflow if ux.f is subnormal or zero */
    if (e == 0) {
        volatile float force = x*x + ux.f*ux.f;
        (void)force;
    }
    return ux.f;
}
long double nextafterl(long double x, long double y) {
    /* Delegates to the double version: both arguments are narrowed to
       double before the call. */
    const double from = x;
    const double toward = y;
    return nextafter(from, toward);
}
double nexttoward(double x, long double y) {
    /* The direction argument is narrowed to double and the work is
       handed to nextafter(). */
    const double toward = y;
    return nextafter(x, toward);
}
/*
 * Returns the representable float adjacent to x in the direction of the
 * long double y.
 *
 *  - If either argument is NaN, x + y (a NaN) is returned.
 *  - If x compares equal to y, y (converted to float) is returned.
 *  - Otherwise the bit pattern of x is stepped by one unit towards y.
 *
 * When the result is infinite, or is subnormal/zero, an extra arithmetic
 * expression is evaluated into a volatile temporary purely so that the
 * corresponding floating-point exception gets raised.
 */
float nexttowardf(float x, long double y) {
    union {float f; uint32_t i;} ux = {x};
    uint32_t e;

    if (isnan(x) || isnan(y)) {
        return x + y;
    }
    if (x == y) {
        return y;
    }
    if (x == 0) {
        /* Step off zero: smallest subnormal carrying the sign of y. */
        ux.i = 1;
        if (signbit(y)) {
            ux.i |= 0x80000000;
        }
    } else if (x < y) {
        /* Moving up: magnitude shrinks for negative x, grows otherwise. */
        if (signbit(x)) {
            ux.i--;
        } else {
            ux.i++;
        }
    } else {
        /* Moving down: magnitude grows for negative x, shrinks otherwise. */
        if (signbit(x)) {
            ux.i++;
        } else {
            ux.i--;
        }
    }
    e = ux.i & 0x7f800000;
    /* raise overflow if ux.f is infinite and x is finite */
    if (e == 0x7f800000) {
        /* A declaration may not be the direct body of an `if` in ISO C,
           so the forced evaluation lives in its own block. */
        volatile float force = x+x;
        (void)force;
    }
    /* raise underflow if ux.f is subnormal or zero */
    if (e == 0) {
        volatile float force = x*x + ux.f*ux.f;
        (void)force;
    }
    return ux.f;
}
long double nexttowardl(long double x, long double y) {
    /* Both arguments are already long double, so this is nextafterl(). */
    const long double neighbour = nextafterl(x, y);
    return neighbour;
}
double rint(double x) {
    /* 1/DBL_EPSILON: adding and then removing this value pushes the
       fractional bits of x out of the significand. */
    static const double_t magic = 1/DBL_EPSILON;
    union {double f; uint64_t i;} bits = {x};
    const int biased_exp = bits.i>>52 & 0x7ff;
    const int negative = bits.i>>63;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (biased_exp >= 0x3ff+52) {
        return x;
    }

    double rounded;
    if (negative) {
        rounded = x - magic + magic;
    } else {
        rounded = x + magic - magic;
    }

    /* Give a zero result the sign of the input. */
    if (rounded == 0) {
        return negative ? -0.0 : +0.0;
    }
    return rounded;
}
float rintf(float x) {
    /* 1/FLT_EPSILON: adding and then removing this value pushes the
       fractional bits of x out of the significand. */
    static const float magic = 1/FLT_EPSILON;
    union {float f; uint32_t i;} bits = {x};
    const int biased_exp = bits.i>>23 & 0xff;
    const int negative = bits.i>>31;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (biased_exp >= 0x7f+23) {
        return x;
    }

    float rounded;
    if (negative) {
        rounded = x - magic + magic;
    } else {
        rounded = x + magic - magic;
    }

    /* Give a zero result the sign of the input. */
    if (rounded == 0) {
        return negative ? -0.0f : 0.0f;
    }
    return rounded;
}
long double rintl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double rounded = rint(x);
    return rounded;
}
#if LONG_MAX < 1U<<53 && defined(FE_INEXACT)
/*
 * Slow path of lrint(): rounds with rint() and converts to long.
 * If FE_INEXACT was not set on entry and the rounded value lies outside
 * [LONG_MIN, LONG_MAX], FE_INEXACT is cleared before the conversion.
 */
static long lrint_slow(double x)
{
#pragma STDC FENV_ACCESS ON
    const int inexact_before = fetestexcept(FE_INEXACT);
    const double rounded = rint(x);

    /* The range comparison is only evaluated when the flag was clear. */
    if (inexact_before == 0 && (rounded > LONG_MAX || rounded < LONG_MIN)) {
        feclearexcept(FE_INEXACT);
    }
    /* conversion */
    return rounded;
}
/*
 * Rounds x to an integer and returns it as a long.
 *
 * Fast path: when the high word of |x| is below 0x41dfffff the result
 * fits in a 32-bit long, so x is rounded by adding and subtracting
 * 1/DBL_EPSILON carrying the sign of x. Everything else goes through
 * lrint_slow().
 *
 * The bit casts use a local union with explicit member access rather than
 * the file-level asdouble() macro: a union initialiser without a
 * designator sets the first (double) member, which would value-convert
 * the integer pattern instead of reinterpreting it and yield a wrong
 * rounding constant.
 */
long lrint(double x)
{
    union {double f; uint64_t i;} in = {x};
    uint32_t abstop = in.i>>32 & 0x7fffffff;
    uint64_t sign = in.i & (1ULL << 63);

    if (abstop < 0x41dfffff) {
        /* |x| < 0x7ffffc00, no overflow */
        union {double f; uint64_t i;} magic = {1/DBL_EPSILON};
        /* Copy the sign of x onto the rounding constant. */
        magic.i |= sign;
        double_t toint = magic.f;
        double_t y = x + toint - toint;
        return (long)y;
    }
    return lrint_slow(x);
}
#else
long lrint(double x) {
    /* Round with rint(), then convert the result to long. */
    const double rounded = rint(x);
    return (long)rounded;
}
#endif
long lrintf(float x) {
    /* Round with rintf(), then convert the result to long. */
    const float rounded = rintf(x);
    return (long)rounded;
}
long lrintl(long double x) {
    /* The argument is narrowed to double and handed to lrint(). */
    const double narrowed = (double)x;
    return lrint(narrowed);
}
long long llrint(double x) {
    /* Round with rint(), then convert the result to long long. */
    const double rounded = rint(x);
    return (long long)rounded;
}
long long llrintf(float x) {
    /* Round with rintf(), then convert the result to long long. */
    const float rounded = rintf(x);
    return (long long)rounded;
}
long long llrintl(long double x) {
    /* The argument is narrowed to double and handed to llrint(). */
    const double narrowed = (double)x;
    return llrint(narrowed);
}
double round(double x) {
    /* 1/DBL_EPSILON: adding and then removing it rounds to an integer. */
    static const double_t toint = 1/DBL_EPSILON;
    union {double f; uint64_t i;} orig = {x};
    const int biased_exp = orig.i >> 52 & 0x7ff;
    const int negative = (orig.i >> 63) != 0;
    double_t delta;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (biased_exp >= 0x3ff+52) {
        return x;
    }
    /* Work on the magnitude; the sign is restored at the end. */
    if (negative) {
        x = -x;
    }
    if (biased_exp < 0x3ff-1) {
        /* |x| < 0.5: result is a zero with the sign of the input.
           The sum is evaluated only to raise inexact if x!=0. */
        just_do_it(float) _x = x + toint;
        return 0*orig.f;
    }
    /* delta = (integer neighbour of x) - x */
    delta = x + toint - toint - x;
    if (delta > 0.5) {
        delta = delta + x - 1;
    } else if (delta <= -0.5) {
        delta = delta + x + 1;
    } else {
        delta = delta + x;
    }
    return negative ? -delta : delta;
}
/*
 * Rounds x to the nearest integer value, with halfway cases rounded away
 * from zero.
 *
 * The rounding trick below needs `x + toint` to be evaluated in a format
 * whose epsilon is 1/toint, so that the addition discards exactly the
 * fractional bits. toint therefore has type float_t and is built from
 * the epsilon matching the evaluation format selected by FLT_EVAL_METHOD.
 * (With a double-typed toint of 1/FLT_EPSILON the sum is exact in double,
 * `x + toint - toint - x` is always 0, and x would be returned unrounded.)
 */
float roundf(float x) {
#if FLT_EVAL_METHOD == 0
    static const float_t toint = 1/FLT_EPSILON;
#elif FLT_EVAL_METHOD == 1
    static const float_t toint = 1/DBL_EPSILON;
#else
    static const float_t toint = 1/LDBL_EPSILON;
#endif
    union {float f; uint32_t i;} u = {x};
    int e = u.i >> 23 & 0xff;
    float_t y;

    /* Exponent too large for any fractional bits (also covers inf/NaN). */
    if (e >= 0x7f+23) {
        return x;
    }
    /* Work on the magnitude; the sign is restored at the end. */
    if (u.i >> 31) {
        x = -x;
    }
    if (e < 0x7f-1) {
        /* |x| < 0.5: result is a zero with the sign of the input.
           The sum is evaluated only to raise inexact if x != 0. */
        volatile float force = x + toint;
        (void)force;
        return 0*u.f;
    }
    /* y = (integer neighbour of x) - x */
    y = x + toint - toint - x;
    if (y > 0.5f) {
        y = y + x - 1;
    } else if (y <= -0.5f) {
        y = y + x + 1;
    } else {
        y = y + x;
    }
    if (u.i >> 31) {
        y = -y;
    }
    return y;
}
long double roundl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double rounded = round(x);
    return rounded;
}
long lround(double x) {
    /* Round with round(), then convert the result to long. */
    const double rounded = round(x);
    return (long)rounded;
}
long lroundf(float x) {
    /* Round with roundf(), then convert the result to long. */
    const float rounded = roundf(x);
    return (long)rounded;
}
long lroundl(long double x) {
    /* Round with roundl(), then convert the result to long. */
    const long double rounded = roundl(x);
    return (long)rounded;
}
long long llround(double x) {
    /* Round with round(), then convert the result to long long. */
    const double rounded = round(x);
    return (long long)rounded;
}
long long llroundf(float x) {
    /* Round with roundf(), then convert the result to long long. */
    const float rounded = roundf(x);
    return (long long)rounded;
}
long long llroundl(long double x) {
    /* Round with roundl(), then convert the result to long long. */
    const long double rounded = roundl(x);
    return (long long)rounded;
}
double ceil(double x) {
    /* 1/DBL_EPSILON: adding and then removing it rounds to an integer. */
    static const double_t toint = 1/DBL_EPSILON;
    union {double f; uint64_t i;} bits = {x};
    const int biased_exp = bits.i >> 52 & 0x7ff;
    const int negative = (bits.i >> 63) != 0;
    double_t y;

    /* No fractional bits possible (large exponent, inf, NaN) or x is zero. */
    if (biased_exp >= 0x3ff+52 || x == 0) {
        return x;
    }
    /* y = int(x) - x, where int(x) is an integer neighbor of x */
    y = negative ? x - toint + toint - x
                 : x + toint - toint - x;
    /* special case because of non-nearest rounding modes */
    if (biased_exp <= 0x3ff-1) {
        /* |x| < 1: y is stored only to force the computation above. */
        just_do_it(double) _x = y;
        return negative ? -0.0 : 1;
    }
    /* The neighbour was below x: move up by one. */
    return (y < 0) ? x + y + 1 : x + y;
}
float ceilf(float x) {
    union {float f; uint32_t i;} u = {x};
    /* Unbiased binary exponent of x. */
    const int exponent = (int)(u.i >> 23 & 0xff) - 0x7f;

    /* No fractional bits can be present (also covers inf/NaN). */
    if (exponent >= 23) {
        return x;
    }

    if (exponent < 0) {
        /* |x| < 1: the sum is evaluated only for its side effects. */
        just_do_it(float) _x = (x + 0x1p120f);
        if (u.i >> 31) {
            u.f = -0.0;
        } else if (u.i << 1) {
            /* positive and non-zero */
            u.f = 1.0;
        }
        return u.f;
    }

    /* Mask selecting the fractional bits of the significand. */
    const uint32_t frac_mask = 0x007fffff >> exponent;
    if ((u.i & frac_mask) == 0) {
        return x;
    }
    just_do_it(float) _x = (x + 0x1p120f);
    /* Positive values are bumped past the next integer before truncating. */
    if (u.i >> 31 == 0) {
        u.i += frac_mask;
    }
    u.i &= ~frac_mask;
    return u.f;
}
long double ceill(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double result = ceil(x);
    return result;
}
double floor(double x) {
    /* 1/DBL_EPSILON: adding and then removing it rounds to an integer. */
    static const double_t toint = 1/DBL_EPSILON;
    union {double f; uint64_t i;} bits = {x};
    const int biased_exp = bits.i >> 52 & 0x7ff;
    const int negative = (bits.i >> 63) != 0;
    double_t y;

    /* No fractional bits possible (large exponent, inf, NaN) or x is zero. */
    if (biased_exp >= 0x3ff+52 || x == 0) {
        return x;
    }
    /* y = int(x) - x, where int(x) is an integer neighbor of x */
    y = negative ? x - toint + toint - x
                 : x + toint - toint - x;
    /* special case because of non-nearest rounding modes */
    if (biased_exp <= 0x3ff-1) {
        /* |x| < 1: y is stored only to force the computation above. */
        just_do_it(double) _x = (y);
        return negative ? -1 : 0;
    }
    /* The neighbour was above x: move down by one. */
    return (y > 0) ? x + y - 1 : x + y;
}
float floorf(float x) {
    union {float f; uint32_t i;} u = {x};
    /* Unbiased binary exponent of x. */
    const int exponent = (int)(u.i >> 23 & 0xff) - 0x7f;

    /* No fractional bits can be present (also covers inf/NaN). */
    if (exponent >= 23) {
        return x;
    }

    if (exponent < 0) {
        /* |x| < 1: the sum is evaluated only for its side effects. */
        just_do_it(float) _x = (x + 0x1p120f);
        if (u.i >> 31 == 0) {
            u.i = 0;
        } else if (u.i << 1) {
            /* negative and non-zero */
            u.f = -1.0;
        }
        return u.f;
    }

    /* Mask selecting the fractional bits of the significand. */
    const uint32_t frac_mask = 0x007fffff >> exponent;
    if ((u.i & frac_mask) == 0) {
        return x;
    }
    just_do_it(float) _x = (x + 0x1p120f);
    /* Negative values are bumped past the next integer before truncating. */
    if (u.i >> 31) {
        u.i += frac_mask;
    }
    u.i &= ~frac_mask;
    return u.f;
}
long double floorl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double result = floor(x);
    return result;
}
double trunc(double x) {
    union {double f; uint64_t i;} u = {x};
    /* Count of leading bits to keep: 12 sign/exponent bits plus the
       unbiased exponent's worth of significand bits. */
    int keep = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;

    /* Every significand bit is kept: nothing to cut (also inf/NaN). */
    if (keep >= 52 + 12) {
        return x;
    }
    /* |x| < 1: keep only the sign bit. */
    if (keep < 12) {
        keep = 1;
    }

    const uint64_t drop_mask = -1ULL >> keep;
    if ((u.i & drop_mask) == 0) {
        return x;
    }
    /* The sum is evaluated only for its side effects. */
    just_do_it(double) _x = (x + 0x1p120f);
    u.i &= ~drop_mask;
    return u.f;
}
float truncf(float x) {
    union {float f; uint32_t i;} u = {x};
    /* Count of leading bits to keep: 9 sign/exponent bits plus the
       unbiased exponent's worth of significand bits. */
    int keep = (int)(u.i >> 23 & 0xff) - 0x7f + 9;

    /* Every significand bit is kept: nothing to cut (also inf/NaN). */
    if (keep >= 23 + 9) {
        return x;
    }
    /* |x| < 1: keep only the sign bit. */
    if (keep < 9) {
        keep = 1;
    }

    const uint32_t drop_mask = -1U >> keep;
    if ((u.i & drop_mask) == 0) {
        return x;
    }
    /* The sum is evaluated only for its side effects. */
    just_do_it(float) _x = (x + 0x1p120f);
    u.i &= ~drop_mask;
    return u.f;
}
long double truncl(long double x) {
    /* Delegates to the double version: the argument is narrowed to
       double for the call and the result widened on return. */
    const double result = trunc(x);
    return result;
}

View File

@ -3,6 +3,7 @@
#include <math.h>
#include <float.h>
#include <fenv.h>
#include <inttypes.h>
const char *show_classification(double x) {
switch(fpclassify(x)) {
@ -55,22 +56,52 @@ int main() {
printf("rint(1.1) = %f\n", rint(1.1));
if(fetestexcept(FE_INEXACT)) printf(" FE_INEXACT was raised\n");
printf("\n\n=== nextafter === \n");
float from1 = 0, to1 = nextafterf(from1, 1);
printf("The next representable float after %f is %f\n", from1, to1);
float from2 = 1, to2 = nextafterf(from2, 2);
printf("The next representable float after %f is %f\n", from2, to2);
{
#pragma STDC FENV_ACCESS ON
feclearexcept(FE_ALL_EXCEPT);
double from4 = DBL_MAX, to4 = nextafter(from4, INFINITY);
printf("The next representable double after %f is %f\n",
from4, to4);
if(fetestexcept(FE_OVERFLOW)) printf(" raised FE_OVERFLOW\n");
if(fetestexcept(FE_INEXACT)) printf(" raised FE_INEXACT\n");
}
float from5 = 0.0, to5 = nextafter(from5, -0.0);
printf("nextafter(+0.0, -0.0) gives %f (%f)\n", to5, to5);
printf("\n\n=== ceil === \n");
printf("ceil(+2.4) = %f\n", ceil(2.4));
printf("ceil(-2.4) = %f\n", ceil(-2.4));
printf("ceil(-0.0) = %f\n", ceil(-0.0));
printf("ceil(-Inf) = %f\n", ceil(-INFINITY));
printf("\n\n=== floor === \n");
printf("floor(+2.7) = %f\n", floor(2.7));
printf("floor(-2.7) = %f\n", floor(-2.7));
printf("floor(-0.0) = %f\n", floor(-0.0));
printf("floor(-Inf) = %f\n", floor(-INFINITY));
printf("\n\n=== trunk === \n");
printf("trunc(+2.7) = %f\n", trunc(2.7));
printf("trunc(-2.7) = %f\n", trunc(-2.7));
printf("trunc(-0.0) = %f\n", trunc(-0.0));
printf("trunc(-Inf) = %f\n", trunc(-INFINITY));
printf("\n\n=== round === \n");
printf("round(+2.3) = %f ", round(2.3));
printf("round(+2.5) = %f ", round(2.5));
printf("round(+2.7) = %f\n", round(2.7));
printf("round(-2.3) = %f ", round(-2.3));
printf("round(-2.5) = %f ", round(-2.5));
printf("round(-2.7) = %f\n", round(-2.7));
printf("round(-0.0) = %f\n", round(-0.0));
printf("round(-Inf) = %f\n", round(-INFINITY));
printf("lround(+2.3) = %ld ", lround(2.3));
printf("lround(+2.5) = %ld ", lround(2.5));
printf("lround(+2.7) = %ld\n", lround(2.7));
printf("lround(-2.3) = %ld ", lround(-2.3));
printf("lround(-2.5) = %ld ", lround(-2.5));
printf("lround(-2.7) = %ld\n", lround(-2.7));
printf("lround(-0.0) = %ld\n", lround(-0.0));
printf("lround(-Inf) = %ld\n", lround(-INFINITY));
feclearexcept(FE_ALL_EXCEPT);
printf("lround(LONG_MAX+1.5) = %ld\n", lround(LONG_MAX+1.5));
if(fetestexcept(FE_INVALID)) printf(" FE_INVALID was raised\n");
return 0;
}