ciabatta/src/math/round.c

523 lines
13 KiB
C

#define asuint64(x) ((union {f64 f; uint64_t i;}){x}).i
#define asdouble(x) ((union {f64 f; uint64_t i;}){x}).f
#if defined(__GNUC__) || defined(__clang__)
#define just_do_it(v) do{__attribute__((unused)) volatile f64 t = v;}while(0)
#else
#define just_do_it(v) do{volatile f64 t = v;}while(0)
#endif
f64 nearbyint(f64 x) {
#pragma STDC FENV_ACCESS ON
u64 bits = F64_BITS(x);
i64 bexp = F64_BEXP(bits);
u64 bmant = F64_MANT(bits);
// 1. Get rid of special cases, exp = 0x7ff, and exp < 0x3ff
// Return x unmodified if inf, nan
if(bexp == 0x7ff) {
return x;
}
int mode = fegetround();
// Get exponent for (integer_mantissa * 2^exp) representation
i64 exp = bexp - 0x3ff - 52;
int s = F64_SIGN(bits);
// This value is 0 if no increment is required, and 1 if the absolute value
// increases by 1
int c;
{
// Check if we need to round towards 0 or towards 1
// (assumes specific values in rounding modes in fenv.h)
int a = (mode&2)>>1;
int b = mode&1;
int mask = ((a^b)<<1)|(a^b);
int d = 2 - mode&mask;
c = s ^ d;
}
// If the whole mantissa is after a point, such that the first digit is 0,
// then the value is closer to 0 these values are all zeroes, subnormal
// numbers and very small normal numbers
if(exp < -53) {
// Return 0 if exponent and mantissa are zero
if(bexp == 0 && bmant == 0) {
return x;
}
// For subnormal and normal numbers we round them either towards 0 or 1
// and then call it a day
u64 new_bexp = (u64)((1-c)&0x3ff) << F64_MANT_BITS;
u64 new_sign = (u64)s << 63;
u64 new_bits = new_sign | new_bexp;
return F64_CONS(new_bits);
}
// 2. Get fractional and whole bits of the mantissa
u64 mant = bmant | ((u64)1 << 52);
if(exp >= 0) {
// Already an integer
return x;
}
// if e.g. mantissa is 0b101.., and exponent is -2, the value is 0b101*2^-2
// or 0b1.01, meaning there are 2 fractional digits
int nfrac_digs = -exp;
u64 frac_mask = (((u64)1<<(nfrac_digs))-1);
u64 frac_mant = mant & frac_mask;
// The mantissas for 1.0 and 0.5
u64 one = (((u64)1<<(nfrac_digs)));
u64 half = one >> 1;
// 3. Round the float based on the value of c
// we'll first fix up c to include other rounding modes
c |= (mode == FE_UPWARD) & ((~s)&1);
c |= (mode == FE_DOWNWARD) & s;
c |= (mode == FE_TONEAREST) & (frac_mant >= half);
// Drop fractional bits
u64 new_mant = mant & ~frac_mant;
// Add 1 to float if required
if(c) {
new_mant += one;
if(new_mant > ((u64)1 << 53)) {
new_mant >>= 1;
exp += 1;
}
}
new_mant &= F64_MANT_MASK;
u64 new_bits = new_mant;
new_bits |= (exp+0x3ff+52) << F64_MANT_BITS;
new_bits |= (u64)s << (F64_MANT_BITS + F64_BEXP_BITS);
f64 result = F64_CONS(new_bits);
return result;
}
f32 nearbyintf(f32 x) {
#pragma STDC FENV_ACCESS ON
u64 bits = F32_BITS(x);
i64 bexp = F32_BEXP(bits);
u64 bmant = F32_MANT(bits);
if(bexp == 0x7f) {
return x;
}
int mode = fegetround();
i64 exp = bexp - 0x3f - 52;
int s = F32_SIGN(bits);
int c;
{
int a = (mode&2)>>1;
int b = mode&1;
int mask = ((a^b)<<1)|(a^b);
int d = 2 - mode&mask;
c = s ^ d;
}
if(exp < -24) {
if(bexp == 0 && bmant == 0) {
return x;
}
u64 new_bexp = (u64)((1-c)&0x3f) << F32_MANT_BITS;
u64 new_sign = (u64)s << 63;
u64 new_bits = new_sign | new_bexp;
return F32_CONS(new_bits);
}
u64 mant = bmant | ((u64)1 << 23);
if(exp >= 0) {
return x;
}
int nfrac_digs = -exp;
u64 frac_mask = (((u64)1<<(nfrac_digs))-1);
u64 frac_mant = mant & frac_mask;
u64 one = (((u64)1<<(nfrac_digs)));
u64 half = one >> 1;
c |= (mode == FE_UPWARD) & ((~s)&1);
c |= (mode == FE_DOWNWARD) & s;
c |= (mode == FE_TONEAREST) & (frac_mant >= half);
u64 new_mant = mant & ~frac_mant;
if(c) {
new_mant += one;
if(new_mant > ((u64)1 << 24)) {
new_mant >>= 1;
exp += 1;
}
}
new_mant &= F32_MANT_MASK;
u64 new_bits = new_mant;
new_bits |= (exp+0x3f+23) << F32_MANT_BITS;
new_bits |= (u64)s << (F32_MANT_BITS + F32_BEXP_BITS);
f64 result = F32_CONS(new_bits);
return result;
}
fl64 nearbyintl(fl64 x) {
return nearbyint((f64)x);
}
f64 nextafter(f64 x, f64 y) {
union {f64 f; uint64_t i;} ux={x}, uy={y};
uint64_t ax, ay;
int e;
if (isnan(x) || isnan(y)) return x + y;
if (ux.i == uy.i) return y;
ax = ux.i & -1ULL/2;
ay = uy.i & -1ULL/2;
if (ax == 0) {
if (ay == 0) return y;
ux.i = (uy.i & 1ULL<<63) | 1;
} else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63)) {
ux.i--;
}
else {
ux.i++;
}
e = ux.i >> 52 & 0x7f;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7f) just_do_it(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0) just_do_it(x*x + ux.f*ux.f);
return ux.f;
}
f32 nextafterf(f32 x, f32 y) {
union {f32 f; uint32_t i;} ux={x}, uy={y};
uint32_t ax, ay, e;
if (isnan(x) || isnan(y)) return x + y;
if (ux.i == uy.i) return y;
ax = ux.i & 0x7fffffff;
ay = uy.i & 0x7fffffff;
if (ax == 0) {
if (ay == 0) return y;
ux.i = (uy.i & 0x80000000) | 1;
} else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000)) {
ux.i--;
}
else {
ux.i++;
}
e = ux.i & 0x7f800000;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7f800000) just_do_it(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0) just_do_it(x*x + ux.f*ux.f);
return ux.f;
}
fl64 nextafterl(fl64 x, fl64 y) {
return nextafter(x, y);
}
f64 nexttoward(f64 x, fl64 y) {
return nextafter(x, y);
}
f32 nexttowardf(f32 x, fl64 y) {
union {f32 f; uint32_t i;} ux = {x};
uint32_t e;
if (isnan(x) || isnan(y)) return x + y;
if (x == y) return y;
if (x == 0) {
ux.i = 1;
if (signbit(y)) ux.i |= 0x80000000;
} else if (x < y) {
if (signbit(x)) ux.i--;
else ux.i++;
} else {
if (signbit(x)) ux.i++;
else ux.i--;
}
e = ux.i & 0x7f800000;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7f800000) just_do_it(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0) just_do_it(x*x + ux.f*ux.f);
return ux.f;
}
fl64 nexttowardl(fl64 x, fl64 y) {
return nextafterl(x, y);
}
f64 rint(f64 x) {
static const double_t toint = 1/DBL_EPSILON;
union {f64 f; uint64_t i;} u = {x};
int e = u.i>>52 & 0x7ff;
int s = u.i>>63;
f64 y;
if (e >= 0x3ff+52) return x;
if (s) y = x - toint + toint;
else y = x + toint - toint;
if (y == 0) return s ? -0.0 : +0.0;
return y;
}
f32 rintf(f32 x) {
static const f32 toint = 1/FLT_EPSILON;
union {f32 f; uint32_t i;} u = {x};
int e = u.i>>23 & 0xff;
int s = u.i>>31;
f32 y;
if (e >= 0x7f+23) return x;
if (s) y = x - toint + toint;
else y = x + toint - toint;
if (y == 0) return s ? -0.0f : 0.0f;
return y;
}
fl64 rintl(fl64 x) {
return rint(x);
}
#if LONG_MAX < 1U<<53 && defined(FE_INEXACT)
static long lrint_slow(f64 x)
{
#pragma STDC FENV_ACCESS ON
int e;
e = fetestexcept(FE_INEXACT);
x = rint(x);
if (!e && (x > LONG_MAX || x < LONG_MIN))
feclearexcept(FE_INEXACT);
/* conversion */
return x;
}
long lrint(f64 x)
{
uint32_t abstop = asuint64(x)>>32 & 0x7fffffff;
uint64_t sign = asuint64(x) & (1ULL << 63);
if (abstop < 0x41dfffff) {
/* |x| < 0x7ffffc00, no overflow */
double_t toint = asdouble(asuint64(1/DBL_EPSILON) | sign);
double_t y = x + toint - toint;
return (long)y;
}
return lrint_slow(x);
}
#else
long lrint(f64 x) {
return rint(x);
}
#endif
long lrintf(f32 x) {
return rintf(x);
}
long lrintl(fl64 x) {
return lrint(x);
}
long long llrint(f64 x) {
return rint(x);
}
long long llrintf(f32 x) {
return rintf(x);
}
long long llrintl(fl64 x) {
return llrint(x);
}
f64 round(f64 x) {
static const double_t toint = 1/DBL_EPSILON;
union {f64 f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52) return x;
if (u.i >> 63) x = -x;
if (e < 0x3ff-1) {
/* raise inexact if x!=0 */
just_do_it(x + toint);
return 0*u.f;
}
y = x + toint - toint - x;
if (y > 0.5) y = y + x - 1;
else if (y <= -0.5) y = y + x + 1;
else y = y + x;
if (u.i >> 63) y = -y;
return y;
}
f32 roundf(f32 x) {
static const double_t toint = 1/FLT_EPSILON;
union {f32 f; uint32_t i;} u = {x};
int e = u.i >> 23 & 0xff;
float_t y;
if (e >= 0x7f+23) return x;
if (u.i >> 31) x = -x;
if (e < 0x7f-1) {
just_do_it(x + toint);
return 0*u.f;
}
y = x + toint - toint - x;
if (y > 0.5f) y = y + x - 1;
else if (y <= -0.5f) y = y + x + 1;
else y = y + x;
if (u.i >> 31) y = -y;
return y;
}
fl64 roundl(fl64 x) {
return round(x);
}
long lround(f64 x) {
return round(x);
}
long lroundf(f32 x) {
return roundf(x);
}
long lroundl(fl64 x) {
return roundl(x);
}
long long llround(f64 x) {
return round(x);
}
long long llroundf(f32 x) {
return roundf(x);
}
long long llroundl(fl64 x) {
return roundl(x);
}
f64 ceil(f64 x) {
static const double_t toint = 1/DBL_EPSILON;
union {f64 f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52 || x == 0)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = x - toint + toint - x;
else
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
just_do_it(y);
return u.i >> 63 ? -0.0 : 1;
}
if (y < 0)
return x + y + 1;
return x + y;
}
f32 ceilf(f32 x) {
union {f32 f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f;
uint32_t m;
if (e >= 23)
return x;
if (e >= 0) {
m = 0x007fffff >> e;
if ((u.i & m) == 0)
return x;
just_do_it(x + 0x1p120f);
if (u.i >> 31 == 0)
u.i += m;
u.i &= ~m;
} else {
just_do_it(x + 0x1p120f);
if (u.i >> 31)
u.f = -0.0;
else if (u.i << 1)
u.f = 1.0;
}
return u.f;
}
fl64 ceill(fl64 x) {
return ceil(x);
}
f64 floor(f64 x) {
static const double_t toint = 1/DBL_EPSILON;
union {f64 f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52 || x == 0)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = x - toint + toint - x;
else
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
just_do_it(y);
return u.i >> 63 ? -1 : 0;
}
if (y > 0)
return x + y - 1;
return x + y;
}
f32 floorf(f32 x) {
union {f32 f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f;
uint32_t m;
if (e >= 23)
return x;
if (e >= 0) {
m = 0x007fffff >> e;
if ((u.i & m) == 0)
return x;
just_do_it(x + 0x1p120f);
if (u.i >> 31)
u.i += m;
u.i &= ~m;
} else {
just_do_it(x + 0x1p120f);
if (u.i >> 31 == 0)
u.i = 0;
else if (u.i << 1)
u.f = -1.0;
}
return u.f;
}
fl64 floorl(fl64 x) {
return floor(x);
}
f64 trunc(f64 x) {
union {f64 f; uint64_t i;} u = {x};
int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
uint64_t m;
if (e >= 52 + 12)
return x;
if (e < 12)
e = 1;
m = -1ULL >> e;
if ((u.i & m) == 0)
return x;
just_do_it(x + 0x1p120f);
u.i &= ~m;
return u.f;
}
f32 truncf(f32 x) {
union {f32 f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f + 9;
uint32_t m;
if (e >= 23 + 9)
return x;
if (e < 9)
e = 1;
m = -1U >> e;
if ((u.i & m) == 0)
return x;
just_do_it(x + 0x1p120f);
u.i &= ~m;
return u.f;
}
fl64 truncl(fl64 x) {
return trunc(x);
}