#define asuint64(x) ((union {f64 f; uint64_t i;}){x}).i #define asdouble(x) ((union {f64 f; uint64_t i;}){x}).f #if defined(__GNUC__) || defined(__clang__) #define just_do_it(v) do{__attribute__((unused)) volatile f64 t = v;}while(0) #else #define just_do_it(v) do{volatile f64 t = v;}while(0) #endif f64 nearbyint(f64 x) { #pragma STDC FENV_ACCESS ON u64 bits = F64_BITS(x); i64 bexp = F64_BEXP(bits); u64 bmant = F64_MANT(bits); // 1. Get rid of special cases, exp = 0x7ff, and exp < 0x3ff // Return x unmodified if inf, nan if(bexp == 0x7ff) { return x; } int mode = fegetround(); // Get exponent for (integer_mantissa * 2^exp) representation i64 exp = bexp - 0x3ff - 52; int s = F64_SIGN(bits); // This value is 0 if no increment is required, and 1 if the absolute value // increases by 1 int c; { // Check if we need to round towards 0 or towards 1 // (assumes specific values in rounding modes in fenv.h) int a = (mode&2)>>1; int b = mode&1; int mask = ((a^b)<<1)|(a^b); int d = 2 - mode&mask; c = s ^ d; } // If the whole mantissa is after a point, such that the first digit is 0, // then the value is closer to 0 these values are all zeroes, subnormal // numbers and very small normal numbers if(exp < -53) { // Return 0 if exponent and mantissa are zero if(bexp == 0 && bmant == 0) { return x; } // For subnormal and normal numbers we round them either towards 0 or 1 // and then call it a day u64 new_bexp = (u64)((1-c)&0x3ff) << F64_MANT_BITS; u64 new_sign = (u64)s << 63; u64 new_bits = new_sign | new_bexp; return F64_CONS(new_bits); } // 2. Get fractional and whole bits of the mantissa u64 mant = bmant | ((u64)1 << 52); if(exp >= 0) { // Already an integer return x; } // if e.g. mantissa is 0b101.., and exponent is -2, the value is 0b101*2^-2 // or 0b1.01, meaning there are 2 fractional digits int nfrac_digs = -exp; u64 frac_mask = (((u64)1<<(nfrac_digs))-1); u64 frac_mant = mant & frac_mask; // The mantissas for 1.0 and 0.5 u64 one = (((u64)1<<(nfrac_digs))); u64 half = one >> 1; // 3. Round the float based on the value of c // we'll first fix up c to include other rounding modes c |= (mode == FE_UPWARD) & ((~s)&1); c |= (mode == FE_DOWNWARD) & s; c |= (mode == FE_TONEAREST) & (frac_mant >= half); // Drop fractional bits u64 new_mant = mant & ~frac_mant; // Add 1 to float if required if(c) { new_mant += one; if(new_mant > ((u64)1 << 53)) { new_mant >>= 1; exp += 1; } } new_mant &= F64_MANT_MASK; u64 new_bits = new_mant; new_bits |= (exp+0x3ff+52) << F64_MANT_BITS; new_bits |= (u64)s << (F64_MANT_BITS + F64_BEXP_BITS); f64 result = F64_CONS(new_bits); return result; } f32 nearbyintf(f32 x) { #pragma STDC FENV_ACCESS ON u64 bits = F32_BITS(x); i64 bexp = F32_BEXP(bits); u64 bmant = F32_MANT(bits); if(bexp == 0x7f) { return x; } int mode = fegetround(); i64 exp = bexp - 0x3f - 52; int s = F32_SIGN(bits); int c; { int a = (mode&2)>>1; int b = mode&1; int mask = ((a^b)<<1)|(a^b); int d = 2 - mode&mask; c = s ^ d; } if(exp < -24) { if(bexp == 0 && bmant == 0) { return x; } u64 new_bexp = (u64)((1-c)&0x3f) << F32_MANT_BITS; u64 new_sign = (u64)s << 63; u64 new_bits = new_sign | new_bexp; return F32_CONS(new_bits); } u64 mant = bmant | ((u64)1 << 23); if(exp >= 0) { return x; } int nfrac_digs = -exp; u64 frac_mask = (((u64)1<<(nfrac_digs))-1); u64 frac_mant = mant & frac_mask; u64 one = (((u64)1<<(nfrac_digs))); u64 half = one >> 1; c |= (mode == FE_UPWARD) & ((~s)&1); c |= (mode == FE_DOWNWARD) & s; c |= (mode == FE_TONEAREST) & (frac_mant >= half); u64 new_mant = mant & ~frac_mant; if(c) { new_mant += one; if(new_mant > ((u64)1 << 24)) { new_mant >>= 1; exp += 1; } } new_mant &= F32_MANT_MASK; u64 new_bits = new_mant; new_bits |= (exp+0x3f+23) << F32_MANT_BITS; new_bits |= (u64)s << (F32_MANT_BITS + F32_BEXP_BITS); f64 result = F32_CONS(new_bits); return result; } fl64 nearbyintl(fl64 x) { return nearbyint((f64)x); } f64 nextafter(f64 x, f64 y) { union {f64 f; uint64_t i;} ux={x}, uy={y}; uint64_t ax, ay; int e; if (isnan(x) || isnan(y)) return x + y; if (ux.i == uy.i) return y; ax = ux.i & -1ULL/2; ay = uy.i & -1ULL/2; if (ax == 0) { if (ay == 0) return y; ux.i = (uy.i & 1ULL<<63) | 1; } else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63)) { ux.i--; } else { ux.i++; } e = ux.i >> 52 & 0x7f; /* raise overflow if ux.f is infinite and x is finite */ if (e == 0x7f) just_do_it(x+x); /* raise underflow if ux.f is subnormal or zero */ if (e == 0) just_do_it(x*x + ux.f*ux.f); return ux.f; } f32 nextafterf(f32 x, f32 y) { union {f32 f; uint32_t i;} ux={x}, uy={y}; uint32_t ax, ay, e; if (isnan(x) || isnan(y)) return x + y; if (ux.i == uy.i) return y; ax = ux.i & 0x7fffffff; ay = uy.i & 0x7fffffff; if (ax == 0) { if (ay == 0) return y; ux.i = (uy.i & 0x80000000) | 1; } else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000)) { ux.i--; } else { ux.i++; } e = ux.i & 0x7f800000; /* raise overflow if ux.f is infinite and x is finite */ if (e == 0x7f800000) just_do_it(x+x); /* raise underflow if ux.f is subnormal or zero */ if (e == 0) just_do_it(x*x + ux.f*ux.f); return ux.f; } fl64 nextafterl(fl64 x, fl64 y) { return nextafter(x, y); } f64 nexttoward(f64 x, fl64 y) { return nextafter(x, y); } f32 nexttowardf(f32 x, fl64 y) { union {f32 f; uint32_t i;} ux = {x}; uint32_t e; if (isnan(x) || isnan(y)) return x + y; if (x == y) return y; if (x == 0) { ux.i = 1; if (signbit(y)) ux.i |= 0x80000000; } else if (x < y) { if (signbit(x)) ux.i--; else ux.i++; } else { if (signbit(x)) ux.i++; else ux.i--; } e = ux.i & 0x7f800000; /* raise overflow if ux.f is infinite and x is finite */ if (e == 0x7f800000) just_do_it(x+x); /* raise underflow if ux.f is subnormal or zero */ if (e == 0) just_do_it(x*x + ux.f*ux.f); return ux.f; } fl64 nexttowardl(fl64 x, fl64 y) { return nextafterl(x, y); } f64 rint(f64 x) { static const double_t toint = 1/DBL_EPSILON; union {f64 f; uint64_t i;} u = {x}; int e = u.i>>52 & 0x7ff; int s = u.i>>63; f64 y; if (e >= 0x3ff+52) return x; if (s) y = x - toint + toint; else y = x + toint - toint; if (y == 0) return s ? -0.0 : +0.0; return y; } f32 rintf(f32 x) { static const f32 toint = 1/FLT_EPSILON; union {f32 f; uint32_t i;} u = {x}; int e = u.i>>23 & 0xff; int s = u.i>>31; f32 y; if (e >= 0x7f+23) return x; if (s) y = x - toint + toint; else y = x + toint - toint; if (y == 0) return s ? -0.0f : 0.0f; return y; } fl64 rintl(fl64 x) { return rint(x); } #if LONG_MAX < 1U<<53 && defined(FE_INEXACT) static long lrint_slow(f64 x) { #pragma STDC FENV_ACCESS ON int e; e = fetestexcept(FE_INEXACT); x = rint(x); if (!e && (x > LONG_MAX || x < LONG_MIN)) feclearexcept(FE_INEXACT); /* conversion */ return x; } long lrint(f64 x) { uint32_t abstop = asuint64(x)>>32 & 0x7fffffff; uint64_t sign = asuint64(x) & (1ULL << 63); if (abstop < 0x41dfffff) { /* |x| < 0x7ffffc00, no overflow */ double_t toint = asdouble(asuint64(1/DBL_EPSILON) | sign); double_t y = x + toint - toint; return (long)y; } return lrint_slow(x); } #else long lrint(f64 x) { return rint(x); } #endif long lrintf(f32 x) { return rintf(x); } long lrintl(fl64 x) { return lrint(x); } long long llrint(f64 x) { return rint(x); } long long llrintf(f32 x) { return rintf(x); } long long llrintl(fl64 x) { return llrint(x); } f64 round(f64 x) { static const double_t toint = 1/DBL_EPSILON; union {f64 f; uint64_t i;} u = {x}; int e = u.i >> 52 & 0x7ff; double_t y; if (e >= 0x3ff+52) return x; if (u.i >> 63) x = -x; if (e < 0x3ff-1) { /* raise inexact if x!=0 */ just_do_it(x + toint); return 0*u.f; } y = x + toint - toint - x; if (y > 0.5) y = y + x - 1; else if (y <= -0.5) y = y + x + 1; else y = y + x; if (u.i >> 63) y = -y; return y; } f32 roundf(f32 x) { static const double_t toint = 1/FLT_EPSILON; union {f32 f; uint32_t i;} u = {x}; int e = u.i >> 23 & 0xff; float_t y; if (e >= 0x7f+23) return x; if (u.i >> 31) x = -x; if (e < 0x7f-1) { just_do_it(x + toint); return 0*u.f; } y = x + toint - toint - x; if (y > 0.5f) y = y + x - 1; else if (y <= -0.5f) y = y + x + 1; else y = y + x; if (u.i >> 31) y = -y; return y; } fl64 roundl(fl64 x) { return round(x); } long lround(f64 x) { return round(x); } long lroundf(f32 x) { return roundf(x); } long lroundl(fl64 x) { return roundl(x); } long long llround(f64 x) { return round(x); } long long llroundf(f32 x) { return roundf(x); } long long llroundl(fl64 x) { return roundl(x); } f64 ceil(f64 x) { static const double_t toint = 1/DBL_EPSILON; union {f64 f; uint64_t i;} u = {x}; int e = u.i >> 52 & 0x7ff; double_t y; if (e >= 0x3ff+52 || x == 0) return x; /* y = int(x) - x, where int(x) is an integer neighbor of x */ if (u.i >> 63) y = x - toint + toint - x; else y = x + toint - toint - x; /* special case because of non-nearest rounding modes */ if (e <= 0x3ff-1) { just_do_it(y); return u.i >> 63 ? -0.0 : 1; } if (y < 0) return x + y + 1; return x + y; } f32 ceilf(f32 x) { union {f32 f; uint32_t i;} u = {x}; int e = (int)(u.i >> 23 & 0xff) - 0x7f; uint32_t m; if (e >= 23) return x; if (e >= 0) { m = 0x007fffff >> e; if ((u.i & m) == 0) return x; just_do_it(x + 0x1p120f); if (u.i >> 31 == 0) u.i += m; u.i &= ~m; } else { just_do_it(x + 0x1p120f); if (u.i >> 31) u.f = -0.0; else if (u.i << 1) u.f = 1.0; } return u.f; } fl64 ceill(fl64 x) { return ceil(x); } f64 floor(f64 x) { static const double_t toint = 1/DBL_EPSILON; union {f64 f; uint64_t i;} u = {x}; int e = u.i >> 52 & 0x7ff; double_t y; if (e >= 0x3ff+52 || x == 0) return x; /* y = int(x) - x, where int(x) is an integer neighbor of x */ if (u.i >> 63) y = x - toint + toint - x; else y = x + toint - toint - x; /* special case because of non-nearest rounding modes */ if (e <= 0x3ff-1) { just_do_it(y); return u.i >> 63 ? -1 : 0; } if (y > 0) return x + y - 1; return x + y; } f32 floorf(f32 x) { union {f32 f; uint32_t i;} u = {x}; int e = (int)(u.i >> 23 & 0xff) - 0x7f; uint32_t m; if (e >= 23) return x; if (e >= 0) { m = 0x007fffff >> e; if ((u.i & m) == 0) return x; just_do_it(x + 0x1p120f); if (u.i >> 31) u.i += m; u.i &= ~m; } else { just_do_it(x + 0x1p120f); if (u.i >> 31 == 0) u.i = 0; else if (u.i << 1) u.f = -1.0; } return u.f; } fl64 floorl(fl64 x) { return floor(x); } f64 trunc(f64 x) { union {f64 f; uint64_t i;} u = {x}; int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12; uint64_t m; if (e >= 52 + 12) return x; if (e < 12) e = 1; m = -1ULL >> e; if ((u.i & m) == 0) return x; just_do_it(x + 0x1p120f); u.i &= ~m; return u.f; } f32 truncf(f32 x) { union {f32 f; uint32_t i;} u = {x}; int e = (int)(u.i >> 23 & 0xff) - 0x7f + 9; uint32_t m; if (e >= 23 + 9) return x; if (e < 9) e = 1; m = -1U >> e; if ((u.i & m) == 0) return x; just_do_it(x + 0x1p120f); u.i &= ~m; return u.f; } fl64 truncl(fl64 x) { return trunc(x); }