Make sure printf %a works for all floats

This commit is contained in:
bumbread 2022-08-07 17:24:37 +11:00
parent 5e96c041ba
commit ee2b02a5c4
6 changed files with 270 additions and 362 deletions

View File

@ -38,21 +38,19 @@
#include "intrin.h" #include "intrin.h"
// Dependencies
#include "util.c"
#include "conv/decfloat/decfloat.c"
// Platform-independent stuff // Platform-independent stuff
#include "fmt/gen_fmt.c" #include "util.c"
#include "math/basic.c" #include "math/basic.c"
#include "math/division.c" #include "math/division.c"
#include "math/gen_math.c" #include "math/gen_math.c"
#include "math/bits.c" #include "math/bits.c"
#include "math/round.c" #include "math/round.c"
#include "conv/decfloat/decfloat.c"
#include "conv/digits.c" #include "conv/digits.c"
#include "conv/strpfx.c" #include "conv/strpfx.c"
#include "conv/int.c" #include "conv/int.c"
#include "conv/float.c" #include "conv/float.c"
#include "fmt/gen_fmt.c"
#include "stdlib/algorithm.c" #include "stdlib/algorithm.c"
#include "stdlib/multibyte.c" #include "stdlib/multibyte.c"
#include "stdlib/random.c" #include "stdlib/random.c"

View File

@ -3,6 +3,7 @@
typedef struct decfloat_t decfloat_t; typedef struct decfloat_t decfloat_t;
struct decfloat_t { struct decfloat_t {
u64 sign;
u64 exponent; u64 exponent;
i64 mantissa; i64 mantissa;
}; };
@ -18,10 +19,10 @@ static const char DIGIT_TABLE[200] = {
"75767778798081828384858687888990919293949596979899" "75767778798081828384858687888990919293949596979899"
}; };
static inline uint32_t pow5Factor(uint64_t value) { static inline u32 pow5Factor(u64 value) {
const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64) const u64 m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64)
const uint64_t n_div_5 = 3689348814741910323u; // #{ n | n = 0 (mod 2^64) } = 2^64 / 5 const u64 n_div_5 = 3689348814741910323u; // #{ n | n = 0 (mod 2^64) } = 2^64 / 5
uint32_t count = 0; u32 count = 0;
for (;;) { for (;;) {
value *= m_inv_5; value *= m_inv_5;
if (value > n_div_5) if (value > n_div_5)
@ -32,38 +33,38 @@ static inline uint32_t pow5Factor(uint64_t value) {
} }
// Returns true if value is divisible by 5^p. // Returns true if value is divisible by 5^p.
static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) { static inline bool multipleOfPowerOf5(const u64 value, const u32 p) {
// I tried a case distinction on p, but there was no performance difference. // I tried a case distinction on p, but there was no performance difference.
return pow5Factor(value) >= p; return pow5Factor(value) >= p;
} }
// Returns true if value is divisible by 2^p. // Returns true if value is divisible by 2^p.
static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) { static inline bool multipleOfPowerOf2(const u64 value, const u32 p) {
// __builtin_ctzll doesn't appear to be faster here. // __builtin_ctzll doesn't appear to be faster here.
return (value & ((1ull << p) - 1)) == 0; return (value & ((1ull << p) - 1)) == 0;
} }
static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) { static inline u64 umul128(const u64 a, const u64 b, u64* const productHi) {
return _umul128(a, b, productHi); return _umul128(a, b, productHi);
} }
static inline uint64_t umulh(const uint64_t a, const uint64_t b) { static inline u64 umulh(const u64 a, const u64 b) {
uint64_t hi; u64 hi;
umul128(a, b, &hi); umul128(a, b, &hi);
return hi; return hi;
} }
static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) { static inline u64 shiftright128(const u64 lo, const u64 hi, const u32 dist) {
return __shiftright128(lo, hi, (unsigned char) dist); return __shiftright128(lo, hi, (unsigned char) dist);
} }
static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { static inline u64 mulShift64(const u64 m, const u64* const mul, const int32_t j) {
// m is maximum 55 bits // m is maximum 55 bits
uint64_t high1; // 128 u64 high1; // 128
const uint64_t low1 = umul128(m, mul[1], &high1); // 64 const u64 low1 = umul128(m, mul[1], &high1); // 64
uint64_t high0; // 64 u64 high0; // 64
umul128(m, mul[0], &high0); // 0 umul128(m, mul[0], &high0); // 0
const uint64_t sum = high0 + low1; const u64 sum = high0 + low1;
if (sum < high0) { if (sum < high0) {
++high1; // overflow into high1 ++high1; // overflow into high1
} }
@ -71,23 +72,23 @@ static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, c
} }
static inline uint64_t mulShiftAll64(const uint64_t m, const uint64_t* const mul, const int32_t j, static inline u64 mulShiftAll64(const u64 m, const u64* const mul, const int32_t j,
uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) { u64* const vp, u64* const vm, const u32 mmShift) {
*vp = mulShift64(4 * m + 2, mul, j); *vp = mulShift64(4 * m + 2, mul, j);
*vm = mulShift64(4 * m - 1 - mmShift, mul, j); *vm = mulShift64(4 * m - 1 - mmShift, mul, j);
return mulShift64(4 * m, mul, j); return mulShift64(4 * m, mul, j);
} }
// Returns floor(log_10(2^e)); requires 0 <= e <= 1650. // Returns floor(log_10(2^e)); requires 0 <= e <= 1650.
static inline uint32_t log10Pow2(const int32_t e) { static inline u32 log10Pow2(const int32_t e) {
// The first value this approximation fails for is 2^1651 which is just greater than 10^297. // The first value this approximation fails for is 2^1651 which is just greater than 10^297.
return (((uint32_t) e) * 78913) >> 18; return (((u32) e) * 78913) >> 18;
} }
// Returns floor(log_10(5^e)); requires 0 <= e <= 2620. // Returns floor(log_10(5^e)); requires 0 <= e <= 2620.
static inline uint32_t log10Pow5(const int32_t e) { static inline u32 log10Pow5(const int32_t e) {
// The first value this approximation fails for is 5^2621 which is just greater than 10^1832. // The first value this approximation fails for is 5^2621 which is just greater than 10^1832.
return (((uint32_t) e) * 732923) >> 20; return (((u32) e) * 732923) >> 20;
} }
// Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528. // Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
@ -95,10 +96,10 @@ static inline int32_t pow5bits(const int32_t e) {
// This approximation works up to the point that the multiplication overflows at e = 3529. // This approximation works up to the point that the multiplication overflows at e = 3529.
// If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
// than 2^9297. // than 2^9297.
return (int32_t) (((((uint32_t) e) * 1217359) >> 19) + 1); return (int32_t) (((((u32) e) * 1217359) >> 19) + 1);
} }
static inline uint32_t decimalLength17(const uint64_t v) { static inline u32 decimalLength17(const u64 v) {
// This is slightly faster than a loop. // This is slightly faster than a loop.
// The average output length is 16.38 digits, so we check high-to-low. // The average output length is 16.38 digits, so we check high-to-low.
// Function precondition: v is not an 18, 19, or 20-digit number. // Function precondition: v is not an 18, 19, or 20-digit number.
@ -122,25 +123,25 @@ static inline uint32_t decimalLength17(const uint64_t v) {
return 1; return 1;
} }
static inline uint64_t div5(const uint64_t x) { static inline u64 div5(const u64 x) {
return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 2; return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 2;
} }
static inline uint64_t div10(const uint64_t x) { static inline u64 div10(const u64 x) {
return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 3; return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 3;
} }
static inline uint64_t div100(const uint64_t x) { static inline u64 div100(const u64 x) {
return umulh(x >> 2, 0x28F5C28F5C28F5C3u) >> 2; return umulh(x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
} }
static inline uint64_t div1e8(const uint64_t x) { static inline u64 div1e8(const u64 x) {
return umulh(x, 0xABCC77118461CEFDu) >> 26; return umulh(x, 0xABCC77118461CEFDu) >> 26;
} }
static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeExponent) { static decfloat_t ieee_to_decimal(u64 sign, u64 ieeeMantissa, u32 ieeeExponent) {
int32_t e2; int32_t e2;
uint64_t m2; u64 m2;
if (ieeeExponent == 0) { if (ieeeExponent == 0) {
// We subtract 2 so that the bounds computation has 2 additional bits. // We subtract 2 so that the bounds computation has 2 additional bits.
e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2; e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
@ -153,22 +154,22 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
const bool acceptBounds = even; const bool acceptBounds = even;
// Step 2: Determine the interval of valid decimal representations. // Step 2: Determine the interval of valid decimal representations.
const uint64_t mv = 4 * m2; const u64 mv = 4 * m2;
// Implicit bool -> int conversion. True is 1, false is 0. // Implicit bool -> int conversion. True is 1, false is 0.
const uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; const u32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
// We would compute mp and mm like this: // We would compute mp and mm like this:
// uint64_t mp = 4 * m2 + 2; // u64 mp = 4 * m2 + 2;
// uint64_t mm = mv - 1 - mmShift; // u64 mm = mv - 1 - mmShift;
// Step 3: Convert to a decimal power base using 128-bit arithmetic. // Step 3: Convert to a decimal power base using 128-bit arithmetic.
uint64_t vr, vp, vm; u64 vr, vp, vm;
int32_t e10; int32_t e10;
bool vmIsTrailingZeros = false; bool vmIsTrailingZeros = false;
bool vrIsTrailingZeros = false; bool vrIsTrailingZeros = false;
if (e2 >= 0) { if (e2 >= 0) {
// I tried special-casing q == 0, but there was no effect on performance. // I tried special-casing q == 0, but there was no effect on performance.
// This expression is slightly faster than max(0, log10Pow2(e2) - 1). // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
const uint32_t q = log10Pow2(e2) - (e2 > 3); const u32 q = log10Pow2(e2) - (e2 > 3);
e10 = (int32_t) q; e10 = (int32_t) q;
const int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1; const int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1;
const int32_t i = -e2 + (int32_t) q + k; const int32_t i = -e2 + (int32_t) q + k;
@ -177,7 +178,7 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
// This should use q <= 22, but I think 21 is also safe. Smaller values // This should use q <= 22, but I think 21 is also safe. Smaller values
// may still be safe, but it's more difficult to reason about them. // may still be safe, but it's more difficult to reason about them.
// Only one of mp, mv, and mm can be a multiple of 5, if any. // Only one of mp, mv, and mm can be a multiple of 5, if any.
const uint32_t mvMod5 = ((uint32_t) mv) - 5 * ((uint32_t) div5(mv)); const u32 mvMod5 = ((u32) mv) - 5 * ((u32) div5(mv));
if (mvMod5 == 0) { if (mvMod5 == 0) {
vrIsTrailingZeros = multipleOfPowerOf5(mv, q); vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
} else if (acceptBounds) { } else if (acceptBounds) {
@ -192,7 +193,7 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
} }
} else { } else {
// This expression is slightly faster than max(0, log10Pow5(-e2) - 1). // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
const uint32_t q = log10Pow5(-e2) - (-e2 > 1); const u32 q = log10Pow5(-e2) - (-e2 > 1);
e10 = (int32_t) q + e2; e10 = (int32_t) q + e2;
const int32_t i = -e2 - (int32_t) q; const int32_t i = -e2 - (int32_t) q;
const int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT; const int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
@ -220,19 +221,19 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
// Step 4: Find the shortest decimal representation in the interval of valid representations. // Step 4: Find the shortest decimal representation in the interval of valid representations.
int32_t removed = 0; int32_t removed = 0;
uint8_t lastRemovedDigit = 0; uint8_t lastRemovedDigit = 0;
uint64_t output; u64 output;
// On average, we remove ~2 digits. // On average, we remove ~2 digits.
if (vmIsTrailingZeros || vrIsTrailingZeros) { if (vmIsTrailingZeros || vrIsTrailingZeros) {
// General case, which happens rarely (~0.7%). // General case, which happens rarely (~0.7%).
for (;;) { for (;;) {
const uint64_t vpDiv10 = div10(vp); const u64 vpDiv10 = div10(vp);
const uint64_t vmDiv10 = div10(vm); const u64 vmDiv10 = div10(vm);
if (vpDiv10 <= vmDiv10) { if (vpDiv10 <= vmDiv10) {
break; break;
} }
const uint32_t vmMod10 = ((uint32_t) vm) - 10 * ((uint32_t) vmDiv10); const u32 vmMod10 = ((u32) vm) - 10 * ((u32) vmDiv10);
const uint64_t vrDiv10 = div10(vr); const u64 vrDiv10 = div10(vr);
const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10); const u32 vrMod10 = ((u32) vr) - 10 * ((u32) vrDiv10);
vmIsTrailingZeros &= vmMod10 == 0; vmIsTrailingZeros &= vmMod10 == 0;
vrIsTrailingZeros &= lastRemovedDigit == 0; vrIsTrailingZeros &= lastRemovedDigit == 0;
lastRemovedDigit = (uint8_t) vrMod10; lastRemovedDigit = (uint8_t) vrMod10;
@ -243,14 +244,14 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
} }
if (vmIsTrailingZeros) { if (vmIsTrailingZeros) {
for (;;) { for (;;) {
const uint64_t vmDiv10 = div10(vm); const u64 vmDiv10 = div10(vm);
const uint32_t vmMod10 = ((uint32_t) vm) - 10 * ((uint32_t) vmDiv10); const u32 vmMod10 = ((u32) vm) - 10 * ((u32) vmDiv10);
if (vmMod10 != 0) { if (vmMod10 != 0) {
break; break;
} }
const uint64_t vpDiv10 = div10(vp); const u64 vpDiv10 = div10(vp);
const uint64_t vrDiv10 = div10(vr); const u64 vrDiv10 = div10(vr);
const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10); const u32 vrMod10 = ((u32) vr) - 10 * ((u32) vrDiv10);
vrIsTrailingZeros &= lastRemovedDigit == 0; vrIsTrailingZeros &= lastRemovedDigit == 0;
lastRemovedDigit = (uint8_t) vrMod10; lastRemovedDigit = (uint8_t) vrMod10;
vr = vrDiv10; vr = vrDiv10;
@ -268,11 +269,11 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
} else { } else {
// Specialized for the common case (~99.3%). Percentages below are relative to this. // Specialized for the common case (~99.3%). Percentages below are relative to this.
bool roundUp = false; bool roundUp = false;
const uint64_t vpDiv100 = div100(vp); const u64 vpDiv100 = div100(vp);
const uint64_t vmDiv100 = div100(vm); const u64 vmDiv100 = div100(vm);
if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%). if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
const uint64_t vrDiv100 = div100(vr); const u64 vrDiv100 = div100(vr);
const uint32_t vrMod100 = ((uint32_t) vr) - 100 * ((uint32_t) vrDiv100); const u32 vrMod100 = ((u32) vr) - 100 * ((u32) vrDiv100);
roundUp = vrMod100 >= 50; roundUp = vrMod100 >= 50;
vr = vrDiv100; vr = vrDiv100;
vp = vpDiv100; vp = vpDiv100;
@ -284,13 +285,13 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
// Loop iterations below (approximately), with optimization above: // Loop iterations below (approximately), with optimization above:
// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
for (;;) { for (;;) {
const uint64_t vpDiv10 = div10(vp); const u64 vpDiv10 = div10(vp);
const uint64_t vmDiv10 = div10(vm); const u64 vmDiv10 = div10(vm);
if (vpDiv10 <= vmDiv10) { if (vpDiv10 <= vmDiv10) {
break; break;
} }
const uint64_t vrDiv10 = div10(vr); const u64 vrDiv10 = div10(vr);
const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10); const u32 vrMod10 = ((u32) vr) - 10 * ((u32) vrDiv10);
roundUp = vrMod10 >= 5; roundUp = vrMod10 >= 5;
vr = vrDiv10; vr = vrDiv10;
vp = vpDiv10; vp = vpDiv10;
@ -303,7 +304,16 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
const int32_t exp = e10 + removed; const int32_t exp = e10 + removed;
decfloat_t fd; decfloat_t fd;
fd.exponent = exp; fd.sign = sign;
fd.exponent = output? exp : 0;
fd.mantissa = output; fd.mantissa = output;
return fd; return fd;
} }
static decfloat_t f64_to_decimal(double d) {
u64 bits = F64_BITS(d);
u64 ieee_exp = F64_BEXP(bits);
u64 ieee_mant = F64_MANT(bits);
u64 sign = bits >> 63;
return ieee_to_decimal(sign, ieee_mant, ieee_exp);
}

View File

@ -30,3 +30,19 @@ static inline intl todigit(int c) {
return val; return val;
} }
static inline int fromdigit(int digit, int upper) {
int ch;
if(digit < 10) {
ch = digit+'0';
}
else {
if(upper) {
ch = digit-10+'A';
}
else {
ch = digit-10+'a';
}
}
return ch;
}

View File

@ -40,7 +40,7 @@ static inline int pfx(_dtoa)(
int w, int w,
void *ctx, void *ctx,
pfx(cbfn) cb, pfx(cbfn) cb,
double value, decfloat_t value,
int prec, int prec,
int width, int width,
int flags int flags
@ -49,7 +49,7 @@ static inline int pfx(_etoa)(
int w, int w,
void *ctx, void *ctx,
pfx(cbfn) cb, pfx(cbfn) cb,
double value, decfloat_t value,
int prec, int prec,
int width, int width,
int flags int flags
@ -66,6 +66,23 @@ static inline int pfx(_infnantoa)(
#define out(ch) do { if((w++, !cb(ctx, (ch)))) return -1; } while(0) #define out(ch) do { if((w++, !cb(ctx, (ch)))) return -1; } while(0)
#define print_lwidth_spaces(flags, pad) \
do if(!(flags & FLAG_LEFT) && !(flags & FLAG_ZERO)) while(pad-- > 0) { \
out(' '); \
} \
while(0)
#define print_lwidth_zeros(flags, pad) \
do if(!(flags & FLAG_LEFT) && (flags & FLAG_ZERO)) while(pad-- > 0) { \
out('0'); \
} \
while(0)
#define print_rwidth(flags, pad) \
if(flags & FLAG_LEFT) while(pad-- > 0) { \
out(' '); \
}
// Generic print // Generic print
static int pfx(vprintfcb)( static int pfx(vprintfcb)(
void *ctx, void *ctx,
@ -245,43 +262,39 @@ static int pfx(vprintfcb)(
char conv = tolower(*fmt); char conv = tolower(*fmt);
++fmt; ++fmt;
double value = va_arg(va, double); double value = va_arg(va, double);
int class = fpclassify(value);
if(class == FP_INFINITE || class == FP_NAN) {
return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
}
decfloat_t dec_value = f64_to_decimal(value);
if(conv == 'f') { if(conv == 'f') {
w = pfx(_dtoa)(w, ctx, cb, value, prec, width, flags); w = pfx(_dtoa)(w, ctx, cb, dec_value, prec, width, flags);
} }
else if(conv == 'e') { else if(conv == 'e') {
w = pfx(_etoa)(w, ctx, cb, value, prec, width, flags); w = pfx(_etoa)(w, ctx, cb, dec_value, prec, width, flags);
} }
else { else {
int P = prec; int P = prec;
if(!(flags & FLAG_PREC)) P = 6; if(!(flags & FLAG_PREC)) P = 6;
if(prec == 0) P = 1; if(prec == 0) P = 1;
int class = fpclassify(value);
int64_t E; int64_t E;
{ {
union {
uint64_t bits;
double value;
} _ = {.value = value};
uint64_t bits = _.bits;
// uint64_t neg = bits >> 63;
int64_t e2 = (int64_t)((bits >> 52) & ((1<<11)-1));
int64_t m2 = bits & ((UINT64_C(1)<<52)-1);
if(class == FP_ZERO) { if(class == FP_ZERO) {
E = 0; E = 0;
} }
else { else {
decfloat_t f = dtodecfloat(m2, e2); E = dec_value.exponent;
E = f.exponent;
} }
} }
if(class == FP_SUBNORMAL || class == FP_ZERO) { if(class == FP_SUBNORMAL || class == FP_ZERO) {
E = 0; E = 0;
} }
flags |= FLAG_PREC;
if(P > E && E >= -4) { if(P > E && E >= -4) {
w = pfx(_dtoa)(w, ctx, cb, value, P-(E+1), width, flags); w = pfx(_dtoa)(w, ctx, cb, dec_value, P-(E+1), width, flags);
} }
else { else {
w = pfx(_etoa)(w, ctx, cb, value, P-1, width, flags); w = pfx(_etoa)(w, ctx, cb, dec_value, P-1, width, flags);
} }
} }
if(w < 0) return -1; if(w < 0) return -1;
@ -493,12 +506,9 @@ static inline int pfx(_infnantoa)(
if(isnan(value)) name = "nan", nchars = 3; if(isnan(value)) name = "nan", nchars = 3;
// Figure out prefix // Figure out prefix
int pref_len = 0; int pref_len = 0;
union { u64 bits = F64_BITS(value);
uint64_t bits; u64 mant = F64_MANT(bits);
double value; int neg = F64_SIGN(bits) & (mant == 0);
} _ = {value};
uint64_t bits = _.bits;
uint64_t neg = bits >> 63;
if(neg) pref_len = 1; if(neg) pref_len = 1;
else if(flags & FLAG_PLUS) pref_len = 1; else if(flags & FLAG_PLUS) pref_len = 1;
else if(flags & FLAG_SPACE) pref_len = 1; else if(flags & FLAG_SPACE) pref_len = 1;
@ -537,133 +547,123 @@ static inline int pfx(_dtoh)(
int width, int width,
int flags int flags
) { ) {
int class = fpclassify(value); // Filter out inifnities and NaN
if(class == FP_INFINITE || class == FP_NAN) {
return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
}
// Disassemble the float into parts
union {
uint64_t bits;
double value;
} _ = {.value = value};
uint64_t bits = _.bits;
// uint64_t neg = bits >> 63;
int64_t exp = (int64_t)((bits >> 52) & 0x7ff) - 1023;
int64_t mant = bits & ((UINT64_C(1)<<51)-1);
if(class == FP_SUBNORMAL || class == FP_ZERO) {
exp = 0;
}
// Figure out how many hex digits does mantissa take up (mant_digits_n)
// and the number of digits after decimal point (prec)
int mant_digits_n;
int nonsig_digits_n = 0;
if(mant != 0) {
int64_t m = mant;
while((m & 0xf) == 0) {
++nonsig_digits_n;
m >>= 4;
}
}
else {
nonsig_digits_n = 52/4;
}
mant_digits_n = 52/4 - nonsig_digits_n;
if((flags & FLAG_PREC) == 0) {
prec = mant_digits_n;
}
// Figure out how many symbols decimal point takes up (0 or 1)
int decimal_point_n = 1;
if(prec == 0) {
decimal_point_n = 0;
if(flags & FLAG_HASH) {
decimal_point_n = 1;
}
}
// Figure out how many digits exponent take up and it's sign
// also save exponent digits to a buffer starting from LSD
static ctype exp_buf[64] = {0};
int exp_digits_n = 0;
ctype exp_sign;
if(exp < 0) {
exp_sign = '-';
exp = -exp;
}
else {
exp_sign = '+';
}
{ {
int64_t e = exp; int class = fpclassify(value);
if(class == FP_INFINITE || class == FP_NAN) {
return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
}
}
// Deconstruct the float
u64 bits = F64_BITS(value);
u64 flt_sgn = F64_SIGN(bits);
u64 flt_mnt = F64_MANT(bits);
i64 flt_exp = F64_BEXP(bits);
u64 frc;
u64 mnt;
i64 exp;
{
mnt = flt_mnt;
exp = flt_exp - 0x3ff;
if(flt_exp == 0 && flt_mnt == 0) {
exp = 0;
}
}
// Calculate widths of different number parts and extract some data
static ctype frc_buffer[64] = {0};
static ctype exp_buffer[64] = {0};
ctype *frc_buf = frc_buffer + sizeof frc_buffer;
ctype *exp_buf = exp_buffer + sizeof exp_buffer;
ctype sgn_ch;
ctype exp_sgn_ch;
int has_sgn;
int has_point;
int nfrc = 0;
int nexp = 0;
int nfrcp = 0;
{
// Sign
if(flt_sgn) {
has_sgn = 1;
sgn_ch = '-';
}
else if(flags & FLAG_PLUS) {
has_sgn = 1;
sgn_ch = '+';
}
else if(flags & FLAG_SPACE) {
has_sgn = 1;
sgn_ch = ' ';
}
// Fractional part
frc = mnt;
if(frc != 0) {
while(!(frc & 0xf)) {
frc >>= 4;
}
}
u64 f = frc;
do { do {
exp_buf[exp_digits_n++] = e % 10 + '0'; int d = f % 16;
f /= 16;
*--frc_buf = fromdigit(d, flags & FLAG_UPPER);
} while(f != 0);
// Frac/whole digits
if(flags & FLAG_PREC) {
nfrcp = prec;
}
else {
nfrcp = nfrc;
}
// Decimal point
has_point = 1;
if(nfrc == 0 && !(flags & FLAG_HASH)) {
has_point = 0;
}
// Exponent
i64 e = exp;
if(e < 0) {
exp_sgn_ch = '-';
e = -e;
}
else {
exp_sgn_ch = '+';
}
do {
*--exp_buf = (e%10) + '0';
e /= 10; e /= 10;
nexp ++;
} while(e != 0); } while(e != 0);
} }
// Figure out sign symbol and number of chars it takes up (0 or 1) // Figure out the width of a float
int sign_n = 0; int nfloat = has_sgn + 2 + 1 + has_point + nfrcp + 1 + 1 + nexp;
ctype sign_ch; int pad = MIN(0, width - nfloat);
if(value < 0) { print_lwidth_spaces(flags, pad);
sign_n = 1; if(has_sgn) {
sign_ch = '-'; out(sgn_ch);
} }
else if(flags & FLAG_PLUS) {
sign_n = 1;
sign_ch = '+';
}
else if(flags & FLAG_SPACE) {
sign_n = 1;
sign_ch = ' ';
}
// Figure out the width of the number
int significand_width = 1 + decimal_point_n + prec;
int exp_part_width = 2 /*p+-*/ + exp_digits_n;
int n_width = sign_n + 2 /*0x*/ + significand_width + exp_part_width;
// Figure out width-padding required
int pad = 0;
if(width > n_width) pad = width - n_width;
// Print width left-pad if it's made out of space
if(!(flags & FLAG_LEFT) && !(flags & FLAG_ZERO)) while(pad-- > 0) {
out(' ');
}
// Print sign if there
if(sign_n)
out(sign_ch);
// Print 0x, 0X
out('0'); out('0');
out((flags & FLAG_UPPER)? 'X' : 'x'); out((flags & FLAG_UPPER)? 'X' : 'x');
// Print width left-pad if it's made out of zero print_lwidth_zeros(flags, pad);
if(!(flags & FLAG_LEFT) && (flags & FLAG_ZERO)) while(pad-- > 0) { out((flt_exp == 0) ? '0' : '1');
out('0'); if(has_point) {
out('.');
} }
// Print whole part int i;
out((class == FP_SUBNORMAL || class == FP_ZERO)? '0' : '1'); for(i = 0; i != nfrcp; ++i) {
// Print decimal point if(i < nfrc) {
if(decimal_point_n) out('.'); out(frc_buf[i]);
int digs = mant_digits_n < prec? mant_digits_n : prec; }
for(int i = 0; i != digs; ++i) { else {
int bit = 52 - 4 - i; out('0');
int digit = (int)((mant >> bit) & 0xf); }
ctype ch;
if(digit < 10)
ch = digit+'0';
else if(flags & FLAG_UPPER)
ch = digit+'A'-10;
else ch = digit+'a'-10;
out(ch);
} }
// Print precision padding
for(int i = mant_digits_n; i < prec; ++i) {
out('0');
}
// Print the exponent part
out((flags & FLAG_UPPER)? 'P' : 'p'); out((flags & FLAG_UPPER)? 'P' : 'p');
out(exp_sign); out(exp_sgn_ch);
if(exp_digits_n) do for(int i = 0; i != nexp; ++i) {
out(exp_buf[exp_digits_n]); out(exp_buf[i]);
while(exp_digits_n--);
// Print right-pad
if(flags & FLAG_LEFT) while(pad-- > 0) {
out(' ');
} }
print_rwidth(flags, pad);
return w; return w;
} }
@ -671,37 +671,13 @@ static inline int pfx(_dtoa)(
int w, int w,
void *ctx, void *ctx,
pfx(cbfn) cb, pfx(cbfn) cb,
double value, decfloat_t value,
int prec, int prec,
int width, int width,
int flags int flags
) { ) {
int class = fpclassify(value); int64_t exp = value.exponent;
if(class == FP_INFINITE || class == FP_NAN) { int64_t mant = value.mantissa;
return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
}
// Disassemble the float into parts
int64_t exp;
int64_t mant;
{
union {
uint64_t bits;
double value;
} _ = {.value = value};
uint64_t bits = _.bits;
//uint64_t neg = bits >> 63;
int64_t e2 = (int64_t)((bits >> 52) & ((1<<11)-1));
int64_t m2 = bits & ((UINT64_C(1)<<52)-1);
if(class == FP_ZERO) {
mant = 0;
exp = 0;
}
else {
decfloat_t f = dtodecfloat(m2, e2);
mant = f.mantissa;
exp = f.exponent;
}
}
// Figure out how many digits does mantissa take up (mant_digits_n) // Figure out how many digits does mantissa take up (mant_digits_n)
// and the number of digits after decimal point (prec) // and the number of digits after decimal point (prec)
// and the number of digits before decimal point (while_digits_n) // and the number of digits before decimal point (while_digits_n)
@ -731,7 +707,7 @@ static inline int pfx(_dtoa)(
// Figure out sign symbol and number of chars it takes up (0 or 1) // Figure out sign symbol and number of chars it takes up (0 or 1)
int sign_n = 0; int sign_n = 0;
ctype sign_ch; ctype sign_ch;
if(value < 0) { if(value.sign) {
sign_n = 1; sign_n = 1;
sign_ch = '-'; sign_ch = '-';
} }
@ -797,36 +773,13 @@ static inline int pfx(_etoa)(
int w, int w,
void *ctx, void *ctx,
pfx(cbfn) cb, pfx(cbfn) cb,
double value, decfloat_t value,
int prec, int prec,
int width, int width,
int flags int flags
) { ) {
int class = fpclassify(value); int64_t exp = value.exponent;
if(class == FP_INFINITE || class == FP_NAN) { int64_t mant = value.mantissa;
return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
}
// Disassemble the float into parts
int64_t exp;
int64_t mant;
{
union {
uint64_t bits;
double value;
} _ = {.value = value};
uint64_t bits = _.bits;
int64_t e2 = (int64_t)((bits >> 52) & ((1<<11)-1));
int64_t m2 = bits & ((UINT64_C(1)<<52)-1);
if(class == FP_ZERO) {
mant = 0;
exp = 0;
}
else {
decfloat_t f = dtodecfloat(m2, e2);
mant = f.mantissa;
exp = f.exponent;
}
}
// Figure out how many digits does mantissa take up (mant_digits_n) // Figure out how many digits does mantissa take up (mant_digits_n)
// and the number of digits after decimal point (prec) // and the number of digits after decimal point (prec)
static ctype mant_buf[64] = {0}; static ctype mant_buf[64] = {0};
@ -874,7 +827,7 @@ static inline int pfx(_etoa)(
// Figure out sign symbol and number of chars it takes up (0 or 1) // Figure out sign symbol and number of chars it takes up (0 or 1)
int sign_n = 0; int sign_n = 0;
ctype sign_ch; ctype sign_ch;
if(value < 0) { if(value.sign) {
sign_n = 1; sign_n = 1;
sign_ch = '-'; sign_ch = '-';
} }

View File

@ -71,3 +71,6 @@ typedef wchar_t wchar;
#define F32_SIGN(bits) (bits >> (F32_MANT_BITS + F32_BEXP_BITS)) #define F32_SIGN(bits) (bits >> (F32_MANT_BITS + F32_BEXP_BITS))
#define F32_BEXP(bits) ((bits >> F32_MANT_BITS) & F32_BEXP_MASK) #define F32_BEXP(bits) ((bits >> F32_MANT_BITS) & F32_BEXP_MASK)
#define F32_MANT(bits) ((bits) & F32_MANT_MASK) #define F32_MANT(bits) ((bits) & F32_MANT_MASK)
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))

View File

@ -1,6 +1,37 @@
#include <std.h> #include <std.h>
double cool_doubles[] = {
-NAN,
-INFINITY,
-DBL_MAX,
-2.7,
-2.5,
-2.3,
-1.0,
-DBL_TRUE_MIN,
-DBL_MIN,
-0.0,
+0.0,
DBL_MIN,
DBL_TRUE_MIN,
1.0,
2.3,
2.5,
2.7,
DBL_MAX,
INFINITY,
NAN,
};
size_t ncool_doubles = COUNTOF(cool_doubles);
#define for_each_cool_double(i,d,b) \
for(int i = 0; i != ncool_doubles; ++i) { \
double d = cool_doubles[i]; \
b; \
}
const char *show_classification(double x) { const char *show_classification(double x) {
switch(fpclassify(x)) { switch(fpclassify(x)) {
case FP_INFINITE: return "Inf"; case FP_INFINITE: return "Inf";
@ -13,112 +44,9 @@ const char *show_classification(double x) {
} }
int main() { int main() {
for(int i = 0; i != ncool_doubles; ++i) {
printf("\n=== fpclassify === \n"); double d = cool_doubles[i];
double zero = 0.0; // fucken msvc printf("%a\n", d);
printf("1.0/0.0 is %s\n", show_classification(1.0/zero)); }
printf("0.0/0.0 is %s\n", show_classification(0.0/zero));
printf("DBL_MIN/2 is %s\n", show_classification(DBL_MIN/2));
printf("-0.0 is %s\n", show_classification(-0.0));
printf("1.0 is %s\n", show_classification(1.0));
printf("\n\n=== nearbyint === \n");
double d;
printf("nearbyint(+0.0) = %f\n", d=nearbyint(+0.0));
printf("nearbyint(-0.0) = %f\n", d=nearbyint(-0.0));
printf("nearbyint(+16.4) = %f\n", d=nearbyint(+16.4));
printf("nearbyint(+16.5) = %f\n", d=nearbyint(+16.5));
printf("nearbyint(+16.8) = %f\n", d=nearbyint(+16.8));
printf("\n\n=== signbit === \n");
printf("signbit(+0.0) = %d\n", signbit(+0.0));
printf("signbit(-0.0) = %d\n", signbit(-0.0));
printf("\n\n=== copysign === \n");
printf("copysign(1.0,+2.0) = %f\n", copysign(1.0,+2.0));
printf("copysign(1.0,-2.0) = %f\n", copysign(1.0,-2.0));
printf("copysign(INFINITY,-2.0) = %f\n", copysign(INFINITY,-2.0));
printf("copysign(NAN,-2.0) = %f\n", copysign(NAN,-2.0));
printf("\n\n=== rint === \n");
fesetround(FE_TONEAREST);
printf("rounding to nearest (halfway cases to even):\n"
"rint(+2.3) = %f ", rint(2.3));
printf("rint(+2.5) = %f ", rint(2.5));
printf("rint(+3.5) = %f\n", rint(3.5));
printf("rint(-2.3) = %f ", rint(-2.3));
printf("rint(-2.5) = %f ", rint(-2.5));
printf("rint(-3.5) = %f\n", rint(-3.5));
fesetround(FE_DOWNWARD);
printf("rounding down: \nrint(+2.3) = %f ", rint(2.3));
printf("rint(+2.5) = %f ", rint(2.5));
printf("rint(+3.5) = %f\n", rint(3.5));
printf("rint(-2.3) = %f ", rint(-2.3));
printf("rint(-2.5) = %f ", rint(-2.5));
printf("rint(-3.5) = %f\n", rint(-3.5));
feclearexcept(FE_ALL_EXCEPT);
printf("rint(1.1) = %f\n", rint(1.1));
if(fetestexcept(FE_INEXACT)) printf(" FE_INEXACT was raised\n");
printf("\n\n=== nextafter === \n");
float from1 = 0, to1 = nextafterf(from1, 1);
printf("The next representable float after %f is %f\n", from1, to1);
float from2 = 1, to2 = nextafterf(from2, 2);
printf("The next representable float after %f is %f\n", from2, to2);
float from5 = 0.0, to5 = nextafter(from5, -0.0);
printf("nextafter(+0.0, -0.0) gives %f (%f)\n", to5, to5);
printf("\n\n=== ceil === \n");
printf("ceil(+2.4) = %f\n", ceil(2.4));
printf("ceil(-2.4) = %f\n", ceil(-2.4));
printf("ceil(-0.0) = %f\n", ceil(-0.0));
printf("ceil(-Inf) = %f\n", ceil(-INFINITY));
printf("\n\n=== floor === \n");
printf("floor(+2.7) = %f\n", floor(2.7));
printf("floor(-2.7) = %f\n", floor(-2.7));
printf("floor(-0.0) = %f\n", floor(-0.0));
printf("floor(-Inf) = %f\n", floor(-INFINITY));
printf("\n\n=== trunk === \n");
printf("trunc(+2.7) = %f\n", trunc(2.7));
printf("trunc(-2.7) = %f\n", trunc(-2.7));
printf("trunc(-0.0) = %f\n", trunc(-0.0));
printf("trunc(-Inf) = %f\n", trunc(-INFINITY));
printf("\n\n=== round === \n");
printf("round(+2.3) = %f ", round(2.3));
printf("round(+2.5) = %f ", round(2.5));
printf("round(+2.7) = %f\n", round(2.7));
printf("round(-2.3) = %f ", round(-2.3));
printf("round(-2.5) = %f ", round(-2.5));
printf("round(-2.7) = %f\n", round(-2.7));
printf("round(-0.0) = %f\n", round(-0.0));
printf("round(-Inf) = %f\n", round(-INFINITY));
printf("lround(+2.3) = %ld ", lround(2.3));
printf("lround(+2.5) = %ld ", lround(2.5));
printf("lround(+2.7) = %ld\n", lround(2.7));
printf("lround(-2.3) = %ld ", lround(-2.3));
printf("lround(-2.5) = %ld ", lround(-2.5));
printf("lround(-2.7) = %ld\n", lround(-2.7));
printf("lround(-0.0) = %ld\n", lround(-0.0));
printf("lround(-Inf) = %ld\n", lround(-INFINITY));
feclearexcept(FE_ALL_EXCEPT);
printf("lround(LONG_MAX+1.5) = %ld\n", lround(LONG_MAX+1.5));
if(fetestexcept(FE_INVALID)) printf(" FE_INVALID was raised\n");
printf("\n\n=== exp === \n");
printf("exp(1) = %e\n", exp(1));
printf("FV of $100, continuously compounded at 3%% for 1 year = %e\n",
100*exp(0.03));
// special values
printf("exp(-0) = %e\n", exp(-0.0));
printf("exp(-Inf) = %e\n", exp(-INFINITY));
//error handling
errno = 0; feclearexcept(FE_ALL_EXCEPT);
printf("exp(710) = %e\n", exp(710));
if(errno == ERANGE) printf(" errno == ERANGE\n");
if(fetestexcept(FE_OVERFLOW)) printf(" FE_OVERFLOW raised\n");
return 0; return 0;
} }