diff --git a/src/ciabatta.c b/src/ciabatta.c
index f4687ec..fd4f780 100644
--- a/src/ciabatta.c
+++ b/src/ciabatta.c
@@ -38,21 +38,19 @@
 
 #include "intrin.h"
 
-// Dependencies
-#include "util.c"
-#include "conv/decfloat/decfloat.c"
-
 // Platform-independent stuff
-#include "fmt/gen_fmt.c"
+#include "util.c"
 #include "math/basic.c"
 #include "math/division.c"
 #include "math/gen_math.c"
 #include "math/bits.c"
 #include "math/round.c"
+#include "conv/decfloat/decfloat.c"
 #include "conv/digits.c"
 #include "conv/strpfx.c"
 #include "conv/int.c"
 #include "conv/float.c"
+#include "fmt/gen_fmt.c"
 #include "stdlib/algorithm.c"
 #include "stdlib/multibyte.c"
 #include "stdlib/random.c"
diff --git a/src/conv/decfloat/decfloat.c b/src/conv/decfloat/decfloat.c
index 77cc336..bbaebd2 100644
--- a/src/conv/decfloat/decfloat.c
+++ b/src/conv/decfloat/decfloat.c
@@ -3,6 +3,7 @@
 
 typedef struct decfloat_t decfloat_t;
 struct decfloat_t {
+    u64 sign;
     u64 exponent;
     i64 mantissa;
 };
@@ -18,10 +19,10 @@ static const char DIGIT_TABLE[200] = {
     "75767778798081828384858687888990919293949596979899"
 };
 
-static inline uint32_t pow5Factor(uint64_t value) {
-  const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64)
-  const uint64_t n_div_5 = 3689348814741910323u;  // #{ n | n = 0 (mod 2^64) } = 2^64 / 5
-  uint32_t count = 0;
+static inline u32 pow5Factor(u64 value) {
+  const u64 m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64)
+  const u64 n_div_5 = 3689348814741910323u;  // #{ n | n = 0 (mod 2^64) } = 2^64 / 5
+  u32 count = 0;
   for (;;) {
       value *= m_inv_5;
       if (value > n_div_5)
@@ -32,38 +33,38 @@ static inline uint32_t pow5Factor(uint64_t value) {
 }
 
 // Returns true if value is divisible by 5^p.
-static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) {
+static inline bool multipleOfPowerOf5(const u64 value, const u32 p) {
     // I tried a case distinction on p, but there was no performance difference.
     return pow5Factor(value) >= p;
 }
 
 // Returns true if value is divisible by 2^p.
-static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) {
+static inline bool multipleOfPowerOf2(const u64 value, const u32 p) {
     // __builtin_ctzll doesn't appear to be faster here.
     return (value & ((1ull << p) - 1)) == 0;
 }
 
-static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) {
+static inline u64 umul128(const u64 a, const u64 b, u64* const productHi) {
     return _umul128(a, b, productHi);
 }
 
-static inline uint64_t umulh(const uint64_t a, const uint64_t b) {
-    uint64_t hi;
+static inline u64 umulh(const u64 a, const u64 b) {
+    u64 hi;
     umul128(a, b, &hi);
     return hi;
 }
 
-static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) {
+static inline u64 shiftright128(const u64 lo, const u64 hi, const u32 dist) {
     return __shiftright128(lo, hi, (unsigned char) dist);
 }
 
-static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
+static inline u64 mulShift64(const u64 m, const u64* const mul, const int32_t j) {
                 // m is maximum 55 bits
-    uint64_t high1;                                   // 128
-    const uint64_t low1 = umul128(m, mul[1], &high1); // 64
-    uint64_t high0;                                   // 64
+    u64 high1;                                   // 128
+    const u64 low1 = umul128(m, mul[1], &high1); // 64
+    u64 high0;                                   // 64
     umul128(m, mul[0], &high0);                       // 0
-    const uint64_t sum = high0 + low1;
+    const u64 sum = high0 + low1;
     if (sum < high0) {
         ++high1; // overflow into high1
     }
@@ -71,23 +72,23 @@ static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, c
 }
 
 
-static inline uint64_t mulShiftAll64(const uint64_t m, const uint64_t* const mul, const int32_t j,
-    uint64_t* const vp, uint64_t* const vm, const uint32_t mmShift) {
+static inline u64 mulShiftAll64(const u64 m, const u64* const mul, const int32_t j,
+    u64* const vp, u64* const vm, const u32 mmShift) {
     *vp = mulShift64(4 * m + 2, mul, j);
     *vm = mulShift64(4 * m - 1 - mmShift, mul, j);
     return mulShift64(4 * m, mul, j);
 }
 
 // Returns floor(log_10(2^e)); requires 0 <= e <= 1650.
-static inline uint32_t log10Pow2(const int32_t e) {
+static inline u32 log10Pow2(const int32_t e) {
     // The first value this approximation fails for is 2^1651 which is just greater than 10^297.
-    return (((uint32_t) e) * 78913) >> 18;
+    return (((u32) e) * 78913) >> 18;
 }
 
 // Returns floor(log_10(5^e)); requires 0 <= e <= 2620.
-static inline uint32_t log10Pow5(const int32_t e) {
+static inline u32 log10Pow5(const int32_t e) {
     // The first value this approximation fails for is 5^2621 which is just greater than 10^1832.
-    return (((uint32_t) e) * 732923) >> 20;
+    return (((u32) e) * 732923) >> 20;
 }
 
 // Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
@@ -95,10 +96,10 @@ static inline int32_t pow5bits(const int32_t e) {
     // This approximation works up to the point that the multiplication overflows at e = 3529.
     // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
     // than 2^9297.
-    return (int32_t) (((((uint32_t) e) * 1217359) >> 19) + 1);
+    return (int32_t) (((((u32) e) * 1217359) >> 19) + 1);
 }
 
-static inline uint32_t decimalLength17(const uint64_t v) {
+static inline u32 decimalLength17(const u64 v) {
     // This is slightly faster than a loop.
     // The average output length is 16.38 digits, so we check high-to-low.
     // Function precondition: v is not an 18, 19, or 20-digit number.
@@ -122,25 +123,25 @@ static inline uint32_t decimalLength17(const uint64_t v) {
     return 1;
 }
 
-static inline uint64_t div5(const uint64_t x) {
+static inline u64 div5(const u64 x) {
     return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 2;
 }
 
-static inline uint64_t div10(const uint64_t x) {
+static inline u64 div10(const u64 x) {
     return umulh(x, 0xCCCCCCCCCCCCCCCDu) >> 3;
 }
 
-static inline uint64_t div100(const uint64_t x) {
+static inline u64 div100(const u64 x) {
     return umulh(x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
 }
 
-static inline uint64_t div1e8(const uint64_t x) {
+static inline u64 div1e8(const u64 x) {
     return umulh(x, 0xABCC77118461CEFDu) >> 26;
 }
 
-static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeExponent) {
+static decfloat_t ieee_to_decimal(u64 sign, u64 ieeeMantissa, u32 ieeeExponent) {
     int32_t e2;
-    uint64_t m2;
+    u64 m2;
     if (ieeeExponent == 0) {
                 // We subtract 2 so that the bounds computation has 2 additional bits.
         e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
@@ -153,22 +154,22 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
     const bool acceptBounds = even;
 
     // Step 2: Determine the interval of valid decimal representations.
-    const uint64_t mv = 4 * m2;
+    const u64 mv = 4 * m2;
     // Implicit bool -> int conversion. True is 1, false is 0.
-    const uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
+    const u32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
     // We would compute mp and mm like this:
-    // uint64_t mp = 4 * m2 + 2;
-    // uint64_t mm = mv - 1 - mmShift;
+    // u64 mp = 4 * m2 + 2;
+    // u64 mm = mv - 1 - mmShift;
 
     // Step 3: Convert to a decimal power base using 128-bit arithmetic.
-    uint64_t vr, vp, vm;
+    u64 vr, vp, vm;
     int32_t e10;
     bool vmIsTrailingZeros = false;
     bool vrIsTrailingZeros = false;
     if (e2 >= 0) {
         // I tried special-casing q == 0, but there was no effect on performance.
         // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
-        const uint32_t q = log10Pow2(e2) - (e2 > 3);
+        const u32 q = log10Pow2(e2) - (e2 > 3);
         e10 = (int32_t) q;
         const int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1;
         const int32_t i = -e2 + (int32_t) q + k;
@@ -177,7 +178,7 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
             // This should use q <= 22, but I think 21 is also safe. Smaller values
             // may still be safe, but it's more difficult to reason about them.
             // Only one of mp, mv, and mm can be a multiple of 5, if any.
-            const uint32_t mvMod5 = ((uint32_t) mv) - 5 * ((uint32_t) div5(mv));
+            const u32 mvMod5 = ((u32) mv) - 5 * ((u32) div5(mv));
             if (mvMod5 == 0) {
                 vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
             } else if (acceptBounds) {
@@ -192,7 +193,7 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
         }
     } else {
         // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
-        const uint32_t q = log10Pow5(-e2) - (-e2 > 1);
+        const u32 q = log10Pow5(-e2) - (-e2 > 1);
         e10 = (int32_t) q + e2;
         const int32_t i = -e2 - (int32_t) q;
         const int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
@@ -220,19 +221,19 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
     // Step 4: Find the shortest decimal representation in the interval of valid representations.
     int32_t removed = 0;
     uint8_t lastRemovedDigit = 0;
-    uint64_t output;
+    u64 output;
     // On average, we remove ~2 digits.
     if (vmIsTrailingZeros || vrIsTrailingZeros) {
         // General case, which happens rarely (~0.7%).
         for (;;) {
-            const uint64_t vpDiv10 = div10(vp);
-            const uint64_t vmDiv10 = div10(vm);
+            const u64 vpDiv10 = div10(vp);
+            const u64 vmDiv10 = div10(vm);
             if (vpDiv10 <= vmDiv10) {
                 break;
             }
-            const uint32_t vmMod10 = ((uint32_t) vm) - 10 * ((uint32_t) vmDiv10);
-            const uint64_t vrDiv10 = div10(vr);
-            const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
+            const u32 vmMod10 = ((u32) vm) - 10 * ((u32) vmDiv10);
+            const u64 vrDiv10 = div10(vr);
+            const u32 vrMod10 = ((u32) vr) - 10 * ((u32) vrDiv10);
             vmIsTrailingZeros &= vmMod10 == 0;
             vrIsTrailingZeros &= lastRemovedDigit == 0;
             lastRemovedDigit = (uint8_t) vrMod10;
@@ -243,14 +244,14 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
         }
         if (vmIsTrailingZeros) {
             for (;;) {
-                const uint64_t vmDiv10 = div10(vm);
-                const uint32_t vmMod10 = ((uint32_t) vm) - 10 * ((uint32_t) vmDiv10);
+                const u64 vmDiv10 = div10(vm);
+                const u32 vmMod10 = ((u32) vm) - 10 * ((u32) vmDiv10);
                 if (vmMod10 != 0) {
                     break;
                 }
-                const uint64_t vpDiv10 = div10(vp);
-                const uint64_t vrDiv10 = div10(vr);
-                const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
+                const u64 vpDiv10 = div10(vp);
+                const u64 vrDiv10 = div10(vr);
+                const u32 vrMod10 = ((u32) vr) - 10 * ((u32) vrDiv10);
                 vrIsTrailingZeros &= lastRemovedDigit == 0;
                 lastRemovedDigit = (uint8_t) vrMod10;
                 vr = vrDiv10;
@@ -268,11 +269,11 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
     } else {
         // Specialized for the common case (~99.3%). Percentages below are relative to this.
         bool roundUp = false;
-        const uint64_t vpDiv100 = div100(vp);
-        const uint64_t vmDiv100 = div100(vm);
+        const u64 vpDiv100 = div100(vp);
+        const u64 vmDiv100 = div100(vm);
         if (vpDiv100 > vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
-            const uint64_t vrDiv100 = div100(vr);
-            const uint32_t vrMod100 = ((uint32_t) vr) - 100 * ((uint32_t) vrDiv100);
+            const u64 vrDiv100 = div100(vr);
+            const u32 vrMod100 = ((u32) vr) - 100 * ((u32) vrDiv100);
             roundUp = vrMod100 >= 50;
             vr = vrDiv100;
             vp = vpDiv100;
@@ -284,13 +285,13 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
         // Loop iterations below (approximately), with optimization above:
         // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
         for (;;) {
-            const uint64_t vpDiv10 = div10(vp);
-            const uint64_t vmDiv10 = div10(vm);
+            const u64 vpDiv10 = div10(vp);
+            const u64 vmDiv10 = div10(vm);
             if (vpDiv10 <= vmDiv10) {
                 break;
             }
-            const uint64_t vrDiv10 = div10(vr);
-            const uint32_t vrMod10 = ((uint32_t) vr) - 10 * ((uint32_t) vrDiv10);
+            const u64 vrDiv10 = div10(vr);
+            const u32 vrMod10 = ((u32) vr) - 10 * ((u32) vrDiv10);
             roundUp = vrMod10 >= 5;
             vr = vrDiv10;
             vp = vpDiv10;
@@ -303,7 +304,16 @@ static decfloat_t dtodecfloat(const uint64_t ieeeMantissa, const uint32_t ieeeEx
     const int32_t exp = e10 + removed;
 
     decfloat_t fd;
-    fd.exponent = exp;
+    fd.sign = sign;
+    fd.exponent = output? exp : 0;
     fd.mantissa = output;
     return fd;
 }
+
+static decfloat_t f64_to_decimal(double d) {
+    u64 bits = F64_BITS(d);
+    u64 ieee_exp = F64_BEXP(bits);
+    u64 ieee_mant = F64_MANT(bits);
+    u64 sign = bits >> 63;
+    return ieee_to_decimal(sign, ieee_mant, ieee_exp);
+}
diff --git a/src/conv/digits.c b/src/conv/digits.c
index dc4a8aa..bf75aa0 100644
--- a/src/conv/digits.c
+++ b/src/conv/digits.c
@@ -30,3 +30,19 @@ static inline intl todigit(int c) {
     return val;
 }
 
+static inline int fromdigit(int digit, int upper) {
+    int ch;
+    if(digit < 10) {
+        ch = digit+'0';
+    }
+    else {
+        if(upper) {
+            ch = digit-10+'A';
+        }
+        else {
+            ch = digit-10+'a';
+        }
+    }
+    return ch;
+}
+
diff --git a/src/fmt/gen_fmt.h b/src/fmt/gen_fmt.h
index d47edc4..d58723d 100644
--- a/src/fmt/gen_fmt.h
+++ b/src/fmt/gen_fmt.h
@@ -40,7 +40,7 @@ static inline int pfx(_dtoa)(
     int w,
     void *ctx,
     pfx(cbfn) cb,
-    double value,
+    decfloat_t value,
     int prec,
     int width,
     int flags
@@ -49,7 +49,7 @@ static inline int pfx(_etoa)(
     int w,
     void *ctx,
     pfx(cbfn) cb,
-    double value,
+    decfloat_t value,
     int prec,
     int width,
     int flags
@@ -66,6 +66,23 @@ static inline int pfx(_infnantoa)(
 
 #define out(ch) do { if((w++, !cb(ctx, (ch)))) return -1; } while(0)
 
+#define print_lwidth_spaces(flags, pad)                                    \
+    do if(!(flags & FLAG_LEFT) && !(flags & FLAG_ZERO)) while(pad-- > 0) { \
+         out(' ');                                                         \
+    }                                                                      \
+    while(0)
+
+#define print_lwidth_zeros(flags, pad)                                     \
+    do if(!(flags & FLAG_LEFT) &&  (flags & FLAG_ZERO)) while(pad-- > 0) { \
+         out('0');                                                         \
+    }                                                                      \
+    while(0)
+
+#define print_rwidth(flags, pad)                                           \
+    if(flags & FLAG_LEFT) while(pad-- > 0) {                               \
+        out(' ');                                                          \
+    }
+
 // Generic print
 static int pfx(vprintfcb)(
     void *ctx,
@@ -245,43 +262,39 @@ static int pfx(vprintfcb)(
             char conv = tolower(*fmt);
             ++fmt;
             double value = va_arg(va, double);
+            int class = fpclassify(value);
+            if(class == FP_INFINITE || class == FP_NAN) {
+                return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
+            }
+            decfloat_t dec_value = f64_to_decimal(value);
             if(conv == 'f') {
-                w = pfx(_dtoa)(w, ctx, cb, value, prec, width, flags);
+                w = pfx(_dtoa)(w, ctx, cb, dec_value, prec, width, flags);
             }
             else if(conv == 'e') {
-                w = pfx(_etoa)(w, ctx, cb, value, prec, width, flags);
+                w = pfx(_etoa)(w, ctx, cb, dec_value, prec, width, flags);
             }
             else {
                 int P = prec;
                 if(!(flags & FLAG_PREC)) P = 6;
                 if(prec == 0) P = 1;
-                int class = fpclassify(value);
                 int64_t E;
                 {
-                    union {
-                        uint64_t bits;
-                        double value;
-                    } _ = {.value = value};
-                    uint64_t bits = _.bits;
-                    // uint64_t neg = bits >> 63;
-                    int64_t e2 = (int64_t)((bits >> 52) & ((1<<11)-1));
-                    int64_t m2 = bits & ((UINT64_C(1)<<52)-1);
                     if(class == FP_ZERO) {
                         E = 0;
                     }
                     else {
-                        decfloat_t f = dtodecfloat(m2, e2);
-                        E = f.exponent;
+                        E = dec_value.exponent;
                     }
                 }
                 if(class == FP_SUBNORMAL || class == FP_ZERO) {
                     E = 0;
                 }
+                flags |= FLAG_PREC;
                 if(P > E && E >= -4) {
-                    w = pfx(_dtoa)(w, ctx, cb, value, P-(E+1), width, flags);
+                    w = pfx(_dtoa)(w, ctx, cb, dec_value, P-(E+1), width, flags);
                 }
                 else {
-                    w = pfx(_etoa)(w, ctx, cb, value, P-1, width, flags);
+                    w = pfx(_etoa)(w, ctx, cb, dec_value, P-1, width, flags);
                 }
             }
             if(w < 0) return -1;
@@ -493,12 +506,9 @@ static inline int pfx(_infnantoa)(
     if(isnan(value)) name = "nan", nchars = 3;
     // Figure out prefix
     int pref_len = 0;
-    union {
-        uint64_t bits;
-        double value;
-    } _ = {value};
-    uint64_t bits = _.bits;
-    uint64_t neg = bits >> 63;
+    u64 bits = F64_BITS(value);
+    u64 mant = F64_MANT(bits);
+    int neg = F64_SIGN(bits) & (mant == 0);
     if(neg) pref_len = 1;
     else if(flags & FLAG_PLUS) pref_len = 1;
     else if(flags & FLAG_SPACE) pref_len = 1;
@@ -537,133 +547,123 @@ static inline int pfx(_dtoh)(
     int width,
     int flags
 ) {
-    int class = fpclassify(value);
-    if(class == FP_INFINITE || class == FP_NAN) {
-        return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
-    }
-    // Disassemble the float into parts
-    union {
-        uint64_t bits;
-        double value;
-    } _ = {.value = value};
-    uint64_t bits = _.bits;
-    // uint64_t neg = bits >> 63;
-    int64_t exp = (int64_t)((bits >> 52) & 0x7ff) - 1023;
-    int64_t mant = bits & ((UINT64_C(1)<<51)-1);
-    if(class == FP_SUBNORMAL || class == FP_ZERO) {
-        exp = 0;
-    }
-    // Figure out how many hex digits does mantissa take up (mant_digits_n)
-    // and the number of digits after decimal point (prec)
-    int mant_digits_n;
-    int nonsig_digits_n = 0;
-    if(mant != 0) {
-        int64_t m = mant;
-        while((m & 0xf) == 0) {
-            ++nonsig_digits_n;
-            m >>= 4;
-        }
-    }
-    else {
-        nonsig_digits_n = 52/4;
-    }
-    mant_digits_n = 52/4 - nonsig_digits_n;
-    if((flags & FLAG_PREC) == 0) {
-        prec = mant_digits_n;
-    }
-    // Figure out how many symbols decimal point takes up (0 or 1)
-    int decimal_point_n = 1;
-    if(prec == 0) {
-        decimal_point_n = 0;
-        if(flags & FLAG_HASH) {
-            decimal_point_n = 1;
-        }
-    }
-    // Figure out how many digits exponent take up and it's sign
-    // also save exponent digits to a buffer starting from LSD
-    static ctype exp_buf[64] = {0};
-    int exp_digits_n = 0;
-    ctype exp_sign;
-    if(exp < 0) {
-        exp_sign = '-';
-        exp = -exp;
-    }
-    else {
-        exp_sign = '+';
-    }
+    // Filter out inifnities and NaN
     {
-        int64_t e = exp;
+        int class = fpclassify(value);
+        if(class == FP_INFINITE || class == FP_NAN) {
+            return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
+        }
+    }
+    // Deconstruct the float
+    u64 bits = F64_BITS(value);
+    u64 flt_sgn = F64_SIGN(bits);
+    u64 flt_mnt = F64_MANT(bits);
+    i64 flt_exp = F64_BEXP(bits);
+    u64 frc;
+    u64 mnt;
+    i64 exp;
+    {
+        mnt = flt_mnt;
+        exp = flt_exp - 0x3ff;
+        if(flt_exp == 0 && flt_mnt == 0) {
+            exp = 0;
+        }
+    }
+    // Calculate widths of different number parts and extract some data
+    static ctype frc_buffer[64] = {0};
+    static ctype exp_buffer[64] = {0};
+    ctype *frc_buf = frc_buffer + sizeof frc_buffer;
+    ctype *exp_buf = exp_buffer + sizeof exp_buffer;
+    ctype sgn_ch;
+    ctype exp_sgn_ch;
+    int has_sgn;
+    int has_point;
+    int nfrc = 0;
+    int nexp = 0;
+    int nfrcp = 0;
+    {
+        // Sign
+        if(flt_sgn) {
+            has_sgn = 1;
+            sgn_ch  = '-';
+        }
+        else if(flags & FLAG_PLUS) {
+            has_sgn = 1;
+            sgn_ch  = '+';
+        }
+        else if(flags & FLAG_SPACE) {
+            has_sgn = 1;
+            sgn_ch  = ' ';
+        }
+        // Fractional part
+        frc = mnt;
+        if(frc != 0) {
+            while(!(frc & 0xf)) {
+                frc >>= 4;
+            }
+        }
+        u64 f = frc;
         do {
-            exp_buf[exp_digits_n++] = e % 10 + '0';
+            int d = f % 16;
+            f /= 16;
+            *--frc_buf = fromdigit(d, flags & FLAG_UPPER);
+        } while(f != 0);
+        // Frac/whole digits
+        if(flags & FLAG_PREC) {
+            nfrcp = prec;
+        }
+        else {
+            nfrcp = nfrc;
+        }
+        // Decimal point
+        has_point = 1;
+        if(nfrc == 0 && !(flags & FLAG_HASH)) {
+            has_point = 0;
+        }
+        // Exponent
+        i64 e = exp;
+        if(e < 0) {
+            exp_sgn_ch = '-';
+            e = -e;
+        }
+        else {
+            exp_sgn_ch = '+';
+        }
+        do {
+            *--exp_buf = (e%10) + '0';
             e /= 10;
+            nexp ++;
         } while(e != 0);
     }
-    // Figure out sign symbol and number of chars it takes up (0 or 1)
-    int sign_n = 0;
-    ctype sign_ch;
-    if(value < 0) {
-        sign_n = 1;
-        sign_ch = '-';
+    // Figure out the width of a float
+    int nfloat = has_sgn + 2 + 1 + has_point + nfrcp + 1 + 1 + nexp;
+    int pad = MIN(0, width - nfloat);
+    print_lwidth_spaces(flags, pad);
+    if(has_sgn) {
+        out(sgn_ch);
     }
-    else if(flags & FLAG_PLUS) {
-        sign_n = 1;
-        sign_ch = '+';
-    }
-    else if(flags & FLAG_SPACE) {
-        sign_n = 1;
-        sign_ch = ' ';
-    }
-    // Figure out the width of the number
-    int significand_width = 1 + decimal_point_n + prec;
-    int exp_part_width = 2 /*p+-*/ + exp_digits_n;
-    int n_width = sign_n + 2 /*0x*/ + significand_width + exp_part_width;
-    // Figure out width-padding required
-    int pad = 0;
-    if(width > n_width) pad = width - n_width;
-    // Print width left-pad if it's made out of space
-    if(!(flags & FLAG_LEFT) && !(flags & FLAG_ZERO)) while(pad-- > 0) {
-        out(' ');
-    }
-    // Print sign if there
-    if(sign_n)
-        out(sign_ch);
-    // Print 0x, 0X
     out('0');
     out((flags & FLAG_UPPER)? 'X' : 'x');
-    // Print width left-pad if it's made out of zero
-    if(!(flags & FLAG_LEFT) && (flags & FLAG_ZERO)) while(pad-- > 0) {
-        out('0');
+    print_lwidth_zeros(flags, pad);
+    out((flt_exp == 0) ? '0' : '1');
+    if(has_point) {
+        out('.');
     }
-    // Print whole part
-    out((class == FP_SUBNORMAL || class == FP_ZERO)? '0' : '1');
-    // Print decimal point
-    if(decimal_point_n) out('.');
-    int digs = mant_digits_n < prec? mant_digits_n : prec;
-    for(int i = 0; i != digs; ++i) {
-        int bit = 52 - 4 - i;
-        int digit = (int)((mant >> bit) & 0xf);
-        ctype ch;
-        if(digit < 10)
-             ch = digit+'0';
-        else if(flags & FLAG_UPPER)
-             ch = digit+'A'-10;
-        else ch = digit+'a'-10;
-        out(ch);
+    int i;
+    for(i = 0; i != nfrcp; ++i) {
+        if(i < nfrc) {
+            out(frc_buf[i]);
+        }
+        else {
+            out('0');
+        }
     }
-    // Print precision padding
-    for(int i = mant_digits_n; i < prec; ++i) {
-        out('0');
-    }
-    // Print the exponent part
     out((flags & FLAG_UPPER)? 'P' : 'p');
-    out(exp_sign);
-    if(exp_digits_n) do
-        out(exp_buf[exp_digits_n]);
-    while(exp_digits_n--);
-    // Print right-pad
-    if(flags & FLAG_LEFT) while(pad-- > 0) {
-        out(' ');
+    out(exp_sgn_ch);
+    for(int i = 0; i != nexp; ++i) {
+        out(exp_buf[i]);
     }
+    print_rwidth(flags, pad);
     return w;
 }
 
@@ -671,37 +671,13 @@ static inline int pfx(_dtoa)(
     int w,
     void *ctx,
     pfx(cbfn) cb,
-    double value,
+    decfloat_t value,
     int prec,
     int width,
     int flags
 ) {
-    int class = fpclassify(value);
-    if(class == FP_INFINITE || class == FP_NAN) {
-        return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
-    }
-    // Disassemble the float into parts
-    int64_t exp;
-    int64_t mant;
-    {
-        union {
-            uint64_t bits;
-            double value;
-        } _ = {.value = value};
-        uint64_t bits = _.bits;
-        //uint64_t neg = bits >> 63;
-        int64_t e2 = (int64_t)((bits >> 52) & ((1<<11)-1));
-        int64_t m2 = bits & ((UINT64_C(1)<<52)-1);
-        if(class == FP_ZERO) {
-            mant = 0;
-            exp = 0;
-        }
-        else {
-            decfloat_t f = dtodecfloat(m2, e2);
-            mant = f.mantissa;
-            exp = f.exponent;
-        }
-    }
+    int64_t exp = value.exponent;
+    int64_t mant = value.mantissa;
     // Figure out how many digits does mantissa take up (mant_digits_n)
     // and the number of digits after decimal point (prec)
     // and the number of digits before decimal point (while_digits_n)
@@ -731,7 +707,7 @@ static inline int pfx(_dtoa)(
     // Figure out sign symbol and number of chars it takes up (0 or 1)
     int sign_n = 0;
     ctype sign_ch;
-    if(value < 0) {
+    if(value.sign) {
         sign_n = 1;
         sign_ch = '-';
     }
@@ -797,36 +773,13 @@ static inline int pfx(_etoa)(
     int w,
     void *ctx,
     pfx(cbfn) cb,
-    double value,
+    decfloat_t value,
     int prec,
     int width,
     int flags
 ) {
-    int class = fpclassify(value);
-    if(class == FP_INFINITE || class == FP_NAN) {
-        return pfx(_infnantoa)(w, ctx, cb, value, prec, width, flags);
-    }
-    // Disassemble the float into parts
-    int64_t exp;
-    int64_t mant;
-    {
-        union {
-            uint64_t bits;
-            double value;
-        } _ = {.value = value};
-        uint64_t bits = _.bits;
-        int64_t e2 = (int64_t)((bits >> 52) & ((1<<11)-1));
-        int64_t m2 = bits & ((UINT64_C(1)<<52)-1);
-        if(class == FP_ZERO) {
-            mant = 0;
-            exp = 0;
-        }
-        else {
-            decfloat_t f = dtodecfloat(m2, e2);
-            mant = f.mantissa;
-            exp = f.exponent;
-        }
-    }
+    int64_t exp = value.exponent;
+    int64_t mant = value.mantissa;
     // Figure out how many digits does mantissa take up (mant_digits_n)
     // and the number of digits after decimal point (prec)
     static ctype mant_buf[64] = {0};
@@ -874,7 +827,7 @@ static inline int pfx(_etoa)(
     // Figure out sign symbol and number of chars it takes up (0 or 1)
     int sign_n = 0;
     ctype sign_ch;
-    if(value < 0) {
+    if(value.sign) {
         sign_n = 1;
         sign_ch = '-';
     }
diff --git a/src/util.c b/src/util.c
index be6076d..bbfd4ce 100644
--- a/src/util.c
+++ b/src/util.c
@@ -71,3 +71,6 @@ typedef wchar_t wchar;
 #define F32_SIGN(bits) (bits >> (F32_MANT_BITS + F32_BEXP_BITS))
 #define F32_BEXP(bits) ((bits >> F32_MANT_BITS) & F32_BEXP_MASK)
 #define F32_MANT(bits) ((bits) & F32_MANT_MASK)
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#define MAX(a,b) ((a)>(b)?(a):(b))
diff --git a/test/math.c b/test/math.c
index 1b8d661..8f88f43 100644
--- a/test/math.c
+++ b/test/math.c
@@ -1,6 +1,37 @@
 
 #include <std.h>
 
+double cool_doubles[] = {
+    -NAN,
+    -INFINITY,
+    -DBL_MAX,
+    -2.7,
+    -2.5,
+    -2.3,
+    -1.0,
+    -DBL_TRUE_MIN,
+    -DBL_MIN,
+    -0.0,
+    +0.0,
+    DBL_MIN,
+    DBL_TRUE_MIN,
+    1.0,
+    2.3,
+    2.5,
+    2.7,
+    DBL_MAX,
+    INFINITY,
+    NAN,
+};
+
+size_t ncool_doubles = COUNTOF(cool_doubles);
+
+#define for_each_cool_double(i,d,b)           \
+    for(int i = 0; i != ncool_doubles; ++i) { \
+        double d = cool_doubles[i];           \
+        b;                                    \
+    }
+
 const char *show_classification(double x) {
     switch(fpclassify(x)) {
         case FP_INFINITE:  return "Inf";
@@ -13,112 +44,9 @@ const char *show_classification(double x) {
 }
 
 int main() {
-
-    printf("\n=== fpclassify === \n");
-    double zero = 0.0; // fucken msvc
-    printf("1.0/0.0 is %s\n", show_classification(1.0/zero));
-    printf("0.0/0.0 is %s\n", show_classification(0.0/zero));
-    printf("DBL_MIN/2 is %s\n", show_classification(DBL_MIN/2));
-    printf("-0.0 is %s\n", show_classification(-0.0));
-    printf("1.0 is %s\n", show_classification(1.0));
-
-    printf("\n\n=== nearbyint === \n");
-    double d;
-    printf("nearbyint(+0.0)  = %f\n", d=nearbyint(+0.0));
-    printf("nearbyint(-0.0)  = %f\n", d=nearbyint(-0.0));
-    printf("nearbyint(+16.4) = %f\n", d=nearbyint(+16.4));
-    printf("nearbyint(+16.5) = %f\n", d=nearbyint(+16.5));
-    printf("nearbyint(+16.8) = %f\n", d=nearbyint(+16.8));
-
-    printf("\n\n=== signbit === \n");
-    printf("signbit(+0.0) = %d\n", signbit(+0.0));
-    printf("signbit(-0.0) = %d\n", signbit(-0.0));
-
-    printf("\n\n=== copysign === \n");
-    printf("copysign(1.0,+2.0)      = %f\n", copysign(1.0,+2.0));
-    printf("copysign(1.0,-2.0)      = %f\n", copysign(1.0,-2.0));
-    printf("copysign(INFINITY,-2.0) = %f\n", copysign(INFINITY,-2.0));
-    printf("copysign(NAN,-2.0)      = %f\n", copysign(NAN,-2.0));
-
-    printf("\n\n=== rint === \n");
-    fesetround(FE_TONEAREST);
-    printf("rounding to nearest (halfway cases to even):\n"
-           "rint(+2.3) = %f  ", rint(2.3));
-    printf("rint(+2.5) = %f  ", rint(2.5));
-    printf("rint(+3.5) = %f\n", rint(3.5));
-    printf("rint(-2.3) = %f  ", rint(-2.3));
-    printf("rint(-2.5) = %f  ", rint(-2.5));
-    printf("rint(-3.5) = %f\n", rint(-3.5));
-    fesetround(FE_DOWNWARD);
-    printf("rounding down: \nrint(+2.3) = %f  ", rint(2.3));
-    printf("rint(+2.5) = %f  ", rint(2.5));
-    printf("rint(+3.5) = %f\n", rint(3.5));
-    printf("rint(-2.3) = %f  ", rint(-2.3));
-    printf("rint(-2.5) = %f  ", rint(-2.5));
-    printf("rint(-3.5) = %f\n", rint(-3.5));
-    feclearexcept(FE_ALL_EXCEPT);
-    printf("rint(1.1) = %f\n", rint(1.1));
-    if(fetestexcept(FE_INEXACT)) printf("    FE_INEXACT was raised\n");
-
-    printf("\n\n=== nextafter === \n");
-    float from1 = 0, to1 = nextafterf(from1, 1);
-    printf("The next representable float after %f is %f\n", from1, to1);
-    float from2 = 1, to2 = nextafterf(from2, 2);
-    printf("The next representable float after %f is %f\n", from2, to2);
-    float from5 = 0.0, to5 = nextafter(from5, -0.0);
-    printf("nextafter(+0.0, -0.0) gives %f (%f)\n", to5, to5);
-
-    printf("\n\n=== ceil === \n");
-    printf("ceil(+2.4) = %f\n", ceil(2.4));
-    printf("ceil(-2.4) = %f\n", ceil(-2.4));
-    printf("ceil(-0.0) = %f\n", ceil(-0.0));
-    printf("ceil(-Inf) = %f\n", ceil(-INFINITY));
-
-    printf("\n\n=== floor === \n");
-    printf("floor(+2.7) = %f\n", floor(2.7));
-    printf("floor(-2.7) = %f\n", floor(-2.7));
-    printf("floor(-0.0) = %f\n", floor(-0.0));
-    printf("floor(-Inf) = %f\n", floor(-INFINITY));
-
-    printf("\n\n=== trunk === \n");
-    printf("trunc(+2.7) = %f\n", trunc(2.7));
-    printf("trunc(-2.7) = %f\n", trunc(-2.7));
-    printf("trunc(-0.0) = %f\n", trunc(-0.0));
-    printf("trunc(-Inf) = %f\n", trunc(-INFINITY));
-
-    printf("\n\n=== round === \n");
-    printf("round(+2.3)  = %f  ",  round(2.3));
-    printf("round(+2.5)  = %f  ",  round(2.5));
-    printf("round(+2.7)  = %f\n",  round(2.7));
-    printf("round(-2.3)  = %f  ",  round(-2.3));
-    printf("round(-2.5)  = %f  ",  round(-2.5));
-    printf("round(-2.7)  = %f\n",  round(-2.7));
-    printf("round(-0.0)  = %f\n",  round(-0.0));
-    printf("round(-Inf)  = %f\n",  round(-INFINITY));
-    printf("lround(+2.3) = %ld  ", lround(2.3));
-    printf("lround(+2.5) = %ld  ", lround(2.5));
-    printf("lround(+2.7) = %ld\n", lround(2.7));
-    printf("lround(-2.3) = %ld  ", lround(-2.3));
-    printf("lround(-2.5) = %ld  ", lround(-2.5));
-    printf("lround(-2.7) = %ld\n", lround(-2.7));
-    printf("lround(-0.0) = %ld\n", lround(-0.0));
-    printf("lround(-Inf) = %ld\n", lround(-INFINITY));
-    feclearexcept(FE_ALL_EXCEPT);
-    printf("lround(LONG_MAX+1.5) = %ld\n", lround(LONG_MAX+1.5));
-    if(fetestexcept(FE_INVALID)) printf("    FE_INVALID was raised\n");
-
-    printf("\n\n=== exp === \n");
-    printf("exp(1) = %e\n", exp(1));
-    printf("FV of $100, continuously compounded at 3%% for 1 year = %e\n",
-            100*exp(0.03));
-    // special values
-    printf("exp(-0) = %e\n", exp(-0.0));
-    printf("exp(-Inf) = %e\n", exp(-INFINITY));
-    //error handling
-    errno = 0; feclearexcept(FE_ALL_EXCEPT);
-    printf("exp(710) = %e\n", exp(710));
-    if(errno == ERANGE) printf("    errno == ERANGE\n");
-    if(fetestexcept(FE_OVERFLOW)) printf("    FE_OVERFLOW raised\n");
-
+    for(int i = 0; i != ncool_doubles; ++i) {
+        double d = cool_doubles[i];
+        printf("%a\n", d);
+    }
     return 0;
 }