We can render text!
This commit is contained in:
parent
b7e3e83f3e
commit
b792476d36
|
@ -0,0 +1,16 @@
|
|||
#define FLT_EVAL_METHOD 0
|
||||
|
||||
#define LDBL_TRUE_MIN 4.94065645841246544177e-324L
|
||||
#define LDBL_MIN 2.22507385850720138309e-308L
|
||||
#define LDBL_MAX 1.79769313486231570815e+308L
|
||||
#define LDBL_EPSILON 2.22044604925031308085e-16L
|
||||
|
||||
#define LDBL_MANT_DIG 53
|
||||
#define LDBL_MIN_EXP (-1021)
|
||||
#define LDBL_MAX_EXP 1024
|
||||
|
||||
#define LDBL_DIG 15
|
||||
#define LDBL_MIN_10_EXP (-307)
|
||||
#define LDBL_MAX_10_EXP 308
|
||||
|
||||
#define DECIMAL_DIG 17
|
|
@ -0,0 +1,52 @@
|
|||
#ifndef _FLOAT_H
|
||||
#define _FLOAT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int __flt_rounds(void);
|
||||
#define FLT_ROUNDS (__flt_rounds())
|
||||
|
||||
#define FLT_RADIX 2
|
||||
|
||||
#define FLT_TRUE_MIN 1.40129846432481707092e-45F
|
||||
#define FLT_MIN 1.17549435082228750797e-38F
|
||||
#define FLT_MAX 3.40282346638528859812e+38F
|
||||
#define FLT_EPSILON 1.1920928955078125e-07F
|
||||
|
||||
#define FLT_MANT_DIG 24
|
||||
#define FLT_MIN_EXP (-125)
|
||||
#define FLT_MAX_EXP 128
|
||||
#define FLT_HAS_SUBNORM 1
|
||||
|
||||
#define FLT_DIG 6
|
||||
#define FLT_DECIMAL_DIG 9
|
||||
#define FLT_MIN_10_EXP (-37)
|
||||
#define FLT_MAX_10_EXP 38
|
||||
|
||||
#define DBL_TRUE_MIN 4.94065645841246544177e-324
|
||||
#define DBL_MIN 2.22507385850720138309e-308
|
||||
#define DBL_MAX 1.79769313486231570815e+308
|
||||
#define DBL_EPSILON 2.22044604925031308085e-16
|
||||
|
||||
#define DBL_MANT_DIG 53
|
||||
#define DBL_MIN_EXP (-1021)
|
||||
#define DBL_MAX_EXP 1024
|
||||
#define DBL_HAS_SUBNORM 1
|
||||
|
||||
#define DBL_DIG 15
|
||||
#define DBL_DECIMAL_DIG 17
|
||||
#define DBL_MIN_10_EXP (-307)
|
||||
#define DBL_MAX_10_EXP 308
|
||||
|
||||
#define LDBL_HAS_SUBNORM 1
|
||||
#define LDBL_DECIMAL_DIG DECIMAL_DIG
|
||||
|
||||
#include <bits/float.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,3 +1,10 @@
|
|||
#ifndef _MATH_H
|
||||
#define _MATH_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// NOTE(orca): not doing anything fancy for float_t and double_t
|
||||
typedef float float_t;
|
||||
typedef double double_t;
|
||||
|
@ -5,6 +12,60 @@ typedef double double_t;
|
|||
#define NAN __builtin_nanf("")
|
||||
#define INFINITY __builtin_inff()
|
||||
|
||||
#define FP_NAN 0
|
||||
#define FP_INFINITE 1
|
||||
#define FP_ZERO 2
|
||||
#define FP_SUBNORMAL 3
|
||||
#define FP_NORMAL 4
|
||||
|
||||
int __fpclassify(double);
|
||||
int __fpclassifyf(float);
|
||||
int __fpclassifyl(long double);
|
||||
|
||||
static __inline unsigned __FLOAT_BITS(float __f)
|
||||
{
|
||||
union {float __f; unsigned __i;} __u;
|
||||
__u.__f = __f;
|
||||
return __u.__i;
|
||||
}
|
||||
static __inline unsigned long long __DOUBLE_BITS(double __f)
|
||||
{
|
||||
union {double __f; unsigned long long __i;} __u;
|
||||
__u.__f = __f;
|
||||
return __u.__i;
|
||||
}
|
||||
|
||||
#define fpclassify(x) ( \
|
||||
sizeof(x) == sizeof(float) ? __fpclassifyf(x) : \
|
||||
sizeof(x) == sizeof(double) ? __fpclassify(x) : \
|
||||
__fpclassifyl(x) )
|
||||
|
||||
#define isinf(x) ( \
|
||||
sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) == 0x7f800000 : \
|
||||
sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & -1ULL>>1) == 0x7ffULL<<52 : \
|
||||
__fpclassifyl(x) == FP_INFINITE)
|
||||
|
||||
#define isnan(x) ( \
|
||||
sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) > 0x7f800000 : \
|
||||
sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & -1ULL>>1) > 0x7ffULL<<52 : \
|
||||
__fpclassifyl(x) == FP_NAN)
|
||||
|
||||
double acos(double);
|
||||
|
||||
double ceil(double);
|
||||
|
||||
double fabs(double);
|
||||
|
||||
double floor(double);
|
||||
|
||||
double fmod(double, double);
|
||||
|
||||
double pow(double, double);
|
||||
|
||||
double sqrt(double);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
#ifndef _STDIO_H
|
||||
#define _STDIO_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct _IO_FILE { char __x; };
|
||||
typedef struct _IO_FILE FILE;
|
||||
|
||||
|
@ -10,3 +17,9 @@ extern FILE *const stderr;
|
|||
#define stderr (stderr)
|
||||
|
||||
int fprintf(FILE *__restrict, const char *__restrict, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
#ifndef _STDLIB_H
|
||||
#define _STDLIB_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
_Noreturn void abort (void);
|
||||
|
||||
int abs (int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
/* acos(x)
|
||||
* Method :
|
||||
* acos(x) = pi/2 - asin(x)
|
||||
* acos(-x) = pi/2 + asin(x)
|
||||
* For |x|<=0.5
|
||||
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
|
||||
* For x>0.5
|
||||
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
|
||||
* = 2asin(sqrt((1-x)/2))
|
||||
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
|
||||
* = 2f + (2c + 2s*z*R(z))
|
||||
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
|
||||
* for f so that f+c ~ sqrt(z).
|
||||
* For x<-0.5
|
||||
* acos(x) = pi - 2asin(sqrt((1-|x|)/2))
|
||||
* = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
|
||||
*
|
||||
* Special cases:
|
||||
* if x is NaN, return x itself;
|
||||
* if |x|>1, return NaN with invalid signal.
|
||||
*
|
||||
* Function needed: sqrt
|
||||
*/
|
||||
|
||||
#include "libm.h"
|
||||
|
||||
static const double
|
||||
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
|
||||
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
|
||||
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
|
||||
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
|
||||
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
|
||||
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
|
||||
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
|
||||
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
|
||||
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
|
||||
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
|
||||
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
|
||||
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
|
||||
|
||||
static double R(double z)
|
||||
{
|
||||
double_t p, q;
|
||||
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||
return p/q;
|
||||
}
|
||||
|
||||
double acos(double x)
|
||||
{
|
||||
double z,w,s,c,df;
|
||||
uint32_t hx,ix;
|
||||
|
||||
GET_HIGH_WORD(hx, x);
|
||||
ix = hx & 0x7fffffff;
|
||||
/* |x| >= 1 or nan */
|
||||
if (ix >= 0x3ff00000) {
|
||||
uint32_t lx;
|
||||
|
||||
GET_LOW_WORD(lx,x);
|
||||
if ((ix-0x3ff00000 | lx) == 0) {
|
||||
/* acos(1)=0, acos(-1)=pi */
|
||||
if (hx >> 31)
|
||||
return 2*pio2_hi + 0x1p-120f;
|
||||
return 0;
|
||||
}
|
||||
return 0/(x-x);
|
||||
}
|
||||
/* |x| < 0.5 */
|
||||
if (ix < 0x3fe00000) {
|
||||
if (ix <= 0x3c600000) /* |x| < 2**-57 */
|
||||
return pio2_hi + 0x1p-120f;
|
||||
return pio2_hi - (x - (pio2_lo-x*R(x*x)));
|
||||
}
|
||||
/* x < -0.5 */
|
||||
if (hx >> 31) {
|
||||
z = (1.0+x)*0.5;
|
||||
s = sqrt(z);
|
||||
w = R(z)*s-pio2_lo;
|
||||
return 2*(pio2_hi - (s+w));
|
||||
}
|
||||
/* x > 0.5 */
|
||||
z = (1.0-x)*0.5;
|
||||
s = sqrt(z);
|
||||
df = s;
|
||||
SET_LOW_WORD(df,0);
|
||||
c = (z-df*df)/(s+df);
|
||||
w = R(z)*s+c;
|
||||
return 2*(df+w);
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
#include "libm.h"
|
||||
|
||||
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
||||
#define EPS DBL_EPSILON
|
||||
#elif FLT_EVAL_METHOD==2
|
||||
#define EPS LDBL_EPSILON
|
||||
#endif
|
||||
static const double_t toint = 1/EPS;
|
||||
|
||||
double ceil(double x)
|
||||
{
|
||||
union {double f; uint64_t i;} u = {x};
|
||||
int e = u.i >> 52 & 0x7ff;
|
||||
double_t y;
|
||||
|
||||
if (e >= 0x3ff+52 || x == 0)
|
||||
return x;
|
||||
/* y = int(x) - x, where int(x) is an integer neighbor of x */
|
||||
if (u.i >> 63)
|
||||
y = x - toint + toint - x;
|
||||
else
|
||||
y = x + toint - toint - x;
|
||||
/* special case because of non-nearest rounding modes */
|
||||
if (e <= 0x3ff-1) {
|
||||
FORCE_EVAL(y);
|
||||
return u.i >> 63 ? -0.0 : 1;
|
||||
}
|
||||
if (y < 0)
|
||||
return x + y + 1;
|
||||
return x + y;
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
#include "libm.h"
|
||||
|
||||
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
||||
#define EPS DBL_EPSILON
|
||||
#elif FLT_EVAL_METHOD==2
|
||||
#define EPS LDBL_EPSILON
|
||||
#endif
|
||||
static const double_t toint = 1/EPS;
|
||||
|
||||
double floor(double x)
|
||||
{
|
||||
union {double f; uint64_t i;} u = {x};
|
||||
int e = u.i >> 52 & 0x7ff;
|
||||
double_t y;
|
||||
|
||||
if (e >= 0x3ff+52 || x == 0)
|
||||
return x;
|
||||
/* y = int(x) - x, where int(x) is an integer neighbor of x */
|
||||
if (u.i >> 63)
|
||||
y = x - toint + toint - x;
|
||||
else
|
||||
y = x + toint - toint - x;
|
||||
/* special case because of non-nearest rounding modes */
|
||||
if (e <= 0x3ff-1) {
|
||||
FORCE_EVAL(y);
|
||||
return u.i >> 63 ? -1 : 0;
|
||||
}
|
||||
if (y > 0)
|
||||
return x + y - 1;
|
||||
return x + y;
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
double fmod(double x, double y)
|
||||
{
|
||||
union {double f; uint64_t i;} ux = {x}, uy = {y};
|
||||
int ex = ux.i>>52 & 0x7ff;
|
||||
int ey = uy.i>>52 & 0x7ff;
|
||||
int sx = ux.i>>63;
|
||||
uint64_t i;
|
||||
|
||||
/* in the followings uxi should be ux.i, but then gcc wrongly adds */
|
||||
/* float load/store to inner loops ruining performance and code size */
|
||||
uint64_t uxi = ux.i;
|
||||
|
||||
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
|
||||
return (x*y)/(x*y);
|
||||
if (uxi<<1 <= uy.i<<1) {
|
||||
if (uxi<<1 == uy.i<<1)
|
||||
return 0*x;
|
||||
return x;
|
||||
}
|
||||
|
||||
/* normalize x and y */
|
||||
if (!ex) {
|
||||
for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
|
||||
uxi <<= -ex + 1;
|
||||
} else {
|
||||
uxi &= -1ULL >> 12;
|
||||
uxi |= 1ULL << 52;
|
||||
}
|
||||
if (!ey) {
|
||||
for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
|
||||
uy.i <<= -ey + 1;
|
||||
} else {
|
||||
uy.i &= -1ULL >> 12;
|
||||
uy.i |= 1ULL << 52;
|
||||
}
|
||||
|
||||
/* x mod y */
|
||||
for (; ex > ey; ex--) {
|
||||
i = uxi - uy.i;
|
||||
if (i >> 63 == 0) {
|
||||
if (i == 0)
|
||||
return 0*x;
|
||||
uxi = i;
|
||||
}
|
||||
uxi <<= 1;
|
||||
}
|
||||
i = uxi - uy.i;
|
||||
if (i >> 63 == 0) {
|
||||
if (i == 0)
|
||||
return 0*x;
|
||||
uxi = i;
|
||||
}
|
||||
for (; uxi>>52 == 0; uxi <<= 1, ex--);
|
||||
|
||||
/* scale result */
|
||||
if (ex > 0) {
|
||||
uxi -= 1ULL << 52;
|
||||
uxi |= (uint64_t)ex << 52;
|
||||
} else {
|
||||
uxi >>= -ex + 1;
|
||||
}
|
||||
uxi |= (uint64_t)sx << 63;
|
||||
ux.i = uxi;
|
||||
return ux.f;
|
||||
}
|
|
@ -1,4 +1,6 @@
|
|||
#include <stdint.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#define WANT_ROUNDING 1
|
||||
|
||||
|
@ -30,11 +32,82 @@ static inline double eval_as_double(double x)
|
|||
return y;
|
||||
}
|
||||
|
||||
/* fp_force_eval ensures that the input value is computed when that's
|
||||
otherwise unused. To prevent the constant folding of the input
|
||||
expression, an additional fp_barrier may be needed or a compilation
|
||||
mode that does so (e.g. -frounding-math in gcc). Then it can be
|
||||
used to evaluate an expression for its fenv side-effects only. */
|
||||
|
||||
#ifndef fp_force_evalf
|
||||
#define fp_force_evalf fp_force_evalf
|
||||
static inline void fp_force_evalf(float x)
|
||||
{
|
||||
volatile float y;
|
||||
y = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef fp_force_eval
|
||||
#define fp_force_eval fp_force_eval
|
||||
static inline void fp_force_eval(double x)
|
||||
{
|
||||
volatile double y;
|
||||
y = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef fp_force_evall
|
||||
#define fp_force_evall fp_force_evall
|
||||
static inline void fp_force_evall(long double x)
|
||||
{
|
||||
volatile long double y;
|
||||
y = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define FORCE_EVAL(x) do { \
|
||||
if (sizeof(x) == sizeof(float)) { \
|
||||
fp_force_evalf(x); \
|
||||
} else if (sizeof(x) == sizeof(double)) { \
|
||||
fp_force_eval(x); \
|
||||
} else { \
|
||||
fp_force_evall(x); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
|
||||
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
|
||||
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
|
||||
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
|
||||
|
||||
#define EXTRACT_WORDS(hi,lo,d) \
|
||||
do { \
|
||||
uint64_t __u = asuint64(d); \
|
||||
(hi) = __u >> 32; \
|
||||
(lo) = (uint32_t)__u; \
|
||||
} while (0)
|
||||
|
||||
#define GET_HIGH_WORD(hi,d) \
|
||||
do { \
|
||||
(hi) = asuint64(d) >> 32; \
|
||||
} while (0)
|
||||
|
||||
#define GET_LOW_WORD(lo,d) \
|
||||
do { \
|
||||
(lo) = (uint32_t)asuint64(d); \
|
||||
} while (0)
|
||||
|
||||
#define INSERT_WORDS(d,hi,lo) \
|
||||
do { \
|
||||
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
|
||||
} while (0)
|
||||
|
||||
#define SET_HIGH_WORD(d,hi) \
|
||||
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
|
||||
|
||||
#define SET_LOW_WORD(d,lo) \
|
||||
INSERT_WORDS(d, asuint64(d)>>32, lo)
|
||||
|
||||
double __math_xflow(uint32_t, double);
|
||||
double __math_uflow(uint32_t);
|
||||
double __math_oflow(uint32_t);
|
||||
|
|
|
@ -0,0 +1,158 @@
|
|||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include "libm.h"
|
||||
#include "sqrt_data.h"
|
||||
|
||||
#define FENV_SUPPORT 1
|
||||
|
||||
/* returns a*b*2^-32 - e, with error 0 <= e < 1. */
|
||||
static inline uint32_t mul32(uint32_t a, uint32_t b)
|
||||
{
|
||||
return (uint64_t)a*b >> 32;
|
||||
}
|
||||
|
||||
/* returns a*b*2^-64 - e, with error 0 <= e < 3. */
|
||||
static inline uint64_t mul64(uint64_t a, uint64_t b)
|
||||
{
|
||||
uint64_t ahi = a>>32;
|
||||
uint64_t alo = a&0xffffffff;
|
||||
uint64_t bhi = b>>32;
|
||||
uint64_t blo = b&0xffffffff;
|
||||
return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
|
||||
}
|
||||
|
||||
double sqrt(double x)
|
||||
{
|
||||
uint64_t ix, top, m;
|
||||
|
||||
/* special case handling. */
|
||||
ix = asuint64(x);
|
||||
top = ix >> 52;
|
||||
if (predict_false(top - 0x001 >= 0x7ff - 0x001)) {
|
||||
/* x < 0x1p-1022 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return x;
|
||||
if (ix == 0x7ff0000000000000)
|
||||
return x;
|
||||
if (ix > 0x7ff0000000000000)
|
||||
return __math_invalid(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint64(x * 0x1p52);
|
||||
top = ix >> 52;
|
||||
top -= 52;
|
||||
}
|
||||
|
||||
/* argument reduction:
|
||||
x = 4^e m; with integer e, and m in [1, 4)
|
||||
m: fixed point representation [2.62]
|
||||
2^e is the exponent part of the result. */
|
||||
int even = top & 1;
|
||||
m = (ix << 11) | 0x8000000000000000;
|
||||
if (even) m >>= 1;
|
||||
top = (top + 0x3ff) >> 1;
|
||||
|
||||
/* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||
|
||||
initial estimate:
|
||||
7bit table lookup (1bit exponent and 6bit significand).
|
||||
|
||||
iterative approximation:
|
||||
using 2 goldschmidt iterations with 32bit int arithmetics
|
||||
and a final iteration with 64bit int arithmetics.
|
||||
|
||||
details:
|
||||
|
||||
the relative error (e = r0 sqrt(m)-1) of a linear estimate
|
||||
(r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
|
||||
a table lookup is faster and needs one less iteration
|
||||
6 bit lookup table (128b) gives |e| < 0x1.f9p-8
|
||||
7 bit lookup table (256b) gives |e| < 0x1.fdp-9
|
||||
for single and double prec 6bit is enough but for quad
|
||||
prec 7bit is needed (or modified iterations). to avoid
|
||||
one more iteration >=13bit table would be needed (16k).
|
||||
|
||||
a newton-raphson iteration for r is
|
||||
w = r*r
|
||||
u = 3 - m*w
|
||||
r = r*u/2
|
||||
can use a goldschmidt iteration for s at the end or
|
||||
s = m*r
|
||||
|
||||
first goldschmidt iteration is
|
||||
s = m*r
|
||||
u = 3 - s*r
|
||||
r = r*u/2
|
||||
s = s*u/2
|
||||
next goldschmidt iteration is
|
||||
u = 3 - s*r
|
||||
r = r*u/2
|
||||
s = s*u/2
|
||||
and at the end r is not computed only s.
|
||||
|
||||
they use the same amount of operations and converge at the
|
||||
same quadratic rate, i.e. if
|
||||
r1 sqrt(m) - 1 = e, then
|
||||
r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
|
||||
the advantage of goldschmidt is that the mul for s and r
|
||||
are independent (computed in parallel), however it is not
|
||||
"self synchronizing": it only uses the input m in the
|
||||
first iteration so rounding errors accumulate. at the end
|
||||
or when switching to larger precision arithmetics rounding
|
||||
errors dominate so the first iteration should be used.
|
||||
|
||||
the fixed point representations are
|
||||
m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
|
||||
and after switching to 64 bit
|
||||
m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */
|
||||
|
||||
static const uint64_t three = 0xc0000000;
|
||||
uint64_t r, s, d, u, i;
|
||||
|
||||
i = (ix >> 46) % 128;
|
||||
r = (uint32_t)__rsqrt_tab[i] << 16;
|
||||
/* |r sqrt(m) - 1| < 0x1.fdp-9 */
|
||||
s = mul32(m>>32, r);
|
||||
/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
|
||||
d = mul32(s, r);
|
||||
u = three - d;
|
||||
r = mul32(r, u) << 1;
|
||||
/* |r sqrt(m) - 1| < 0x1.7bp-16 */
|
||||
s = mul32(s, u) << 1;
|
||||
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
|
||||
d = mul32(s, r);
|
||||
u = three - d;
|
||||
r = mul32(r, u) << 1;
|
||||
/* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */
|
||||
r = r << 32;
|
||||
s = mul64(m, r);
|
||||
d = mul64(s, r);
|
||||
u = (three<<32) - d;
|
||||
s = mul64(s, u); /* repr: 3.61 */
|
||||
/* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */
|
||||
s = (s - 2) >> 9; /* repr: 12.52 */
|
||||
/* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */
|
||||
|
||||
/* s < sqrt(m) < s + 0x1.09p-52,
|
||||
compute nearest rounded result:
|
||||
the nearest result to 52 bits is either s or s+0x1p-52,
|
||||
we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */
|
||||
uint64_t d0, d1, d2;
|
||||
double y, t;
|
||||
d0 = (m << 42) - s*s;
|
||||
d1 = s - d0;
|
||||
d2 = d1 + s + 1;
|
||||
s += d1 >> 63;
|
||||
s &= 0x000fffffffffffff;
|
||||
s |= top << 52;
|
||||
y = asdouble(s);
|
||||
if (FENV_SUPPORT) {
|
||||
/* handle rounding modes and inexact exception:
|
||||
only (s+1)^2 == 2^42 m case is exact otherwise
|
||||
add a tiny value to cause the fenv effects. */
|
||||
uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
|
||||
tiny |= (d1^d2) & 0x8000000000000000;
|
||||
t = asdouble(tiny);
|
||||
y = eval_as_double(y + t);
|
||||
}
|
||||
return y;
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
#include "sqrt_data.h"
|
||||
const uint16_t __rsqrt_tab[128] = {
|
||||
0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
|
||||
0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
|
||||
0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
|
||||
0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430,
|
||||
0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59,
|
||||
0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925,
|
||||
0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479,
|
||||
0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040,
|
||||
0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234,
|
||||
0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2,
|
||||
0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1,
|
||||
0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192,
|
||||
0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f,
|
||||
0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4,
|
||||
0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59,
|
||||
0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560,
|
||||
};
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef _SQRT_DATA_H
|
||||
#define _SQRT_DATA_H
|
||||
|
||||
#include <features.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* if x in [1,2): i = (int)(64*x);
|
||||
if x in [2,4): i = (int)(32*x-64);
|
||||
__rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
|
||||
|__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
|
||||
extern const uint16_t __rsqrt_tab[128];
|
||||
|
||||
#endif
|
Binary file not shown.
|
@ -34,6 +34,7 @@ mg_canvas canvas;
|
|||
mg_surface surface;
|
||||
mg_image ballImage;
|
||||
mg_image paddleImage;
|
||||
mg_font pongFont;
|
||||
|
||||
mg_surface mg_surface_main(void);
|
||||
|
||||
|
@ -78,6 +79,26 @@ ORCA_EXPORT void OnInit(void)
|
|||
paddleImage = mg_image_create_from_data(surface, str8_from_buffer(size, buffer), false);
|
||||
}
|
||||
|
||||
//NOTE: load paddle texture
|
||||
{
|
||||
file_handle file = file_open(STR8("/Literata-SemiBoldItalic.ttf"), FILE_ACCESS_READ, 0);
|
||||
if(file_last_error(file) != IO_OK)
|
||||
{
|
||||
log_error("Couldn't open file Literata-SemiBoldItalic.ttf\n");
|
||||
}
|
||||
u64 size = file_size(file);
|
||||
char* buffer = mem_arena_alloc(mem_scratch(), size);
|
||||
file_read(file, size, buffer);
|
||||
file_close(file);
|
||||
unicode_range ranges[5] = {UNICODE_RANGE_BASIC_LATIN,
|
||||
UNICODE_RANGE_C1_CONTROLS_AND_LATIN_1_SUPPLEMENT,
|
||||
UNICODE_RANGE_LATIN_EXTENDED_A,
|
||||
UNICODE_RANGE_LATIN_EXTENDED_B,
|
||||
UNICODE_RANGE_SPECIALS};
|
||||
// NOTE(ben): Weird that images are "create from data" but fonts are "create from memory"
|
||||
// TODO: Decide whether we're using strings or explicit pointer + length
|
||||
pongFont = mg_font_create_from_memory(size, (byte*)buffer, 5, ranges);
|
||||
}
|
||||
|
||||
mem_arena_clear(mem_scratch());
|
||||
}
|
||||
|
@ -209,6 +230,18 @@ ORCA_EXPORT void OnFrameRefresh(void)
|
|||
|
||||
mg_matrix_pop();
|
||||
|
||||
mg_set_color_rgba(0, 0, 0, 1);
|
||||
mg_set_font(pongFont);
|
||||
mg_set_font_size(14);
|
||||
mg_move_to(10, 20);
|
||||
|
||||
str8 text = str8_pushf(mem_scratch(),
|
||||
"wahoo I'm did a text. ball is at x = %f, y = %f",
|
||||
ball.x, ball.y
|
||||
);
|
||||
mg_text_outlines(text);
|
||||
mg_fill();
|
||||
|
||||
mg_surface_prepare(surface);
|
||||
mg_render(surface, canvas);
|
||||
mg_surface_present(surface);
|
||||
|
|
Loading…
Reference in New Issue