adding exp/expf
This commit is contained in:
parent
b4aedb3ecc
commit
388b059643
|
@ -84,6 +84,9 @@ float log2f(float);
|
|||
double pow(double, double);
|
||||
float powf(float, float);
|
||||
|
||||
double exp(double);
|
||||
float expf(float);
|
||||
|
||||
double scalbn(double, int);
|
||||
|
||||
double sin(double);
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
/*
|
||||
* Double-precision e^x function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp_data.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
#define InvLn2N __exp_data.invln2N
|
||||
#define NegLn2hiN __exp_data.negln2hiN
|
||||
#define NegLn2loN __exp_data.negln2loN
|
||||
#define Shift __exp_data.shift
|
||||
#define T __exp_data.tab
|
||||
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
|
||||
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
|
||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||
scale is in SBITS, however it has a computed exponent that may have
|
||||
overflown into the sign bit so that needs to be adjusted before using it as
|
||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||
adjustment of scale, positive k here means the result may overflow and
|
||||
negative k means the result may underflow. */
|
||||
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
||||
{
|
||||
double_t scale, y;
|
||||
|
||||
if ((ki & 0x80000000) == 0) {
|
||||
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||
sbits -= 1009ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = 0x1p1009 * (scale + scale * tmp);
|
||||
return eval_as_double(y);
|
||||
}
|
||||
/* k < 0, need special care in the subnormal range. */
|
||||
sbits += 1022ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = scale + scale * tmp;
|
||||
if (y < 1.0) {
|
||||
/* Round y to the right precision before scaling it into the subnormal
|
||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||
E is the worst-case ulp error outside the subnormal range. So this
|
||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||
double_t hi, lo;
|
||||
lo = scale - y + scale * tmp;
|
||||
hi = 1.0 + y;
|
||||
lo = 1.0 - hi + y + lo;
|
||||
y = eval_as_double(hi + lo) - 1.0;
|
||||
/* Avoid -0.0 with downward rounding. */
|
||||
if (WANT_ROUNDING && y == 0.0)
|
||||
y = 0.0;
|
||||
/* The underflow exception needs to be signaled explicitly. */
|
||||
//WARN(orca): we don't have fp_barrier in wasm
|
||||
//fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
||||
fp_force_eval((0x1p-1022) * 0x1p-1022);
|
||||
}
|
||||
y = 0x1p-1022 * y;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
|
||||
/* Top 12 bits of a double (sign and exponent bits). */
|
||||
static inline uint32_t top12(double x)
|
||||
{
|
||||
return asuint64(x) >> 52;
|
||||
}
|
||||
|
||||
double exp(double x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
double_t kd, z, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12(x) & 0x7ff;
|
||||
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
|
||||
if (abstop - top12(0x1p-54) >= 0x80000000)
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
if (abstop >= top12(1024.0)) {
|
||||
if (asuint64(x) == asuint64(-INFINITY))
|
||||
return 0.0;
|
||||
if (abstop >= top12(INFINITY))
|
||||
return 1.0 + x;
|
||||
if (asuint64(x) >> 63)
|
||||
return __math_uflow(0);
|
||||
else
|
||||
return __math_oflow(0);
|
||||
}
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||
z = InvLn2N * x;
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint(z);
|
||||
ki = converttoint(z);
|
||||
#elif EXP_USE_TOINT_NARROW
|
||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||
kd = eval_as_double(z + Shift);
|
||||
ki = asuint64(kd) >> 16;
|
||||
kd = (double_t)(int32_t)ki;
|
||||
#else
|
||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||
kd = eval_as_double(z + Shift);
|
||||
ki = asuint64(kd);
|
||||
kd -= Shift;
|
||||
#endif
|
||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = ki << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble(T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
if (predict_false(abstop == 0))
|
||||
return specialcase(tmp, sbits, ki);
|
||||
scale = asdouble(sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double(scale + scale * tmp);
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Shared data between exp, exp2 and pow.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "exp_data.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
|
||||
const struct exp_data __exp_data = {
|
||||
// N/ln2
|
||||
.invln2N = 0x1.71547652b82fep0 * N,
|
||||
// -ln2/N
|
||||
.negln2hiN = -0x1.62e42fefa0000p-8,
|
||||
.negln2loN = -0x1.cf79abc9e3b3ap-47,
|
||||
// Used for rounding when !TOINT_INTRINSICS
|
||||
#if EXP_USE_TOINT_NARROW
|
||||
.shift = 0x1800000000.8p0,
|
||||
#else
|
||||
.shift = 0x1.8p52,
|
||||
#endif
|
||||
// exp polynomial coefficients.
|
||||
.poly = {
|
||||
// abs error: 1.555*2^-66
|
||||
// ulp error: 0.509 (0.511 without fma)
|
||||
// if |x| < ln2/256+eps
|
||||
// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
|
||||
// abs error if |x| < ln2/128: 1.7145*2^-56
|
||||
0x1.ffffffffffdbdp-2,
|
||||
0x1.555555555543cp-3,
|
||||
0x1.55555cf172b91p-5,
|
||||
0x1.1111167a4d017p-7,
|
||||
},
|
||||
.exp2_shift = 0x1.8p52 / N,
|
||||
// exp2 polynomial coefficients.
|
||||
.exp2_poly = {
|
||||
// abs error: 1.2195*2^-65
|
||||
// ulp error: 0.507 (0.511 without fma)
|
||||
// if |x| < 1/256
|
||||
// abs error if |x| < 1/128: 1.9941*2^-56
|
||||
0x1.62e42fefa39efp-1,
|
||||
0x1.ebfbdff82c424p-3,
|
||||
0x1.c6b08d70cf4b5p-5,
|
||||
0x1.3b2abd24650ccp-7,
|
||||
0x1.5d7e09b4e3a84p-10,
|
||||
},
|
||||
// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
|
||||
// tab[2*k] = asuint64(T[k])
|
||||
// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
|
||||
.tab = {
|
||||
0x0, 0x3ff0000000000000,
|
||||
0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
|
||||
0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
|
||||
0xbc905e7a108766d1, 0x3fefe315e86e7f85,
|
||||
0x3c8cd2523567f613, 0x3fefd9b0d3158574,
|
||||
0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
|
||||
0x3c60f74e61e6c861, 0x3fefc74518759bc8,
|
||||
0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
|
||||
0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
|
||||
0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
|
||||
0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
|
||||
0xbc6a033489906e0b, 0x3fef9b66affed31b,
|
||||
0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
|
||||
0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
|
||||
0xbc91c923b9d5f416, 0x3fef829aaea92de0,
|
||||
0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
|
||||
0xbc801b15eaa59348, 0x3fef72b83c7d517b,
|
||||
0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
|
||||
0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
|
||||
0xbc96d99c7611eb26, 0x3fef5be084045cd4,
|
||||
0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
|
||||
0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
|
||||
0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
|
||||
0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
|
||||
0x3c968efde3a8a894, 0x3fef387a6e756238,
|
||||
0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
|
||||
0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
|
||||
0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
|
||||
0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
|
||||
0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
|
||||
0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
|
||||
0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
|
||||
0x3c834d754db0abb6, 0x3fef06fe0a31b715,
|
||||
0x3c864201e2ac744c, 0x3fef0170fc4cd831,
|
||||
0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
|
||||
0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
|
||||
0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
|
||||
0xbc9907f81b512d8e, 0x3feeecae6d05d866,
|
||||
0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
|
||||
0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
|
||||
0x3c859f48a72a4c6d, 0x3feedea64c123422,
|
||||
0xbc9312607a28698a, 0x3feeda4504ac801c,
|
||||
0xbc58a78f4817895b, 0x3feed60a21f72e2a,
|
||||
0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
|
||||
0x3c4363ed60c2ac11, 0x3feece086061892d,
|
||||
0x3c9666093b0664ef, 0x3feeca41ed1d0057,
|
||||
0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
|
||||
0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
|
||||
0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
|
||||
0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
|
||||
0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
|
||||
0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
|
||||
0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
|
||||
0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
|
||||
0x3c93350518fdd78e, 0x3feeaf4736b527da,
|
||||
0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
|
||||
0x3c9063e1e21c5409, 0x3feeab07dd485429,
|
||||
0x3c34c7855019c6ea, 0x3feea9268a5946b7,
|
||||
0x3c9432e62b64c035, 0x3feea76f15ad2148,
|
||||
0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
|
||||
0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
|
||||
0xbc845378892be9ae, 0x3feea34634ccc320,
|
||||
0xbc93cedd78565858, 0x3feea23882552225,
|
||||
0x3c5710aa807e1964, 0x3feea155d44ca973,
|
||||
0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
|
||||
0xbc6a12ad8734b982, 0x3feea012750bdabf,
|
||||
0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
|
||||
0xbc80dc3d54e08851, 0x3fee9f7df9519484,
|
||||
0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
|
||||
0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
|
||||
0xbc8619321e55e68a, 0x3fee9feb564267c9,
|
||||
0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
|
||||
0xbc7b32dcb94da51d, 0x3feea11473eb0187,
|
||||
0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
|
||||
0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
|
||||
0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
|
||||
0xbc9369b6f13b3734, 0x3feea589994cce13,
|
||||
0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
|
||||
0xbc94d450d872576e, 0x3feea8d99b4492ed,
|
||||
0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
|
||||
0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
|
||||
0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
|
||||
0x3c7bf68359f35f44, 0x3feeb1ae99157736,
|
||||
0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
|
||||
0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
|
||||
0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
|
||||
0xbc92434322f4f9aa, 0x3feebd829fde4e50,
|
||||
0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
|
||||
0x3c71affc2b91ce27, 0x3feec49182a3f090,
|
||||
0x3c6dd235e10a73bb, 0x3feec86319e32323,
|
||||
0xbc87c50422622263, 0x3feecc667b5de565,
|
||||
0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
|
||||
0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
|
||||
0x3c90cc319cee31d2, 0x3feed99e1330b358,
|
||||
0x3c8469846e735ab3, 0x3feede6b5579fdbf,
|
||||
0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
|
||||
0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
|
||||
0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
|
||||
0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
|
||||
0xbc90a40e3da6f640, 0x3feef9728de5593a,
|
||||
0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
|
||||
0xbc91eee26b588a35, 0x3fef05b030a1064a,
|
||||
0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
|
||||
0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
|
||||
0x3c736eae30af0cb3, 0x3fef199bdd85529c,
|
||||
0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
|
||||
0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
|
||||
0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
|
||||
0x3c676b2c6c921968, 0x3fef3720dcef9069,
|
||||
0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
|
||||
0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
|
||||
0xbc900dae3875a949, 0x3fef4f87080d89f2,
|
||||
0x3c74a385a63d07a7, 0x3fef5818dcfba487,
|
||||
0xbc82919e2040220f, 0x3fef60e316c98398,
|
||||
0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
|
||||
0x3c843a59ac016b4b, 0x3fef7321f301b460,
|
||||
0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
|
||||
0xbc892ab93b470dc9, 0x3fef864614f5a129,
|
||||
0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
|
||||
0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
|
||||
0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
|
||||
0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
|
||||
0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
|
||||
0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
|
||||
0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
|
||||
0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
|
||||
0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
|
||||
0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
|
||||
},
|
||||
};
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Single-precision e^x function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp2f_data.h"
|
||||
|
||||
/*
|
||||
EXP2F_TABLE_BITS = 5
|
||||
EXP2F_POLY_ORDER = 3
|
||||
|
||||
ULP error: 0.502 (nearest rounding.)
|
||||
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
|
||||
Wrong count: 170635 (all nearest rounding wrong results with fma.)
|
||||
Non-nearest ULP error: 1 (rounded ULP error)
|
||||
*/
|
||||
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define InvLn2N __exp2f_data.invln2_scaled
|
||||
#define T __exp2f_data.tab
|
||||
#define C __exp2f_data.poly_scaled
|
||||
|
||||
static inline uint32_t top12(float x)
|
||||
{
|
||||
return asuint(x) >> 20;
|
||||
}
|
||||
|
||||
float expf(float x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, t;
|
||||
double_t kd, xd, z, r, r2, y, s;
|
||||
|
||||
xd = (double_t)x;
|
||||
abstop = top12(x) & 0x7ff;
|
||||
if (predict_false(abstop >= top12(88.0f))) {
|
||||
/* |x| >= 88 or x is nan. */
|
||||
if (asuint(x) == asuint(-INFINITY))
|
||||
return 0.0f;
|
||||
if (abstop >= top12(INFINITY))
|
||||
return x + x;
|
||||
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
|
||||
return __math_oflowf(0);
|
||||
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
|
||||
return __math_uflowf(0);
|
||||
}
|
||||
|
||||
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
|
||||
z = InvLn2N * xd;
|
||||
|
||||
/* Round and convert z to int, the result is in [-150*N, 128*N] and
|
||||
ideally ties-to-even rule is used, otherwise the magnitude of r
|
||||
can be bigger which gives larger approximation error. */
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint(z);
|
||||
ki = converttoint(z);
|
||||
#else
|
||||
# define SHIFT __exp2f_data.shift
|
||||
kd = eval_as_double(z + SHIFT);
|
||||
ki = asuint64(kd);
|
||||
kd -= SHIFT;
|
||||
#endif
|
||||
r = z - kd;
|
||||
|
||||
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
t += ki << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble(t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float(y);
|
||||
}
|
Loading…
Reference in New Issue