mirror of https://github.com/flysand7/ciabatta.git
129 lines
2.8 KiB
C
129 lines
2.8 KiB
C
|
|
#include <math.h>
|
|
#include <stdint.h>
|
|
|
|
#if !defined(__FMA__)
|
|
#error "Get a better CPU (the kind that supports FMA) or enable -mfma"
|
|
#endif
|
|
|
|
#include <immintrin.h>
|
|
#include <xmmintrin.h>
|
|
|
|
double fabs(double x) {
|
|
union {double f; uint64_t i;} u = {x};
|
|
u.i &= -1ULL/2;
|
|
return u.f;
|
|
}
|
|
|
|
float fabsf(float x) {
|
|
union {float f; uint32_t i;} u = {x};
|
|
u.i &= 0x7fffffff;
|
|
return u.f;
|
|
}
|
|
|
|
long double fabsl(long double x) {
|
|
return fabs(x);
|
|
}
|
|
|
|
double fdim(double x, double y) {
|
|
if (isnan(x)) return x;
|
|
if (isnan(y)) return y;
|
|
return x > y ? x - y : 0;
|
|
}
|
|
|
|
float fdimf(float x, float y) {
|
|
if (isnan(x)) return x;
|
|
if (isnan(y)) return y;
|
|
return x > y ? x - y : 0;
|
|
}
|
|
|
|
long double fdiml(long double x, long double y) {
|
|
return fdim(x, y);
|
|
}
|
|
|
|
double fmax(double x, double y) {
|
|
if (isnan(x)) return y;
|
|
if (isnan(y)) return x;
|
|
/* handle signed zeros, see C99 Annex F.9.9.2 */
|
|
if (signbit(x) != signbit(y))
|
|
return signbit(x) ? y : x;
|
|
return x < y ? y : x;
|
|
}
|
|
|
|
float fmaxf(float x, float y) {
|
|
if (isnan(x)) return y;
|
|
if (isnan(y)) return x;
|
|
/* handle signed zeroes, see C99 Annex F.9.9.2 */
|
|
if (signbit(x) != signbit(y))
|
|
return signbit(x) ? y : x;
|
|
return x < y ? y : x;
|
|
}
|
|
|
|
long double fmaxl(long double x, long double y) {
|
|
return fmax(x, y);
|
|
}
|
|
|
|
double fmin(double x, double y) {
|
|
if (isnan(x)) return y;
|
|
if (isnan(y)) return x;
|
|
/* handle signed zeros, see C99 Annex F.9.9.2 */
|
|
if (signbit(x) != signbit(y))
|
|
return signbit(x) ? x : y;
|
|
return x < y ? x : y;
|
|
}
|
|
|
|
float fminf(float x, float y) {
|
|
if (isnan(x)) return y;
|
|
if (isnan(y)) return x;
|
|
/* handle signed zeros, see C99 Annex F.9.9.2 */
|
|
if (signbit(x) != signbit(y))
|
|
return signbit(x) ? x : y;
|
|
return x < y ? x : y;
|
|
}
|
|
|
|
long double fminl(long double x, long double y) {
|
|
return fmin(x, y);
|
|
}
|
|
|
|
double fma(double x, double y, double z) {
|
|
__m128d xd = _mm_set_sd(x);
|
|
__m128d yd = _mm_set_sd(y);
|
|
__m128d zd = _mm_set_sd(z);
|
|
__m128d rd = _mm_fmadd_sd(xd, yd, zd);
|
|
double res = _mm_cvtsd_f64(rd);
|
|
return res;
|
|
}
|
|
|
|
float fmaf(float x, float y, float z) {
|
|
__m128 xd = _mm_set_ss(x);
|
|
__m128 yd = _mm_set_ss(y);
|
|
__m128 zd = _mm_set_ss(z);
|
|
__m128 rd = _mm_fmadd_ss(xd, yd, zd);
|
|
float res = _mm_cvtss_f32(rd);
|
|
return res;
|
|
}
|
|
|
|
long double fmal(long double x, long double y, long double z) {
|
|
return fma(x, y, z);
|
|
}
|
|
|
|
double sqrt(double x) {
|
|
__m128d xd = _mm_set_sd(x);
|
|
__m128d yd = _mm_set_sd(0);
|
|
__m128d rd = _mm_sqrt_sd(xd, yd);
|
|
double res = _mm_cvtsd_f64(rd);
|
|
return res;
|
|
}
|
|
|
|
float sqrtf(float x) {
|
|
__m128 xd = _mm_set_ss(x);
|
|
__m128 rd = _mm_sqrt_ss(xd);
|
|
float res = _mm_cvtss_f32(rd);
|
|
return res;
|
|
}
|
|
|
|
long double sqrtl(long double x) {
|
|
return sqrt(x);
|
|
}
|
|
|