Fix wasm3 alignment errors and silence some warnings #135
|
@ -170,7 +170,7 @@ def build_platform_layer_lib_win(release):
|
|||
def build_platform_layer_lib_mac(release):
|
||||
sdk_dir = "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"
|
||||
|
||||
flags = ["-mmacos-version-min=10.15.4", "-maes"]
|
||||
flags = ["-mmacos-version-min=10.15.4"]
|
||||
cflags = ["-std=c11"]
|
||||
debug_flags = ["-O3"] if release else ["-g", "-DOC_DEBUG", "-DOC_LOG_COMPILE_DEBUG"]
|
||||
ldflags = [f"-L{sdk_dir}/usr/lib", f"-F{sdk_dir}/System/Library/Frameworks/"]
|
||||
|
@ -280,6 +280,7 @@ def build_wasm3_lib_mac(release):
|
|||
"-foptimize-sibling-calls",
|
||||
"-Wno-extern-initializer",
|
||||
"-Dd_m3VerboseErrorMessages",
|
||||
"-mmacos-version-min=10.15.4"
|
||||
]
|
||||
|
||||
for f in glob.iglob("src/ext/wasm3/source/*.c"):
|
||||
|
@ -289,7 +290,7 @@ def build_wasm3_lib_mac(release):
|
|||
"-o", f"build/obj/{name}",
|
||||
f,
|
||||
], check=True)
|
||||
subprocess.run(["ar", "-rcs", "build/lib/libwasm3.a", *glob.glob("build/obj/*.o")], check=True)
|
||||
subprocess.run(["libtool", "-static", "-o", "build/lib/libwasm3.a", "-no_warning_for_no_symbols", *glob.glob("build/obj/*.o")], check=True)
|
||||
subprocess.run(["rm", "-rf", "build/obj"], check=True)
|
||||
|
||||
|
||||
|
@ -348,9 +349,7 @@ def build_orca_mac(release):
|
|||
debug_flags = ["-O2"] if release else ["-g", "-DOC_DEBUG -DOC_LOG_COMPILE_DEBUG"]
|
||||
flags = [
|
||||
*debug_flags,
|
||||
"-mmacos-version-min=10.15.4",
|
||||
"-maes",
|
||||
]
|
||||
"-mmacos-version-min=10.15.4"]
|
||||
|
||||
gen_all_bindings()
|
||||
|
||||
|
|
|
@ -121,7 +121,7 @@ typedef int8_t i8;
|
|||
# if defined (M3_COMPILER_MSVC)
|
||||
# define vectorcall // For MSVC, better not to specify any call convention
|
||||
# elif defined(__x86_64__)
|
||||
# define vectorcall __attribute__((aligned(32)))
|
||||
# define vectorcall
|
||||
//# elif defined(__riscv) && (__riscv_xlen == 64)
|
||||
//# define vectorcall __attribute__((aligned(16)))
|
||||
# elif defined(__MINGW32__)
|
||||
|
|
|
@ -15,9 +15,13 @@
|
|||
|
||||
#include "mtl_renderer.h"
|
||||
|
||||
const int OC_MTL_INPUT_BUFFERS_COUNT = 3,
|
||||
OC_MTL_TILE_SIZE = 16,
|
||||
OC_MTL_MSAA_COUNT = 8;
|
||||
enum
|
||||
{
|
||||
OC_MTL_INPUT_BUFFERS_COUNT = 3,
|
||||
OC_MTL_TILE_SIZE = 16,
|
||||
OC_MTL_MSAA_COUNT = 8,
|
||||
OC_MTL_OFFSET_CURVE_SAMPLE_COUNT = 5,
|
||||
};
|
||||
|
||||
typedef struct oc_mtl_canvas_backend
|
||||
{
|
||||
|
@ -511,8 +515,7 @@ void oc_mtl_render_stroke_quadratic(oc_mtl_canvas_backend* backend, oc_vec2* p)
|
|||
}
|
||||
else
|
||||
{
|
||||
const int CHECK_SAMPLE_COUNT = 5;
|
||||
f32 checkSamples[CHECK_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 };
|
||||
f32 checkSamples[OC_MTL_OFFSET_CURVE_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 };
|
||||
|
||||
f32 d2LowBound = oc_square(0.5 * width - tolerance);
|
||||
f32 d2HighBound = oc_square(0.5 * width + tolerance);
|
||||
|
@ -520,7 +523,7 @@ void oc_mtl_render_stroke_quadratic(oc_mtl_canvas_backend* backend, oc_vec2* p)
|
|||
f32 maxOvershoot = 0;
|
||||
f32 maxOvershootParameter = 0;
|
||||
|
||||
for(int i = 0; i < CHECK_SAMPLE_COUNT; i++)
|
||||
for(int i = 0; i < OC_MTL_OFFSET_CURVE_SAMPLE_COUNT; i++)
|
||||
{
|
||||
f32 t = checkSamples[i];
|
||||
|
||||
|
@ -613,8 +616,7 @@ void oc_mtl_render_stroke_cubic(oc_mtl_canvas_backend* backend, oc_vec2* p)
|
|||
}
|
||||
else
|
||||
{
|
||||
const int CHECK_SAMPLE_COUNT = 5;
|
||||
f32 checkSamples[CHECK_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 };
|
||||
f32 checkSamples[OC_MTL_OFFSET_CURVE_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 };
|
||||
|
||||
f32 d2LowBound = oc_square(0.5 * width - tolerance);
|
||||
f32 d2HighBound = oc_square(0.5 * width + tolerance);
|
||||
|
@ -622,7 +624,7 @@ void oc_mtl_render_stroke_cubic(oc_mtl_canvas_backend* backend, oc_vec2* p)
|
|||
f32 maxOvershoot = 0;
|
||||
f32 maxOvershootParameter = 0;
|
||||
|
||||
for(int i = 0; i < CHECK_SAMPLE_COUNT; i++)
|
||||
for(int i = 0; i < OC_MTL_OFFSET_CURVE_SAMPLE_COUNT; i++)
|
||||
{
|
||||
f32 t = checkSamples[i];
|
||||
|
||||
|
|
158
src/util/hash.c
158
src/util/hash.c
|
@ -8,8 +8,84 @@
|
|||
#include "hash.h"
|
||||
#include "platform/platform.h"
|
||||
|
||||
#if OC_ARCH_X64
|
||||
#include <immintrin.h>
|
||||
//xxhash64, copy-pasted from https://github.com/demetri/scribbles/blob/master/hashing/hash_functions.c
|
||||
// Thanks to Demetri Spanos
|
||||
|
||||
uint64_t xxh_64(const void* key, int len, uint64_t h)
|
||||
{
|
||||
// primes used in mul-rot updates
|
||||
uint64_t p1 = 0x9e3779b185ebca87, p2 = 0xc2b2ae3d27d4eb4f,
|
||||
p3 = 0x165667b19e3779f9, p4 = 0x85ebca77c2b2ae63, p5 = 0x27d4eb2f165667c5;
|
||||
|
||||
// initial 32-byte (4x8) wide hash state
|
||||
uint64_t s[4] = { h + p1 + p2, h + p2, h, h - p1 };
|
||||
|
||||
// bulk work: process all 32 byte blocks
|
||||
uint64_t* k32 = (uint64_t*)key;
|
||||
for(int i = 0; i < (len / 32); i += 4)
|
||||
{
|
||||
uint64_t b[4] = { k32[i + 0], k32[i + 1], k32[i + 2], k32[i + 3] };
|
||||
for(int j = 0; j < 4; j++)
|
||||
b[j] = b[j] * p2 + s[j];
|
||||
for(int j = 0; j < 4; j++)
|
||||
s[j] = ((b[j] << 31) | (b[j] >> 33)) * p1;
|
||||
}
|
||||
|
||||
// mix 32-byte state down to 8-byte state, initialize to value for short keys
|
||||
uint64_t s64 = (s[2] + p5);
|
||||
if(len > 32)
|
||||
{
|
||||
s64 = ((s[0] << 1) | (s[0] >> 63)) + ((s[1] << 7) | (s[1] >> 57)) + ((s[2] << 12) | (s[2] >> 52)) + ((s[3] << 18) | (s[3] >> 46));
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
uint64_t ps = (((s[i] * p2) << 31) | ((s[i] * p2) >> 33)) * p1;
|
||||
s64 = (s64 ^ ps) * p1 + p4;
|
||||
}
|
||||
}
|
||||
s64 += len;
|
||||
|
||||
// up to 31 bytes remain, process 0-3 8 byte blocks
|
||||
uint8_t* tail = (uint8_t*)(((char*)key) + (len / 32) * 32);
|
||||
for(int i = 0; i < (len & 31) / 8; i++, tail += 8)
|
||||
{
|
||||
uint64_t b = (*((uint64_t*)tail)) * p2;
|
||||
b = (((b << 31) | (b >> 33)) * p1) ^ s64;
|
||||
s64 = ((b << 27) | (b >> 37)) * p1 + p4;
|
||||
}
|
||||
|
||||
// up to 7 bytes remain, process 0-1 4 byte block
|
||||
for(int i = 0; i < (len & 7) / 4; i++, tail += 4)
|
||||
{
|
||||
uint64_t b = s64 ^ (*(uint32_t*)tail) * p1;
|
||||
s64 = ((b << 23) | (b >> 41)) * p2 + p3;
|
||||
}
|
||||
|
||||
// up to 3 bytes remain, process 0-3 1 byte blocks
|
||||
for(int i = 0; i < (len & 3); i++, tail++)
|
||||
{
|
||||
uint64_t b = s64 ^ (*tail) * p5;
|
||||
s64 = ((b << 11) | (b >> 53)) * p1;
|
||||
}
|
||||
|
||||
// finalization mix
|
||||
s64 = (s64 ^ (s64 >> 33)) * p2;
|
||||
s64 = (s64 ^ (s64 >> 29)) * p3;
|
||||
return (s64 ^ (s64 >> 32));
|
||||
}
|
||||
|
||||
u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed)
|
||||
{
|
||||
return (xxh_64(string.ptr, string.len, seed));
|
||||
}
|
||||
|
||||
u64 oc_hash_xx64_string(oc_str8 string)
|
||||
{
|
||||
return (xxh_64(string.ptr, string.len, 0));
|
||||
}
|
||||
|
||||
#if 0 //NOTE(martin): keep these here because we may want to use them when AES is available, but we don't use them for now
|
||||
#if OC_ARCH_X64
|
||||
#include <immintrin.h>
|
||||
|
||||
u64 oc_hash_aes_u64(u64 x)
|
||||
{
|
||||
|
@ -127,79 +203,5 @@ u64 oc_hash_aes_string_seed(oc_str8 string, u64 seed)
|
|||
u64 result = _mm_extract_epi64(hash, 0);
|
||||
return (result);
|
||||
}
|
||||
#endif // OC_ARCH_X64
|
||||
|
||||
//xxhash64, copy-pasted from https://github.com/demetri/scribbles/blob/master/hashing/hash_functions.c
|
||||
// Thanks to Demetri Spanos
|
||||
|
||||
uint64_t xxh_64(const void* key, int len, uint64_t h)
|
||||
{
|
||||
// primes used in mul-rot updates
|
||||
uint64_t p1 = 0x9e3779b185ebca87, p2 = 0xc2b2ae3d27d4eb4f,
|
||||
p3 = 0x165667b19e3779f9, p4 = 0x85ebca77c2b2ae63, p5 = 0x27d4eb2f165667c5;
|
||||
|
||||
// initial 32-byte (4x8) wide hash state
|
||||
uint64_t s[4] = { h + p1 + p2, h + p2, h, h - p1 };
|
||||
|
||||
// bulk work: process all 32 byte blocks
|
||||
uint64_t* k32 = (uint64_t*)key;
|
||||
for(int i = 0; i < (len / 32); i += 4)
|
||||
{
|
||||
uint64_t b[4] = { k32[i + 0], k32[i + 1], k32[i + 2], k32[i + 3] };
|
||||
for(int j = 0; j < 4; j++)
|
||||
b[j] = b[j] * p2 + s[j];
|
||||
for(int j = 0; j < 4; j++)
|
||||
s[j] = ((b[j] << 31) | (b[j] >> 33)) * p1;
|
||||
}
|
||||
|
||||
// mix 32-byte state down to 8-byte state, initialize to value for short keys
|
||||
uint64_t s64 = (s[2] + p5);
|
||||
if(len > 32)
|
||||
{
|
||||
s64 = ((s[0] << 1) | (s[0] >> 63)) + ((s[1] << 7) | (s[1] >> 57)) + ((s[2] << 12) | (s[2] >> 52)) + ((s[3] << 18) | (s[3] >> 46));
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
uint64_t ps = (((s[i] * p2) << 31) | ((s[i] * p2) >> 33)) * p1;
|
||||
s64 = (s64 ^ ps) * p1 + p4;
|
||||
}
|
||||
}
|
||||
s64 += len;
|
||||
|
||||
// up to 31 bytes remain, process 0-3 8 byte blocks
|
||||
uint8_t* tail = (uint8_t*)(((char*)key) + (len / 32) * 32);
|
||||
for(int i = 0; i < (len & 31) / 8; i++, tail += 8)
|
||||
{
|
||||
uint64_t b = (*((uint64_t*)tail)) * p2;
|
||||
b = (((b << 31) | (b >> 33)) * p1) ^ s64;
|
||||
s64 = ((b << 27) | (b >> 37)) * p1 + p4;
|
||||
}
|
||||
|
||||
// up to 7 bytes remain, process 0-1 4 byte block
|
||||
for(int i = 0; i < (len & 7) / 4; i++, tail += 4)
|
||||
{
|
||||
uint64_t b = s64 ^ (*(uint32_t*)tail) * p1;
|
||||
s64 = ((b << 23) | (b >> 41)) * p2 + p3;
|
||||
}
|
||||
|
||||
// up to 3 bytes remain, process 0-3 1 byte blocks
|
||||
for(int i = 0; i < (len & 3); i++, tail++)
|
||||
{
|
||||
uint64_t b = s64 ^ (*tail) * p5;
|
||||
s64 = ((b << 11) | (b >> 53)) * p1;
|
||||
}
|
||||
|
||||
// finalization mix
|
||||
s64 = (s64 ^ (s64 >> 33)) * p2;
|
||||
s64 = (s64 ^ (s64 >> 29)) * p3;
|
||||
return (s64 ^ (s64 >> 32));
|
||||
}
|
||||
|
||||
u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed)
|
||||
{
|
||||
return (xxh_64(string.ptr, string.len, seed));
|
||||
}
|
||||
|
||||
u64 oc_hash_xx64_string(oc_str8 string)
|
||||
{
|
||||
return (xxh_64(string.ptr, string.len, 0));
|
||||
}
|
||||
#endif // OC_ARCH_X64
|
||||
#endif // 0
|
||||
|
|
|
@ -12,17 +12,11 @@
|
|||
#include "typedefs.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ORCA_API u64 oc_hash_aes_u64(u64 x);
|
||||
ORCA_API u64 oc_hash_aes_u64_x2(u64 x, u64 y);
|
||||
ORCA_API u64 oc_hash_aes_string(oc_str8 string);
|
||||
ORCA_API u64 oc_hash_aes_string_seed(oc_str8 string, u64 seed);
|
||||
|
||||
ORCA_API u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed);
|
||||
ORCA_API u64 oc_hash_xx64_string(oc_str8 string);
|
||||
ORCA_API u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed);
|
||||
ORCA_API u64 oc_hash_xx64_string(oc_str8 string);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
Loading…
Reference in New Issue