From 370482a3e02ecc003ac7623044ca0751045dbe84 Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Tue, 19 Sep 2023 09:15:36 +0200 Subject: [PATCH 1/5] remove wrong alignment attribute of IM3Operation wasm3 --- src/ext/wasm3/source/m3_config_platforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ext/wasm3/source/m3_config_platforms.h b/src/ext/wasm3/source/m3_config_platforms.h index 0cb475d..63676fd 100644 --- a/src/ext/wasm3/source/m3_config_platforms.h +++ b/src/ext/wasm3/source/m3_config_platforms.h @@ -121,7 +121,7 @@ typedef int8_t i8; # if defined (M3_COMPILER_MSVC) # define vectorcall // For MSVC, better not to specify any call convention # elif defined(__x86_64__) -# define vectorcall __attribute__((aligned(32))) +# define vectorcall //# elif defined(__riscv) && (__riscv_xlen == 64) //# define vectorcall __attribute__((aligned(16))) # elif defined(__MINGW32__) -- 2.25.1 From 715ab0f6e3faee421767627a8efbf6a93fe607cc Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Tue, 19 Sep 2023 09:24:23 +0200 Subject: [PATCH 2/5] put mtl renderer constants in an enum to silence -Wgnu-folding-constant warnings --- src/graphics/mtl_renderer.m | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/graphics/mtl_renderer.m b/src/graphics/mtl_renderer.m index fd75ba8..fce53c7 100644 --- a/src/graphics/mtl_renderer.m +++ b/src/graphics/mtl_renderer.m @@ -15,9 +15,12 @@ #include "mtl_renderer.h" -const int OC_MTL_INPUT_BUFFERS_COUNT = 3, - OC_MTL_TILE_SIZE = 16, - OC_MTL_MSAA_COUNT = 8; +enum +{ + OC_MTL_INPUT_BUFFERS_COUNT = 3, + OC_MTL_TILE_SIZE = 16, + OC_MTL_MSAA_COUNT = 8 +}; typedef struct oc_mtl_canvas_backend { -- 2.25.1 From ecf89fa6b2bae8f1fc071062aa094cfe693f9b3d Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Tue, 19 Sep 2023 09:56:46 +0200 Subject: [PATCH 3/5] [macos build] - Remove the need for -maes, silence warning about ignored option on ARM macs - Set -mmacos-version-min when building wasm3 to match that of the platform layer and runtime --- scripts/dev.py | 7 +-- src/util/hash.c | 158 ++++++++++++++++++++++++------------------------ src/util/hash.h | 12 +--- 3 files changed, 86 insertions(+), 91 deletions(-) diff --git a/scripts/dev.py b/scripts/dev.py index c9d3292..1cb743b 100644 --- a/scripts/dev.py +++ b/scripts/dev.py @@ -170,7 +170,7 @@ def build_platform_layer_lib_win(release): def build_platform_layer_lib_mac(release): sdk_dir = "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" - flags = ["-mmacos-version-min=10.15.4", "-maes"] + flags = ["-mmacos-version-min=10.15.4"] cflags = ["-std=c11"] debug_flags = ["-O3"] if release else ["-g", "-DOC_DEBUG", "-DOC_LOG_COMPILE_DEBUG"] ldflags = [f"-L{sdk_dir}/usr/lib", f"-F{sdk_dir}/System/Library/Frameworks/"] @@ -280,6 +280,7 @@ def build_wasm3_lib_mac(release): "-foptimize-sibling-calls", "-Wno-extern-initializer", "-Dd_m3VerboseErrorMessages", + "-mmacos-version-min=10.15.4" ] for f in glob.iglob("src/ext/wasm3/source/*.c"): @@ -348,9 +349,7 @@ def build_orca_mac(release): debug_flags = ["-O2"] if release else ["-g", "-DOC_DEBUG -DOC_LOG_COMPILE_DEBUG"] flags = [ *debug_flags, - "-mmacos-version-min=10.15.4", - "-maes", - ] + "-mmacos-version-min=10.15.4"] gen_all_bindings() diff --git a/src/util/hash.c b/src/util/hash.c index 2385064..af6f579 100644 --- a/src/util/hash.c +++ b/src/util/hash.c @@ -8,8 +8,84 @@ #include "hash.h" #include "platform/platform.h" -#if OC_ARCH_X64 - #include +//xxhash64, copy-pasted from https://github.com/demetri/scribbles/blob/master/hashing/hash_functions.c +// Thanks to Demetri Spanos + +uint64_t xxh_64(const void* key, int len, uint64_t h) +{ + // primes used in mul-rot updates + uint64_t p1 = 0x9e3779b185ebca87, p2 = 0xc2b2ae3d27d4eb4f, + p3 = 0x165667b19e3779f9, p4 = 0x85ebca77c2b2ae63, p5 = 0x27d4eb2f165667c5; + + // inital 32-byte (4x8) wide hash state + uint64_t s[4] = { h + p1 + p2, h + p2, h, h - p1 }; + + // bulk work: process all 32 byte blocks + uint64_t* k32 = (uint64_t*)key; + for(int i = 0; i < (len / 32); i += 4) + { + uint64_t b[4] = { k32[i + 0], k32[i + 1], k32[i + 2], k32[i + 3] }; + for(int j = 0; j < 4; j++) + b[j] = b[j] * p2 + s[j]; + for(int j = 0; j < 4; j++) + s[j] = ((b[j] << 31) | (b[j] >> 33)) * p1; + } + + // mix 32-byte state down to 8-byte state, initalize to value for short keys + uint64_t s64 = (s[2] + p5); + if(len > 32) + { + s64 = ((s[0] << 1) | (s[0] >> 63)) + ((s[1] << 7) | (s[1] >> 57)) + ((s[2] << 12) | (s[2] >> 52)) + ((s[3] << 18) | (s[3] >> 46)); + for(int i = 0; i < 4; i++) + { + uint64_t ps = (((s[i] * p2) << 31) | ((s[i] * p2) >> 33)) * p1; + s64 = (s64 ^ ps) * p1 + p4; + } + } + s64 += len; + + // up to 31 bytes remain, process 0-3 8 byte blocks + uint8_t* tail = (uint8_t*)(((char*)key) + (len / 32) * 32); + for(int i = 0; i < (len & 31) / 8; i++, tail += 8) + { + uint64_t b = (*((uint64_t*)tail)) * p2; + b = (((b << 31) | (b >> 33)) * p1) ^ s64; + s64 = ((b << 27) | (b >> 37)) * p1 + p4; + } + + // up to 7 bytes remain, process 0-1 4 byte block + for(int i = 0; i < (len & 7) / 4; i++, tail += 4) + { + uint64_t b = s64 ^ (*(uint32_t*)tail) * p1; + s64 = ((b << 23) | (b >> 41)) * p2 + p3; + } + + // up to 3 bytes remain, process 0-3 1 byte blocks + for(int i = 0; i < (len & 3); i++, tail++) + { + uint64_t b = s64 ^ (*tail) * p5; + s64 = ((b << 11) | (b >> 53)) * p1; + } + + // finalization mix + s64 = (s64 ^ (s64 >> 33)) * p2; + s64 = (s64 ^ (s64 >> 29)) * p3; + return (s64 ^ (s64 >> 32)); +} + +u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed) +{ + return (xxh_64(string.ptr, string.len, seed)); +} + +u64 oc_hash_xx64_string(oc_str8 string) +{ + return (xxh_64(string.ptr, string.len, 0)); +} + +#if 0 //NOTE(martin): keep that here cause we could want to use them when aes is available, but we don't for now + #if OC_ARCH_X64 + #include u64 oc_hash_aes_u64(u64 x) { @@ -127,79 +203,5 @@ u64 oc_hash_aes_string_seed(oc_str8 string, u64 seed) u64 result = _mm_extract_epi64(hash, 0); return (result); } -#endif // OC_ARCH_X64 - -//xxhash64, copy-pasted from https://github.com/demetri/scribbles/blob/master/hashing/hash_functions.c -// Thanks to Demetri Spanos - -uint64_t xxh_64(const void* key, int len, uint64_t h) -{ - // primes used in mul-rot updates - uint64_t p1 = 0x9e3779b185ebca87, p2 = 0xc2b2ae3d27d4eb4f, - p3 = 0x165667b19e3779f9, p4 = 0x85ebca77c2b2ae63, p5 = 0x27d4eb2f165667c5; - - // inital 32-byte (4x8) wide hash state - uint64_t s[4] = { h + p1 + p2, h + p2, h, h - p1 }; - - // bulk work: process all 32 byte blocks - uint64_t* k32 = (uint64_t*)key; - for(int i = 0; i < (len / 32); i += 4) - { - uint64_t b[4] = { k32[i + 0], k32[i + 1], k32[i + 2], k32[i + 3] }; - for(int j = 0; j < 4; j++) - b[j] = b[j] * p2 + s[j]; - for(int j = 0; j < 4; j++) - s[j] = ((b[j] << 31) | (b[j] >> 33)) * p1; - } - - // mix 32-byte state down to 8-byte state, initalize to value for short keys - uint64_t s64 = (s[2] + p5); - if(len > 32) - { - s64 = ((s[0] << 1) | (s[0] >> 63)) + ((s[1] << 7) | (s[1] >> 57)) + ((s[2] << 12) | (s[2] >> 52)) + ((s[3] << 18) | (s[3] >> 46)); - for(int i = 0; i < 4; i++) - { - uint64_t ps = (((s[i] * p2) << 31) | ((s[i] * p2) >> 33)) * p1; - s64 = (s64 ^ ps) * p1 + p4; - } - } - s64 += len; - - // up to 31 bytes remain, process 0-3 8 byte blocks - uint8_t* tail = (uint8_t*)(((char*)key) + (len / 32) * 32); - for(int i = 0; i < (len & 31) / 8; i++, tail += 8) - { - uint64_t b = (*((uint64_t*)tail)) * p2; - b = (((b << 31) | (b >> 33)) * p1) ^ s64; - s64 = ((b << 27) | (b >> 37)) * p1 + p4; - } - - // up to 7 bytes remain, process 0-1 4 byte block - for(int i = 0; i < (len & 7) / 4; i++, tail += 4) - { - uint64_t b = s64 ^ (*(uint32_t*)tail) * p1; - s64 = ((b << 23) | (b >> 41)) * p2 + p3; - } - - // up to 3 bytes remain, process 0-3 1 byte blocks - for(int i = 0; i < (len & 3); i++, tail++) - { - uint64_t b = s64 ^ (*tail) * p5; - s64 = ((b << 11) | (b >> 53)) * p1; - } - - // finalization mix - s64 = (s64 ^ (s64 >> 33)) * p2; - s64 = (s64 ^ (s64 >> 29)) * p3; - return (s64 ^ (s64 >> 32)); -} - -u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed) -{ - return (xxh_64(string.ptr, string.len, seed)); -} - -u64 oc_hash_xx64_string(oc_str8 string) -{ - return (xxh_64(string.ptr, string.len, 0)); -} + #endif // OC_ARCH_X64 +#endif // 0 diff --git a/src/util/hash.h b/src/util/hash.h index 30e19ca..2cc37e0 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -12,17 +12,11 @@ #include "typedefs.h" #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif - ORCA_API u64 oc_hash_aes_u64(u64 x); - ORCA_API u64 oc_hash_aes_u64_x2(u64 x, u64 y); - ORCA_API u64 oc_hash_aes_string(oc_str8 string); - ORCA_API u64 oc_hash_aes_string_seed(oc_str8 string, u64 seed); - - ORCA_API u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed); - ORCA_API u64 oc_hash_xx64_string(oc_str8 string); +ORCA_API u64 oc_hash_xx64_string_seed(oc_str8 string, u64 seed); +ORCA_API u64 oc_hash_xx64_string(oc_str8 string); #ifdef __cplusplus } // extern "C" -- 2.25.1 From 647565e285a28254a67b13c96b9d28cca72b5af5 Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Tue, 19 Sep 2023 11:47:27 +0200 Subject: [PATCH 4/5] Put offset curve sample check count in an enum to silence -Wgnu-folding-constant --- src/graphics/mtl_renderer.m | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/graphics/mtl_renderer.m b/src/graphics/mtl_renderer.m index fce53c7..2ce50e8 100644 --- a/src/graphics/mtl_renderer.m +++ b/src/graphics/mtl_renderer.m @@ -19,7 +19,8 @@ enum { OC_MTL_INPUT_BUFFERS_COUNT = 3, OC_MTL_TILE_SIZE = 16, - OC_MTL_MSAA_COUNT = 8 + OC_MTL_MSAA_COUNT = 8, + OC_MTL_OFFSET_CURVE_SAMPLE_COUNT = 5, }; typedef struct oc_mtl_canvas_backend @@ -514,8 +515,7 @@ void oc_mtl_render_stroke_quadratic(oc_mtl_canvas_backend* backend, oc_vec2* p) } else { - const int CHECK_SAMPLE_COUNT = 5; - f32 checkSamples[CHECK_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 }; + f32 checkSamples[OC_MTL_OFFSET_CURVE_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 }; f32 d2LowBound = oc_square(0.5 * width - tolerance); f32 d2HighBound = oc_square(0.5 * width + tolerance); @@ -523,7 +523,7 @@ void oc_mtl_render_stroke_quadratic(oc_mtl_canvas_backend* backend, oc_vec2* p) f32 maxOvershoot = 0; f32 maxOvershootParameter = 0; - for(int i = 0; i < CHECK_SAMPLE_COUNT; i++) + for(int i = 0; i < OC_MTL_OFFSET_CURVE_SAMPLE_COUNT; i++) { f32 t = checkSamples[i]; @@ -616,8 +616,7 @@ void oc_mtl_render_stroke_cubic(oc_mtl_canvas_backend* backend, oc_vec2* p) } else { - const int CHECK_SAMPLE_COUNT = 5; - f32 checkSamples[CHECK_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 }; + f32 checkSamples[OC_MTL_OFFSET_CURVE_SAMPLE_COUNT] = { 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6 }; f32 d2LowBound = oc_square(0.5 * width - tolerance); f32 d2HighBound = oc_square(0.5 * width + tolerance); @@ -625,7 +624,7 @@ void oc_mtl_render_stroke_cubic(oc_mtl_canvas_backend* backend, oc_vec2* p) f32 maxOvershoot = 0; f32 maxOvershootParameter = 0; - for(int i = 0; i < CHECK_SAMPLE_COUNT; i++) + for(int i = 0; i < OC_MTL_OFFSET_CURVE_SAMPLE_COUNT; i++) { f32 t = checkSamples[i]; -- 2.25.1 From 94ce88e272c9150e87e0d2a355e074052f836549 Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Tue, 19 Sep 2023 11:48:42 +0200 Subject: [PATCH 5/5] [macos] use libtool instead of ar when building wasm3 lib, and pass -no_warning_for_no_symbols because translation units in wasm3 don't have symbols --- scripts/dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dev.py b/scripts/dev.py index 1cb743b..c8531c2 100644 --- a/scripts/dev.py +++ b/scripts/dev.py @@ -290,7 +290,7 @@ def build_wasm3_lib_mac(release): "-o", f"build/obj/{name}", f, ], check=True) - subprocess.run(["ar", "-rcs", "build/lib/libwasm3.a", *glob.glob("build/obj/*.o")], check=True) + subprocess.run(["libtool", "-static", "-o", "build/lib/libwasm3.a", "-no_warning_for_no_symbols", *glob.glob("build/obj/*.o")], check=True) subprocess.run(["rm", "-rf", "build/obj"], check=True) -- 2.25.1