diff --git a/src/mtl_shader.metal b/src/mtl_shader.metal index a2006af..5212cf2 100644 --- a/src/mtl_shader.metal +++ b/src/mtl_shader.metal @@ -31,6 +31,27 @@ bool is_top_left(float2 a, float2 b) return( (a.y == b.y && b.x < a.x) ||(b.y < a.y)); } +/* +bool is_top_left(int2 a, int2 b) +{ + return( (a.y == b.y && b.x < a.x) + ||(b.y < a.y)); +} +*/ + +////////////////////////////////////////////////////////////////////////////// +//TODO: we should do these computations on 64bits, because otherwise +// we might overflow for values > 2048. +// Unfortunately this is costly. +// Another way is to precompute triangle edges (b - a) in full precision +// once to avoid doing it all the time... +////////////////////////////////////////////////////////////////////////////// + +int orient2d(int2 a, int2 b, int2 c) +{ + return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x)); +} + kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], constant uint* indexBuffer [[buffer(1)]], @@ -67,13 +88,6 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], boxMin = max(boxMin, clip.xy); boxMax = min(boxMax, clip.zw); - //NOTE(martin): compute triangle orientation and bias for each edge - int cw = ((p1 - p0).x*(p2 - p0).y - (p1 - p0).y*(p2 - p0).x) > 0 ? 1 : -1; - - int bias0 = is_top_left(p1, p2) ? 0 : -1; - int bias1 = is_top_left(p2, p0) ? 0 : -1; - int bias2 = is_top_left(p0, p1) ? 0 : -1; - //NOTE(martin): fill triangle data const float subPixelFactor = 16; @@ -93,20 +107,26 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], triangleArray[gid].cubic1 = vertexBuffer[i1].cubic; triangleArray[gid].cubic2 = vertexBuffer[i2].cubic; - triangleArray[gid].p0 = int2(p0 * subPixelFactor); - triangleArray[gid].p1 = int2(p1 * subPixelFactor); - triangleArray[gid].p2 = int2(p2 * subPixelFactor); + int2 ip0 = int2(p0 * subPixelFactor); + int2 ip1 = int2(p1 * subPixelFactor); + int2 ip2 = int2(p2 * subPixelFactor); - triangleArray[gid].bias0 = bias0; - triangleArray[gid].bias1 = bias1; - triangleArray[gid].bias2 = bias2; + triangleArray[gid].p0 = ip0; + triangleArray[gid].p1 = ip1; + triangleArray[gid].p2 = ip2; + + //NOTE(martin): compute triangle orientation and bias for each edge + int cw = orient2d(ip0, ip1, ip2) > 0 ? 1 : -1; triangleArray[gid].cw = cw; - - int4 box = int4(floor(fbox))/RENDERER_TILE_SIZE; + triangleArray[gid].bias0 = is_top_left(p1, p2) ? -(1-cw)/2 : -(1+cw)/2; + triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2; + triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2; //NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax // otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0. + int4 box = int4(floor(fbox))/RENDERER_TILE_SIZE; + int xMin = max(0, box.x); int yMin = max(0, box.y); int xMax = min(box.z, nTilesX-1); @@ -153,33 +173,6 @@ kernel void SortKernel(constant mg_triangle_data* triangleArray [[buffer(0)]], } } - -bool is_top_left(int2 a, int2 b) -{ - return( (a.y == b.y && b.x < a.x) - ||(b.y < a.y)); -} - -////////////////////////////////////////////////////////////////////////////// -//TODO: we should do these computations on 64bits, because otherwise -// we might overflow for values > 2048. -// Unfortunately this is costly. -// Another way is to precompute triangle edges (b - a) in full precision -// once to avoid doing it all the time... -////////////////////////////////////////////////////////////////////////////// - -//TODO: coalesce -int orient2d(int2 a, int2 b, int2 c) -{ - return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x)); -} - -int is_clockwise(int2 p0, int2 p1, int2 p2) -{ - return((p1 - p0).x*(p2 - p0).y - (p1 - p0).y*(p2 - p0).x); -} - - kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]], const device uint* tileArrayBuffer [[buffer(1)]], const device mg_triangle_data* triangleArray [[buffer(2)]],