[mtl canvas] detecting full covered/uncovered tiles

This commit is contained in:
Martin Fouilleul 2023-03-22 09:23:22 +01:00
parent 0d8bc824a2
commit 90b00979ed
2 changed files with 119 additions and 14 deletions

View File

@ -11,7 +11,7 @@
#include<simd/simd.h> #include<simd/simd.h>
#define RENDERER_TILE_BUFFER_SIZE 4096 #define RENDERER_TILE_BUFFER_SIZE 8192
#define RENDERER_TILE_SIZE 16 #define RENDERER_TILE_SIZE 16
#define RENDERER_MAX_TILES 65536 #define RENDERER_MAX_TILES 65536
@ -72,8 +72,12 @@ typedef struct mg_tile_elt
typedef struct mg_tile typedef struct mg_tile
{ {
atomic_int eltCount; vector_float4 color;
atomic_int firstElt; atomic_int firstElt;
atomic_int eltCount;
atomic_int partial;
atomic_int flipCount;
} mg_tile; } mg_tile;
typedef struct mg_shape_queue typedef struct mg_shape_queue

View File

@ -86,8 +86,11 @@ kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]],
for(int i=0; i<tileCount; i++) for(int i=0; i<tileCount; i++)
{ {
atomic_store_explicit(&tiles[i].eltCount, 0, memory_order_relaxed); tiles[i].color = shapeBuffer[gid].color;
atomic_store_explicit(&tiles[i].firstElt, -1, memory_order_relaxed); atomic_store_explicit(&tiles[i].firstElt, -1, memory_order_relaxed);
atomic_store_explicit(&tiles[i].eltCount, 0, memory_order_relaxed);
atomic_store_explicit(&tiles[i].partial, 0, memory_order_relaxed);
atomic_store_explicit(&tiles[i].flipCount, 0, memory_order_relaxed);
} }
} }
@ -154,32 +157,107 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2; triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2;
triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2; triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2;
int4 tileBox = int4(fbox)/RENDERER_TILE_SIZE; int4 coarseBox = int4(fbox)/RENDERER_TILE_SIZE;
//NOTE: bucket triangle into tiles //NOTE: bucket triangle into tiles
device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex]; device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex];
device mg_tile* tiles = &tilesBuffer[shapeQueue->tiles]; device mg_tile* tiles = &tilesBuffer[shapeQueue->tiles];
int xMin = max(0, tileBox.x - shapeQueue->area.x); int xMin = max(0, coarseBox.x - shapeQueue->area.x);
int yMin = max(0, tileBox.y - shapeQueue->area.y); int yMin = max(0, coarseBox.y - shapeQueue->area.y);
int xMax = min(tileBox.z - shapeQueue->area.x, shapeQueue->area.z-1); int xMax = min(coarseBox.z - shapeQueue->area.x, shapeQueue->area.z);
int yMax = min(tileBox.w - shapeQueue->area.y, shapeQueue->area.w-1); int yMax = min(coarseBox.w - shapeQueue->area.y, shapeQueue->area.w);
//NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax //NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0. // otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
bool triangleFull = all( triangleArray[gid].cubic0 == float4(1, 1, 1, 1)
&& triangleArray[gid].cubic1 == float4(1, 1, 1, 1)
&& triangleArray[gid].cubic2 == float4(1, 1, 1, 1));
int2 edges[3][2] = {{ip0, ip1}, {ip1, ip2}, {ip2, ip0}};
for(int y = yMin; y <= yMax; y++) for(int y = yMin; y <= yMax; y++)
{ {
for(int x = xMin ; x <= xMax; x++) for(int x = xMin ; x <= xMax; x++)
{ {
int tileIndex = y*shapeQueue->area.z + x; int4 tileBox = int4(shapeQueue->area.x + x,
shapeQueue->area.y + y,
shapeQueue->area.x + x + 1,
shapeQueue->area.y + y + 1) * RENDERER_TILE_SIZE*int(subPixelFactor);
int eltIndex = atomic_fetch_add_explicit(eltOffset, 1, memory_order_relaxed); int2 b[4] = {{tileBox.x, tileBox.y},
{tileBox.z, tileBox.y},
{tileBox.z, tileBox.w},
{tileBox.x, tileBox.w}};
device mg_tile_elt* elt = &eltBuffer[eltIndex]; //TODO: should add the biases here?
elt->triangleIndex = gid;
elt->next = atomic_exchange_explicit(&tiles[tileIndex].firstElt, eltIndex, memory_order_relaxed);
atomic_fetch_add_explicit(&tiles[tileIndex].eltCount, 1, memory_order_relaxed); int sideFromEdge[3][4];
for(int edgeIndex=0; edgeIndex<3; edgeIndex++)
{
for(int cornerIndex=0; cornerIndex<4; cornerIndex++)
{
sideFromEdge[edgeIndex][cornerIndex] = cw*orient2d(edges[edgeIndex][0],
edges[edgeIndex][1],
b[cornerIndex]);
}
}
bool allRightFromEdge0 = sideFromEdge[0][0] < 0
&& sideFromEdge[0][1] < 0
&& sideFromEdge[0][2] < 0
&& sideFromEdge[0][3] < 0;
bool allRightFromEdge1 = sideFromEdge[1][0] < 0
&& sideFromEdge[1][1] < 0
&& sideFromEdge[1][2] < 0
&& sideFromEdge[1][3] < 0;
bool allRightFromEdge2 = sideFromEdge[2][0] < 0
&& sideFromEdge[2][1] < 0
&& sideFromEdge[2][2] < 0
&& sideFromEdge[2][3] < 0;
bool allOutside = allRightFromEdge0 || allRightFromEdge1 || allRightFromEdge2;
if(!allOutside)
{
int tileIndex = y*shapeQueue->area.z + x;
device mg_tile* tile = &tiles[tileIndex];
int eltIndex = atomic_fetch_add_explicit(eltOffset, 1, memory_order_relaxed);
device mg_tile_elt* elt = &eltBuffer[eltIndex];
elt->triangleIndex = gid;
elt->next = atomic_exchange_explicit(&tile->firstElt, eltIndex, memory_order_relaxed);
atomic_fetch_add_explicit(&tile->eltCount, 1, memory_order_relaxed);
bool allLeftFromEdge0 = sideFromEdge[0][0] > 0
&& sideFromEdge[0][1] > 0
&& sideFromEdge[0][2] > 0
&& sideFromEdge[0][3] > 0;
bool allLeftFromEdge1 = sideFromEdge[1][0] > 0
&& sideFromEdge[1][1] > 0
&& sideFromEdge[1][2] > 0
&& sideFromEdge[1][3] > 0;
bool allLeftFromEdge2 = sideFromEdge[2][0] > 0
&& sideFromEdge[2][1] > 0
&& sideFromEdge[2][2] > 0
&& sideFromEdge[2][3] > 0;
if(allLeftFromEdge0 && allLeftFromEdge1 && allLeftFromEdge2 && triangleFull)
{
atomic_fetch_add_explicit(&tile->flipCount, 1, memory_order_relaxed);
}
else
{
atomic_store_explicit(&tile->partial, 1, memory_order_relaxed);
}
}
} }
} }
} }
@ -218,6 +296,23 @@ kernel void GatherKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(
int localIndex = tileQueueCoord.y * shapeQueue->area.z + tileQueueCoord.x; int localIndex = tileQueueCoord.y * shapeQueue->area.z + tileQueueCoord.x;
const device mg_tile* tile = &tiles[localIndex]; const device mg_tile* tile = &tiles[localIndex];
if(atomic_load_explicit(&tile->partial, memory_order_relaxed) == 0)
{
if(atomic_load_explicit(&tile->flipCount, memory_order_relaxed) & 0x01)
{
if(tile->color.a == 1)
{
//NOTE: tile is full covered by a solid color, reset counter
count = 0;
}
}
else
{
//NOTE: tile is fully uncovered, skip that shape
continue;
}
}
int firstEltIndex = *(device int*)&tile->firstElt; int firstEltIndex = *(device int*)&tile->firstElt;
const device mg_tile_elt* elt = 0; const device mg_tile_elt* elt = 0;
@ -309,6 +404,12 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
currentColor[i] = float4(0, 0, 0, 0); currentColor[i] = float4(0, 0, 0, 0);
} }
if(tileCounter >= RENDERER_TILE_BUFFER_SIZE)
{
outTexture.write(float4(1, 0, 0, 1), gid);
return;
}
for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++) for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++)
{ {
int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex]; int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex];