[mtl canvas] cull tile queues outside viewport

This commit is contained in:
Martin Fouilleul 2023-03-22 17:28:11 +01:00
parent faf024a63a
commit c20758f6a2
4 changed files with 76 additions and 56 deletions

View File

@ -65,7 +65,7 @@ int main()
//NOTE: create surface //NOTE: create surface
mg_surface surface = mg_surface_create_for_window(window, MG_BACKEND_DEFAULT); mg_surface surface = mg_surface_create_for_window(window, MG_BACKEND_DEFAULT);
mg_surface_swap_interval(surface, 1); mg_surface_swap_interval(surface, 0);
//TODO: create canvas //TODO: create canvas
mg_canvas canvas = mg_canvas_create(surface); mg_canvas canvas = mg_canvas_create(surface);

View File

@ -244,6 +244,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
[shapeEncoder setBuffer: backend->tilesBuffer offset:0 atIndex: 2]; [shapeEncoder setBuffer: backend->tilesBuffer offset:0 atIndex: 2];
[shapeEncoder setBuffer: backend->tilesOffset offset:0 atIndex: 3]; [shapeEncoder setBuffer: backend->tilesOffset offset:0 atIndex: 3];
[shapeEncoder setBytes: &scale length: sizeof(float) atIndex: 4]; [shapeEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
[shapeEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 5];
MTLSize shapeGroupSize = MTLSizeMake(backend->shapePipeline.maxTotalThreadsPerThreadgroup, 1, 1); MTLSize shapeGroupSize = MTLSizeMake(backend->shapePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
MTLSize shapeGridSize = MTLSizeMake(shapeCount, 1, 1); MTLSize shapeGridSize = MTLSizeMake(shapeCount, 1, 1);

View File

@ -65,12 +65,13 @@ typedef struct mg_triangle_data
using namespace metal; using namespace metal;
#endif #endif
#define MG_TILE_CMD_MASK (1<<31) #define MG_TILE_CMD_MASK (3<<30)
typedef enum mg_tile_cmd_kind typedef enum mg_tile_cmd_kind
{ {
mg_cmd_triangle = 0, mg_cmd_triangle = 0,
mg_cmd_color = 1<<31, mg_cmd_color = 1<<30,
mg_cmd_flip = 2<<30
} mg_tile_cmd_kind; } mg_tile_cmd_kind;
typedef int mg_tile_cmd; typedef int mg_tile_cmd;

View File

@ -61,9 +61,10 @@ kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]],
device mg_tile* tilesBuffer [[buffer(2)]], device mg_tile* tilesBuffer [[buffer(2)]],
device volatile atomic_uint* tilesOffset [[buffer(3)]], device volatile atomic_uint* tilesOffset [[buffer(3)]],
constant float* scaling [[buffer(4)]], constant float* scaling [[buffer(4)]],
constant int2* viewport [[buffer(5)]],
uint gid [[thread_position_in_grid]]) uint gid [[thread_position_in_grid]])
{ {
int2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
float4 box = shapeBuffer[gid].clip * scaling[0]; float4 box = shapeBuffer[gid].clip * scaling[0];
int2 firstTile = int2(box.xy)/RENDERER_TILE_SIZE; int2 firstTile = int2(box.xy)/RENDERER_TILE_SIZE;
@ -72,8 +73,11 @@ kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]],
// any tile queue, the tileQueues pointer for that shape would alias the tileQueues pointer of another // any tile queue, the tileQueues pointer for that shape would alias the tileQueues pointer of another
// shape, and we would have to detect that in the tiling and drawing kernels. Instead, just accept some // shape, and we would have to detect that in the tiling and drawing kernels. Instead, just accept some
// waste and keep the other kernels more uniform for now... // waste and keep the other kernels more uniform for now...
int nTilesX = int(box.z)/RENDERER_TILE_SIZE - firstTile.x + 1; //TODO limit to screen
int nTilesY = int(box.w)/RENDERER_TILE_SIZE - firstTile.y + 1; int2 lastTile = max(firstTile, min(int2(box.zw)/RENDERER_TILE_SIZE, tilesMatrixDim));
int nTilesX = lastTile.x - firstTile.x + 1;
int nTilesY = lastTile.y - firstTile.y + 1;
int tileCount = nTilesX * nTilesY; int tileCount = nTilesX * nTilesY;
@ -172,8 +176,8 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
int xMin = max(0, coarseBox.x - shapeQueue->area.x); int xMin = max(0, coarseBox.x - shapeQueue->area.x);
int yMin = max(0, coarseBox.y - shapeQueue->area.y); int yMin = max(0, coarseBox.y - shapeQueue->area.y);
int xMax = min(coarseBox.z - shapeQueue->area.x, shapeQueue->area.z); int xMax = min(coarseBox.z - shapeQueue->area.x, shapeQueue->area.z-1);
int yMax = min(coarseBox.w - shapeQueue->area.y, shapeQueue->area.w); int yMax = min(coarseBox.w - shapeQueue->area.y, shapeQueue->area.w-1);
//NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax //NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0. // otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
@ -233,6 +237,7 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
device mg_tile_elt* elt = &eltBuffer[eltIndex]; device mg_tile_elt* elt = &eltBuffer[eltIndex];
elt->triangleIndex = gid; elt->triangleIndex = gid;
elt->next = atomic_exchange_explicit(&tile->firstElt, eltIndex, memory_order_relaxed); elt->next = atomic_exchange_explicit(&tile->firstElt, eltIndex, memory_order_relaxed);
atomic_fetch_add_explicit(&tile->eltCount, 1, memory_order_relaxed); atomic_fetch_add_explicit(&tile->eltCount, 1, memory_order_relaxed);
@ -254,6 +259,7 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
if(allLeftFromEdge0 && allLeftFromEdge1 && allLeftFromEdge2 && triangleFull) if(allLeftFromEdge0 && allLeftFromEdge1 && allLeftFromEdge2 && triangleFull)
{ {
elt->triangleIndex |= mg_cmd_flip;
atomic_fetch_add_explicit(&tile->flipCount, 1, memory_order_relaxed); atomic_fetch_add_explicit(&tile->flipCount, 1, memory_order_relaxed);
} }
else else
@ -310,7 +316,7 @@ kernel void GatherKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(
const device mg_tile_elt* elt = &eltBuffer[firstEltIndex]; const device mg_tile_elt* elt = &eltBuffer[firstEltIndex];
count = 0; count = 0;
tileArray[count] = mg_cmd_color | elt->triangleIndex; tileArray[count] = mg_cmd_color | (elt->triangleIndex & ~MG_TILE_CMD_MASK);
count++; count++;
continue; continue;
} }
@ -458,7 +464,9 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
currentShapeIndex = triangle->shapeIndex; currentShapeIndex = triangle->shapeIndex;
} }
if(cmdKind == mg_cmd_color) switch(cmdKind)
{
case mg_cmd_color:
{ {
for(int sampleIndex=0; sampleIndex<sampleCount; sampleIndex++) for(int sampleIndex=0; sampleIndex<sampleCount; sampleIndex++)
{ {
@ -467,8 +475,17 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
sampleColor[sampleIndex] = nextColor; sampleColor[sampleIndex] = nextColor;
flipCount[sampleIndex] = 0; flipCount[sampleIndex] = 0;
} }
} break;
case mg_cmd_flip:
{
for(int sampleIndex=0; sampleIndex<sampleCount; sampleIndex++)
{
flipCount[sampleIndex]++;
} }
else } break;
case mg_cmd_triangle:
{ {
int2 p0 = triangle->p0; int2 p0 = triangle->p0;
int2 p1 = triangle->p1; int2 p1 = triangle->p1;
@ -515,6 +532,7 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
} }
} }
} }
} break;
} }
} }