From c4415aaeea27e6f84caa66ad121dfcd9202b1389 Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Tue, 28 Mar 2023 17:58:23 +0200 Subject: [PATCH] [mtl canvas, wip] binning segments to tile queues --- examples/polygon/main.c | 4 +- src/mtl_renderer.h | 76 ++++++++----- src/mtl_renderer.m | 90 ++++++++++++++- src/mtl_renderer.metal | 243 +++++++++++++++++++++++++++++++++------- 4 files changed, 338 insertions(+), 75 deletions(-) diff --git a/examples/polygon/main.c b/examples/polygon/main.c index cedefea..835c5bc 100644 --- a/examples/polygon/main.c +++ b/examples/polygon/main.c @@ -83,8 +83,8 @@ int main() mg_fill(); mg_move_to(200, 100); - mg_line_to(400, 100); - mg_line_to(400, 200); + mg_line_to(410, 100); + mg_line_to(410, 200); mg_line_to(200, 200); mg_close_path(); mg_set_color_rgba(0, 1, 0, 1); diff --git a/src/mtl_renderer.h b/src/mtl_renderer.h index ac78d4c..a4acad7 100644 --- a/src/mtl_renderer.h +++ b/src/mtl_renderer.h @@ -11,36 +11,10 @@ #include -typedef enum { - MG_MTL_LINE = 1, -} mg_mtl_seg_kind; - -typedef enum { - MG_MTL_BL, // curve on bottom left - MG_MTL_BR, // curve on bottom right - MG_MTL_TL, // curve on top left - MG_MTL_TR // curve on top right -} mg_mtl_seg_config; - typedef enum { MG_MTL_FILL, } mg_mtl_cmd; -typedef struct mg_mtl_path_elt -{ - int pathIndex; - mg_mtl_seg_kind kind; - vector_float2 p[4]; -} mg_mtl_path_elt; - -typedef struct mg_mtl_segment -{ - int pathIndex; - mg_mtl_seg_config config; //TODO pack these - int windingIncrement; - vector_float4 box; -} mg_mtl_segment; - typedef struct mg_mtl_path { mg_mtl_cmd cmd; @@ -49,5 +23,55 @@ typedef struct mg_mtl_path } mg_mtl_path; +typedef enum { + MG_MTL_LINE = 1, +} mg_mtl_seg_kind; + +typedef struct mg_mtl_path_elt +{ + int pathIndex; + mg_mtl_seg_kind kind; + vector_float2 p[4]; +} mg_mtl_path_elt; + +typedef enum { + MG_MTL_BL, // curve on bottom left + MG_MTL_BR, // curve on bottom right + MG_MTL_TL, // curve on top left + MG_MTL_TR // curve on top right +} 
mg_mtl_seg_config; + +typedef struct mg_mtl_segment +{ + int pathIndex; + mg_mtl_seg_config config; //TODO pack these + int windingIncrement; + vector_float4 box; +} mg_mtl_segment; + +typedef struct mg_mtl_path_queue +{ + vector_int4 area; + int tileQueues; +} mg_mtl_path_queue; + +#ifdef __METAL_VERSION__ + using namespace metal; +#endif + +typedef enum { MG_MTL_OP_SEGMENT } mg_mtl_tile_op_kind; + +typedef struct mg_mtl_tile_op +{ + mg_mtl_tile_op_kind kind; + int index; + int next; +} mg_mtl_tile_op; + +typedef struct mg_mtl_tile_queue +{ + atomic_int first; + +} mg_mtl_tile_queue; #endif //__MTL_RENDERER_H_ diff --git a/src/mtl_renderer.m b/src/mtl_renderer.m index 6af1ee3..6decb59 100644 --- a/src/mtl_renderer.m +++ b/src/mtl_renderer.m @@ -25,6 +25,7 @@ typedef struct mg_mtl_canvas_backend mg_canvas_backend interface; mg_surface surface; + id pathPipeline; id segmentPipeline; id rasterPipeline; id blitPipeline; @@ -35,6 +36,11 @@ typedef struct mg_mtl_canvas_backend id elementBuffer; id segmentCountBuffer; id segmentBuffer; + id pathQueueBuffer; + id tileQueueBuffer; + id tileQueueCountBuffer; + id tileOpBuffer; + id tileOpCountBuffer; } mg_mtl_canvas_backend; @@ -88,6 +94,9 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, vec2 p3 = mg_mat2x3_mul(primitive->attributes.transform, elt->p[0]); currentPos = elt->p[0]; + mg_update_path_extents(&pathExtents, p0); + mg_update_path_extents(&pathExtents, p3); + mg_mtl_path_elt* mtlElt = &elementBufferData[mtlEltCount]; mtlEltCount++; @@ -123,12 +132,39 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, mp_rect frame = mg_surface_get_frame(backend->surface); f32 scale = surface->mtlLayer.contentsScale; vec2 viewportSize = {frame.w * scale, frame.h * scale}; + int tileSize = 16; //NOTE: encode GPU commands @autoreleasepool { mg_mtl_surface_acquire_command_buffer(surface); + //NOTE: clear counters + id blitEncoder = [surface->commandBuffer blitCommandEncoder]; + blitEncoder.label = @"clear counters"; + 
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0]; + [blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0]; + [blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0]; + [blitEncoder endEncoding]; + + //NOTE: path setup pass + id pathEncoder = [surface->commandBuffer computeCommandEncoder]; + pathEncoder.label = @"path pass"; + [pathEncoder setComputePipelineState: backend->pathPipeline]; + + [pathEncoder setBytes:&pathCount length:sizeof(int) atIndex:0]; + [pathEncoder setBuffer:backend->pathBuffer offset:0 atIndex:1]; + [pathEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:2]; + [pathEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3]; + [pathEncoder setBuffer:backend->tileQueueCountBuffer offset:0 atIndex:4]; + [pathEncoder setBytes:&tileSize length:sizeof(int) atIndex:5]; + + MTLSize pathGridSize = MTLSizeMake(pathCount, 1, 1); + MTLSize pathGroupSize = MTLSizeMake(64, 1, 1); + + [pathEncoder dispatchThreads: pathGridSize threadsPerThreadgroup: pathGroupSize]; + [pathEncoder endEncoding]; + //NOTE: segment setup pass id segmentEncoder = [surface->commandBuffer computeCommandEncoder]; segmentEncoder.label = @"segment pass"; @@ -138,12 +174,16 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, [segmentEncoder setBuffer:backend->elementBuffer offset:0 atIndex:1]; [segmentEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2]; [segmentEncoder setBuffer:backend->segmentBuffer offset:0 atIndex:3]; + [segmentEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:4]; + [segmentEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5]; + [segmentEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6]; + [segmentEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:7]; + [segmentEncoder setBytes:&tileSize length:sizeof(int) atIndex:8]; MTLSize segmentGridSize = 
MTLSizeMake(mtlEltCount, 1, 1); MTLSize segmentGroupSize = MTLSizeMake(64, 1, 1); [segmentEncoder dispatchThreads: segmentGridSize threadsPerThreadgroup: segmentGroupSize]; - [segmentEncoder endEncoding]; //NOTE: raster pass @@ -155,6 +195,10 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, [rasterEncoder setBuffer:backend->pathBuffer offset:0 atIndex:1]; [rasterEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2]; [rasterEncoder setBuffer:backend->segmentBuffer offset:0 atIndex:3]; + [rasterEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:4]; + [rasterEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5]; + [rasterEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6]; + [rasterEncoder setBytes:&tileSize length:sizeof(int) atIndex:7]; [rasterEncoder setTexture:backend->outTexture atIndex:0]; @@ -195,18 +239,32 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface) @autoreleasepool { + [backend->pathPipeline release]; + [backend->segmentPipeline release]; + [backend->rasterPipeline release]; + [backend->blitPipeline release]; + [backend->pathBuffer release]; [backend->elementBuffer release]; [backend->segmentCountBuffer release]; [backend->segmentBuffer release]; + /*NOTE: release the queue/op buffers allocated in mg_mtl_canvas_create() */ + [backend->pathQueueBuffer release]; + [backend->tileQueueBuffer release]; + [backend->tileQueueCountBuffer release]; + [backend->tileOpBuffer release]; + [backend->tileOpCountBuffer release]; } free(backend); } -const u32 MG_MTL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path), - MG_MTL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_elt), - MG_MTL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_segment); +const u32 MG_MTL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path), + MG_MTL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_elt), + MG_MTL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_segment), + MG_MTL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_queue), + MG_MTL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_queue), + MG_MTL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_op);
mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) { @@ -238,19 +294,25 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) LOG_ERROR("error : %s\n", errStr); return(0); } - id segmentFunction = [library newFunctionWithName:@"mtl_segment"]; + id pathFunction = [library newFunctionWithName:@"mtl_path_setup"]; + id segmentFunction = [library newFunctionWithName:@"mtl_segment_setup"]; id rasterFunction = [library newFunctionWithName:@"mtl_raster"]; id vertexFunction = [library newFunctionWithName:@"mtl_vertex_shader"]; id fragmentFunction = [library newFunctionWithName:@"mtl_fragment_shader"]; //NOTE: create pipelines NSError* error = NULL; - backend->rasterPipeline = [metalSurface->device newComputePipelineStateWithFunction: rasterFunction + + backend->pathPipeline = [metalSurface->device newComputePipelineStateWithFunction: pathFunction error:&error]; backend->segmentPipeline = [metalSurface->device newComputePipelineStateWithFunction: segmentFunction error:&error]; + backend->rasterPipeline = [metalSurface->device newComputePipelineStateWithFunction: rasterFunction + error:&error]; + + MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; pipelineStateDescriptor.label = @"blit pipeline"; pipelineStateDescriptor.vertexFunction = vertexFunction; @@ -296,6 +358,22 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) backend->segmentCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int) options: bufferOptions]; + + + backend->pathQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_PATH_QUEUE_BUFFER_SIZE + options: bufferOptions]; + + backend->tileQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_TILE_QUEUE_BUFFER_SIZE + options: bufferOptions]; + + backend->tileQueueCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int) + options: bufferOptions]; + + backend->tileOpBuffer = [metalSurface->device newBufferWithLength: MG_MTL_TILE_OP_BUFFER_SIZE 
+ options: bufferOptions]; + + backend->tileOpCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int) + options: bufferOptions]; } } diff --git a/src/mtl_renderer.metal b/src/mtl_renderer.metal index 8ff9119..b7e6ea4 100644 --- a/src/mtl_renderer.metal +++ b/src/mtl_renderer.metal @@ -7,11 +7,89 @@ using namespace metal; -kernel void mtl_segment(constant int* elementCount [[buffer(0)]], - const device mg_mtl_path_elt* elementBuffer [[buffer(1)]], - device atomic_int* segmentCount [[buffer(2)]], - device mg_mtl_segment* segmentBuffer [[buffer(3)]], - uint eltIndex [[thread_position_in_grid]]) +kernel void mtl_path_setup(constant int* pathCount [[buffer(0)]], + const device mg_mtl_path* pathBuffer [[buffer(1)]], + device mg_mtl_path_queue* pathQueueBuffer [[buffer(2)]], + device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]], + device atomic_int* tileQueueCount [[buffer(4)]], + constant int* tileSize [[buffer(5)]], + uint pathIndex [[thread_position_in_grid]]) +{ + const device mg_mtl_path* path = &pathBuffer[pathIndex]; + + int2 firstTile = int2(path->box.xy)/tileSize[0]; + int2 lastTile = max(firstTile, int2(path->box.zw)/tileSize[0]); + int nTilesX = lastTile.x - firstTile.x + 1; + int nTilesY = lastTile.y - firstTile.y + 1; + int tileCount = nTilesX * nTilesY; + + int tileQueuesIndex = atomic_fetch_add_explicit(tileQueueCount, tileCount, memory_order_relaxed); + + pathQueueBuffer[pathIndex].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY); + pathQueueBuffer[pathIndex].tileQueues = tileQueuesIndex; + + device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[tileQueuesIndex]; + + for(int i=0; i= seg->box.y && p.y < seg->box.w) {...} + */ + bool isLeft = false; + + //NOTE: if point is left of curve bounding box, it is left of curve + if(p.x < seg->box.x) + { + isLeft = true; + } + else if(p.x < seg->box.z) + { + /*NOTE: if point and curve are on opposite sides of diagonal and on the left of diagonal, + it is left from the curve + otherwise if 
point and curve are on the same side of diagonal, do implicit test + */ + float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x); + float ofs = seg->box.w - seg->box.y; + float dx = p.x - seg->box.x; + float dy = p.y - seg->box.y; + + if( (seg->config == MG_MTL_BR && dy > alpha*dx) + ||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx)) + { + isLeft = true; + } + else if( !(seg->config == MG_MTL_TL && dy < alpha*dx) + && !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx)) + { + //Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now + } + } + return(isLeft); +} + +kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]], + const device mg_mtl_path_elt* elementBuffer [[buffer(1)]], + device atomic_int* segmentCount [[buffer(2)]], + device mg_mtl_segment* segmentBuffer [[buffer(3)]], + const device mg_mtl_path_queue* pathQueueBuffer [[buffer(4)]], + device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]], + device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]], + device atomic_int* tileOpCount [[buffer(7)]], + constant int* tileSize [[buffer(8)]], + uint eltIndex [[thread_position_in_grid]]) { const device mg_mtl_path_elt* elt = &elementBuffer[eltIndex]; float2 p0 = elt->p[0]; @@ -40,6 +118,51 @@ kernel void mtl_segment(constant int* elementCount [[buffer(0)]], } seg->windingIncrement = (p3.y > p0.y)? 
1 : -1; + + //NOTE: add segment index to the queues of tiles it overlaps with + const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[seg->pathIndex]; + device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[pathQueue->tileQueues]; + + int4 coveredTiles = int4(seg->box)/tileSize[0]; + int xMin = max(0, coveredTiles.x - pathQueue->area.x); + int yMin = max(0, coveredTiles.y - pathQueue->area.y); + int xMax = min(coveredTiles.z - pathQueue->area.x, pathQueue->area.z-1); + int yMax = min(coveredTiles.w - pathQueue->area.y, pathQueue->area.w-1); + + for(int y = yMin; y <= yMax; y++) + { + for(int x = xMin ; x <= xMax; x++) + { + float4 tileBox = (float4){float(x + pathQueue->area.x), + float(y + pathQueue->area.y), + float(x + pathQueue->area.x + 1), + float(y + pathQueue->area.y + 1)} * float(tileSize[0]); + + //NOTE: select two corners of tile box to test against the curve + float2 testPoint[2] = {{tileBox.x, tileBox.y}, + {tileBox.z, tileBox.w}}; + if(seg->config == MG_MTL_BR || seg->config == MG_MTL_TL) + { + testPoint[0] = (float2){tileBox.x, tileBox.w}; + testPoint[1] = (float2){tileBox.z, tileBox.y}; + } + bool test0 = mtl_is_left_of_segment(testPoint[0], seg); + bool test1 = mtl_is_left_of_segment(testPoint[1], seg); + + //NOTE: the curve overlaps the tile only if test points are on opposite sides of segment + if(test0 != test1) + { + int tileOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed); + device mg_mtl_tile_op* op = &tileOpBuffer[tileOpIndex]; + + op->kind = MG_MTL_OP_SEGMENT; + op->index = segIndex; + + int tileIndex = y*pathQueue->area.z + x; + op->next = atomic_exchange_explicit(&tileQueues[tileIndex].first, tileOpIndex, memory_order_relaxed); + } + } + } } } @@ -47,58 +170,96 @@ kernel void mtl_raster(constant int* pathCount [[buffer(0)]], const device mg_mtl_path* pathBuffer [[buffer(1)]], constant int* segCount [[buffer(2)]], const device mg_mtl_segment* segmentBuffer [[buffer(3)]], + const device mg_mtl_path_queue* 
pathQueueBuffer [[buffer(4)]], + const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]], + const device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]], + constant int* tileSize [[buffer(7)]], texture2d outTexture [[texture(0)]], - uint2 threadCoord [[thread_position_in_grid]]) + uint2 threadCoord [[thread_position_in_grid]], + uint2 gridSize [[threads_per_grid]]) { int2 pixelCoord = int2(threadCoord); + int2 tileCoord = pixelCoord / tileSize[0]; float4 color = float4(0, 0, 0, 0); int currentPath = 0; int winding = 0; - for(int segIndex = 0; segIndex < segCount[0]; segIndex++) + if( (pixelCoord.x % tileSize[0] == 0) + ||(pixelCoord.y % tileSize[0] == 0)) { - const device mg_mtl_segment* seg = &segmentBuffer[segIndex]; + outTexture.write(float4(0, 0, 0, 1), uint2(pixelCoord)); + return; + } - if(seg->pathIndex != currentPath) + for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++) + { + const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex]; + int2 pathTileCoord = tileCoord - pathQueue->area.xy; + + if( pathTileCoord.x >= 0 + && pathTileCoord.x < pathQueue->area.z + && pathTileCoord.y >= 0 + && pathTileCoord.y < pathQueue->area.w) { - //depending on winding number, update color - if(winding & 1) - { - float4 pathColor = pathBuffer[currentPath].color; - pathColor.rgb *= pathColor.a; - color = color*(1-pathColor.a) + pathColor; - } - currentPath = seg->pathIndex; - winding = 0; - } + int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x; + const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex]; - if(pixelCoord.y >= seg->box.y && pixelCoord.y < seg->box.w) - { - if(pixelCoord.x < seg->box.x) + int opIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed); + while(opIndex != -1) { - winding += seg->windingIncrement; - } - else if(pixelCoord.x < seg->box.z) - { - /*TODO: if pixel is on opposite size of diagonal as curve on the right, increment - otherwise if not on 
same size of diagonal as curve, do implicit test - */ - float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x); - float ofs = seg->box.w - seg->box.y; - float dx = pixelCoord.x - seg->box.x; - float dy = pixelCoord.y - seg->box.y; + //outTexture.write(float4(0, 0, 1, 1), uint2(pixelCoord)); + //return; - if( (seg->config == MG_MTL_BR && dy > alpha*dx) - ||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx)) + const device mg_mtl_tile_op* op = &tileOpBuffer[opIndex]; + + if(op->kind == MG_MTL_OP_SEGMENT) { - winding += seg->windingIncrement; - } - else if( !(seg->config == MG_MTL_TL && dy < alpha*dx) - && !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx)) - { - //Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now + const device mg_mtl_segment* seg = &segmentBuffer[op->index]; + + if(seg->pathIndex != currentPath) + { + //depending on winding number, update color + if(winding & 1) + { + float4 pathColor = pathBuffer[currentPath].color; + pathColor.rgb *= pathColor.a; + color = color*(1-pathColor.a) + pathColor; + } + currentPath = seg->pathIndex; + winding = 0; + } + + if(pixelCoord.y >= seg->box.y && pixelCoord.y < seg->box.w) + { + if(pixelCoord.x < seg->box.x) + { + winding += seg->windingIncrement; + } + else if(pixelCoord.x < seg->box.z) + { + /*TODO: if pixel is on opposite size of diagonal as curve on the right, increment + otherwise if not on same size of diagonal as curve, do implicit test + */ + float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x); + float ofs = seg->box.w - seg->box.y; + float dx = pixelCoord.x - seg->box.x; + float dy = pixelCoord.y - seg->box.y; + + if( (seg->config == MG_MTL_BR && dy > alpha*dx) + ||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx)) + { + winding += seg->windingIncrement; + } + else if( !(seg->config == MG_MTL_TL && dy < alpha*dx) + && !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx)) + { + //Need implicit test, but for lines, we only have 
config BR or TR, so the test is always negative for now + } + } + } } + opIndex = op->next; } } }