[mtl canvas, wip] Collate queues in a per-tile op list and prune occluded shapes

2023-03-29 16:21:28 +02:00 · 2023-03-29 16:21:28 +02:00 · 8af4e4eddc
parent a6c53649bd
commit 8af4e4eddc
2 changed files with 154 additions and 113 deletions
--- a/src/mtl_renderer.m
+++ b/src/mtl_renderer.m
@ -18,7 +18,8 @@

 #define LOG_SUBSYSTEM "Graphics"

-const int MG_MTL_INPUT_BUFFERS_COUNT = 3;
+const int MG_MTL_INPUT_BUFFERS_COUNT = 3,
+          MG_MTL_TILE_SIZE = 16;

 typedef struct mg_mtl_canvas_backend
 {
@ -28,6 +29,7 @@ typedef struct mg_mtl_canvas_backend
 	id<MTLComputePipelineState> pathPipeline;
 	id<MTLComputePipelineState> segmentPipeline;
 	id<MTLComputePipelineState> backpropPipeline;
+	id<MTLComputePipelineState> mergePipeline;
 	id<MTLComputePipelineState> rasterPipeline;
 	id<MTLRenderPipelineState> blitPipeline;

@ -46,6 +48,7 @@ typedef struct mg_mtl_canvas_backend
 	id<MTLBuffer> tileQueueCountBuffer;
 	id<MTLBuffer> tileOpBuffer;
 	id<MTLBuffer> tileOpCountBuffer;
+	id<MTLBuffer> screenTilesBuffer;

 } mg_mtl_canvas_backend;

@ -140,7 +143,13 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
 	mp_rect frame = mg_surface_get_frame(backend->surface);
 	f32 scale = surface->mtlLayer.contentsScale;
 	vec2 viewportSize = {frame.w * scale, frame.h * scale};
-	int tileSize = 16;
+	int tileSize = MG_MTL_TILE_SIZE;
+	int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
+	int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
+
+	/////////////////////////////////////////////////////////////////////////////////////
+	//TODO: ensure screen tiles buffer is correct size
+	/////////////////////////////////////////////////////////////////////////////////////

 	//NOTE: encode GPU commands
 	@autoreleasepool
@ -208,19 +217,35 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
 		[backpropEncoder dispatchThreads: backpropGridSize threadsPerThreadgroup: backpropGroupSize];
 		[backpropEncoder endEncoding];

+		//NOTE: merge pass
+		id<MTLComputeCommandEncoder> mergeEncoder = [surface->commandBuffer computeCommandEncoder];
+		mergeEncoder.label = @"merge pass";
+		[mergeEncoder setComputePipelineState: backend->mergePipeline];
+
+		[mergeEncoder setBytes:&pathCount length:sizeof(int) atIndex:0];
+		[mergeEncoder setBuffer:backend->pathBuffer[backend->bufferIndex] offset:0 atIndex:1];
+		[mergeEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:2];
+		[mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
+		[mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4];
+		[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
+		[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:6];
+
+		MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
+		MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1);
+
+		[mergeEncoder dispatchThreads: mergeGridSize threadsPerThreadgroup: mergeGroupSize];
+		[mergeEncoder endEncoding];
+
 		//NOTE: raster pass
 		id<MTLComputeCommandEncoder> rasterEncoder = [surface->commandBuffer computeCommandEncoder];
 		rasterEncoder.label = @"raster pass";
 		[rasterEncoder setComputePipelineState: backend->rasterPipeline];

-		[rasterEncoder setBytes:&pathCount length:sizeof(int) atIndex:0];
-		[rasterEncoder setBuffer:backend->pathBuffer[backend->bufferIndex] offset:0 atIndex:1];
-		[rasterEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2];
+		[rasterEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:0];
+		[rasterEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:1];
+		[rasterEncoder setBuffer:backend->pathBuffer[backend->bufferIndex] offset:0 atIndex:2];
 		[rasterEncoder setBuffer:backend->segmentBuffer offset:0 atIndex:3];
-		[rasterEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:4];
-		[rasterEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5];
-		[rasterEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6];
-		[rasterEncoder setBytes:&tileSize length:sizeof(int) atIndex:7];
+		[rasterEncoder setBytes:&tileSize length:sizeof(int) atIndex:4];

 		[rasterEncoder setTexture:backend->outTexture atIndex:0];

@ -270,6 +295,7 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface)
 		[backend->pathPipeline release];
 		[backend->segmentPipeline release];
 		[backend->backpropPipeline release];
+		[backend->mergePipeline release];
 		[backend->rasterPipeline release];
 		[backend->blitPipeline release];

@ -284,6 +310,7 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface)
 		[backend->tileQueueCountBuffer release];
 		[backend->tileOpBuffer release];
 		[backend->tileOpCountBuffer release];
+		[backend->screenTilesBuffer release];
 	}

 	free(backend);
@ -329,6 +356,7 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
 			id<MTLFunction> pathFunction = [library newFunctionWithName:@"mtl_path_setup"];
 			id<MTLFunction> segmentFunction = [library newFunctionWithName:@"mtl_segment_setup"];
 			id<MTLFunction> backpropFunction = [library newFunctionWithName:@"mtl_backprop"];
+			id<MTLFunction> mergeFunction = [library newFunctionWithName:@"mtl_merge"];
 			id<MTLFunction> rasterFunction = [library newFunctionWithName:@"mtl_raster"];
 			id<MTLFunction> vertexFunction = [library newFunctionWithName:@"mtl_vertex_shader"];
 			id<MTLFunction> fragmentFunction = [library newFunctionWithName:@"mtl_fragment_shader"];
@ -345,6 +373,9 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
 			backend->backpropPipeline = [metalSurface->device newComputePipelineStateWithFunction: backpropFunction
 		                                                                           	error:&error];

+			backend->mergePipeline = [metalSurface->device newComputePipelineStateWithFunction: mergeFunction
+		                                                                           	error:&error];
+
 			backend->rasterPipeline = [metalSurface->device newComputePipelineStateWithFunction: rasterFunction
 		                                                                           	error:&error];

@ -417,8 +448,13 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)

 			backend->tileOpCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int)
 			                                                   options: bufferOptions];
-		}

+			int tileSize = MG_MTL_TILE_SIZE;
+			int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
+			int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
+			backend->screenTilesBuffer = [metalSurface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
+			                                                   options: bufferOptions];
+		}
 	}
 	return((mg_canvas_backend*)backend);
 }
--- a/src/mtl_renderer.metal
+++ b/src/mtl_renderer.metal
@ -166,6 +166,7 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]],

 					op->kind = MG_MTL_OP_SEGMENT;
 					op->index = segIndex;
+					op->next = -1;

 					int tileIndex = y*pathQueue->area.z + x;
 					device mg_mtl_tile_queue* tile = &tileQueues[tileIndex];
@ -252,7 +253,7 @@ kernel void mtl_backprop(const device mg_mtl_path_queue* pathQueueBuffer [[buffe
 		rowIndex = atomic_fetch_add_explicit(&nextRowIndex, 1, memory_order_relaxed);
 	}
 }
-/*
+
 kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
                      const device mg_mtl_path* pathBuffer [[buffer(1)]],
                      const device mg_mtl_path_queue* pathQueueBuffer [[buffer(2)]],
@ -266,6 +267,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
 	int2 tileCoord = int2(threadCoord);
 	int tileIndex = tileCoord.y * gridSize.x + tileCoord.x;
 	device int* nextLink = &screenTilesBuffer[tileIndex];
+	*nextLink = -1;

 	for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
 	{
@ -281,63 +283,69 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
 			const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];

 			int windingOffset = atomic_load_explicit(&tileQueue->windingOffset, memory_order_relaxed);
-			int opIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed);
+			int firstOpIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed);

-			if((opIndex != -1) || (windingOffset & 1))
+			if(firstOpIndex == -1)
 			{
-				//NOTE: add path start op (with winding offset)
-				int startOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
-				device mg_mtl_tile_op* startOp = &tileOpBuffer[startOpIndex];
-				startOp->kind = MG_MTL_OP_START;
-				startOp->index = pathIndex;
-				startOp->windingOffset = windingOffset;
-
-				if(opIndex == -1)
+				if(windingOffset & 1)
 				{
-					//NOTE: the tile is fully covered by path fill. Insert start op,
-					//      and if the fill color is opaque, trim tile list.
+					//NOTE: tile is full covered. Add path start op (with winding offset).
+					//      Additionally if color is opaque, trim tile list.
+					int pathOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
+					device mg_mtl_tile_op* pathOp = &tileOpBuffer[pathOpIndex];
+					pathOp->kind = MG_MTL_OP_START;
+					pathOp->next = -1;
+					pathOp->index = pathIndex;
+					pathOp->windingOffset = windingOffset;
+
 					if(pathBuffer[pathIndex].color.a == 1)
 					{
-						screenTilesBuffer[tileIndex] = startOpIndex;
+						screenTilesBuffer[tileIndex] = pathOpIndex;
 					}
 					else
 					{
-						*nextLink = startOpIndex;
+						*nextLink = pathOpIndex;
 					}
-					nextLink = &startOp->next;
+					nextLink = &pathOp->next;
 				}
-				else
-				{
-					//NOTE: add start op
-					*nextLink = startOpIndex;
-					nextLink = &startOp->next;
+				// else, tile is fully uncovered, skip path
+			}
+			else
+			{
+				//NOTE: add path start op (with winding offset)
+				int pathOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
+				device mg_mtl_tile_op* pathOp = &tileOpBuffer[pathOpIndex];
+				pathOp->kind = MG_MTL_OP_START;
+				pathOp->next = -1;
+				pathOp->index = pathIndex;
+				pathOp->windingOffset = windingOffset;

-					//NOTE: chain path ops to end of tile list
-					device mg_mtl_tile_op* lastOp = &tileOpBuffer[opIndex];
-					*nextLink = opIndex;
-					nextLink = &lastOp->next;
-				}
+				*nextLink = pathOpIndex;
+				nextLink = &pathOp->next;
+
+				//NOTE: chain remaining path ops to end of tile list
+				int lastOpIndex = tileQueue->last;
+				device mg_mtl_tile_op* lastOp = &tileOpBuffer[lastOpIndex];
+				*nextLink = firstOpIndex;
+				nextLink = &lastOp->next;
 			}
 		}
 	}
 }
-*/
-kernel void mtl_raster(constant int* pathCount [[buffer(0)]],
-                       const device mg_mtl_path* pathBuffer [[buffer(1)]],
-                       constant int* segCount [[buffer(2)]],
+
+kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
+                       const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]],
+                       const device mg_mtl_path* pathBuffer [[buffer(2)]],
                       const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
-                       const device mg_mtl_path_queue* pathQueueBuffer [[buffer(4)]],
-                       const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]],
-                       const device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]],
-                       constant int* tileSize [[buffer(7)]],
+                       constant int* tileSize [[buffer(4)]],
                       texture2d<float, access::write> outTexture [[texture(0)]],
                       uint2 threadCoord [[thread_position_in_grid]],
                       uint2 gridSize [[threads_per_grid]])
 {
 	int2 pixelCoord = int2(threadCoord);
 	int2 tileCoord = pixelCoord / tileSize[0];
-
-	float4 color = float4(0, 0, 0, 0);
+	int nTilesX = (int(gridSize.x) + tileSize[0] - 1)/tileSize[0];
+	int tileIndex = tileCoord.y * nTilesX + tileCoord.x;

 	if( (pixelCoord.x % tileSize[0] == 0)
 	  ||(pixelCoord.y % tileSize[0] == 0))
@ -346,83 +354,80 @@ kernel void mtl_raster(constant int* pathCount [[buffer(0)]],
 		return;
 	}

-	for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
+	float4 color = float4(0, 0, 0, 0);
+	int pathIndex = 0;
+	int winding = 0;
+	int opIndex = screenTilesBuffer[tileIndex];
+
+	while(opIndex != -1)
 	{
-		const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
-		int2 pathTileCoord = tileCoord - pathQueue->area.xy;
+		const device mg_mtl_tile_op* op = &tileOpBuffer[opIndex];

-		if(  pathTileCoord.x >= 0
-		  && pathTileCoord.x < pathQueue->area.z
-		  && pathTileCoord.y >= 0
-		  && pathTileCoord.y < pathQueue->area.w)
+		if(op->kind == MG_MTL_OP_START)
 		{
-			int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
-			const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
-
-			int winding = atomic_load_explicit(&tileQueue->windingOffset, memory_order_relaxed);
-
-			int opIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed);
-			while(opIndex != -1)
-			{
-				const device mg_mtl_tile_op* op = &tileOpBuffer[opIndex];
-
-				if(op->kind == MG_MTL_OP_SEGMENT)
-				{
-					const device mg_mtl_segment* seg = &segmentBuffer[op->index];
-
-					if(pixelCoord.y >= seg->box.y && pixelCoord.y < seg->box.w)
-					{
-						if(pixelCoord.x < seg->box.x)
-						{
-							winding += seg->windingIncrement;
-						}
-						else if(pixelCoord.x < seg->box.z)
-						{
-							/*TODO: if pixel is on opposite size of diagonal as curve on the right, increment
-					       			otherwise if not on same size of diagonal as curve, do implicit test
-							*/
-							float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x);
-							float ofs = seg->box.w - seg->box.y;
-							float dx = pixelCoord.x - seg->box.x;
-							float dy = pixelCoord.y - seg->box.y;
-
-							if( (seg->config == MG_MTL_BR && dy > alpha*dx)
-				  			||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx))
-							{
-								winding += seg->windingIncrement;
-							}
-							else if(  !(seg->config == MG_MTL_TL && dy < alpha*dx)
-				       			&& !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx))
-							{
-								//Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now
-							}
-						}
-					}
-
-					if(op->crossRight)
-					{
-						if( (seg->config == MG_MTL_BR || seg->config == MG_MTL_TL)
-						  &&(pixelCoord.y >= seg->box.w))
-						{
-							winding += seg->windingIncrement;
-						}
-						else if( (seg->config == MG_MTL_BL || seg->config == MG_MTL_TR)
-						       &&(pixelCoord.y >= seg->box.y))
-						{
-							winding -= seg->windingIncrement;
-						}
-					}
-				}
-				opIndex = op->next;
-			}
-
 			if(winding & 1)
 			{
 				float4 pathColor = pathBuffer[pathIndex].color;
 				pathColor.rgb *= pathColor.a;
 				color = color*(1-pathColor.a) + pathColor;
 			}
+			pathIndex = op->index;
+			winding = op->windingOffset;
 		}
+		else if(op->kind == MG_MTL_OP_SEGMENT)
+		{
+			const device mg_mtl_segment* seg = &segmentBuffer[op->index];
+
+			if(pixelCoord.y >= seg->box.y && pixelCoord.y < seg->box.w)
+			{
+				if(pixelCoord.x < seg->box.x)
+				{
+					winding += seg->windingIncrement;
+				}
+				else if(pixelCoord.x < seg->box.z)
+				{
+					/*TODO: if pixel is on opposite size of diagonal as curve on the right, increment
+					       	otherwise if not on same size of diagonal as curve, do implicit test
+					*/
+					float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x);
+					float ofs = seg->box.w - seg->box.y;
+					float dx = pixelCoord.x - seg->box.x;
+					float dy = pixelCoord.y - seg->box.y;
+
+					if( (seg->config == MG_MTL_BR && dy > alpha*dx)
+				  	||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx))
+					{
+						winding += seg->windingIncrement;
+					}
+					else if(  !(seg->config == MG_MTL_TL && dy < alpha*dx)
+				       	&& !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx))
+					{
+						//Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now
+					}
+				}
+			}
+
+			if(op->crossRight)
+			{
+				if( (seg->config == MG_MTL_BR || seg->config == MG_MTL_TL)
+						&&(pixelCoord.y >= seg->box.w))
+				{
+					winding += seg->windingIncrement;
+				}
+				else if( (seg->config == MG_MTL_BL || seg->config == MG_MTL_TR)
+						     &&(pixelCoord.y >= seg->box.y))
+				{
+					winding -= seg->windingIncrement;
+				}
+			}
+		}
+		opIndex = op->next;
+	}
+	if(winding & 1)
+	{
+		float4 pathColor = pathBuffer[pathIndex].color;
+		pathColor.rgb *= pathColor.a;
+		color = color*(1-pathColor.a) + pathColor;
 	}

 	outTexture.write(color, uint2(pixelCoord));