[mtl canvas, wip] binning segments to tile queues

This commit is contained in:
Martin Fouilleul 2023-03-28 17:58:23 +02:00
parent c71bc5be98
commit c4415aaeea
4 changed files with 338 additions and 75 deletions

View File

@ -83,8 +83,8 @@ int main()
mg_fill();
mg_move_to(200, 100);
mg_line_to(400, 100);
mg_line_to(400, 200);
mg_line_to(410, 100);
mg_line_to(410, 200);
mg_line_to(200, 200);
mg_close_path();
mg_set_color_rgba(0, 1, 0, 1);

View File

@ -11,36 +11,10 @@
#include<simd/simd.h>
typedef enum {
MG_MTL_LINE = 1,
} mg_mtl_seg_kind;
typedef enum {
MG_MTL_BL, // curve on bottom left
MG_MTL_BR, // curve on bottom right
MG_MTL_TL, // curve on top left
MG_MTL_TR // curve on top right
} mg_mtl_seg_config;
typedef enum {
MG_MTL_FILL,
} mg_mtl_cmd;
typedef struct mg_mtl_path_elt
{
int pathIndex;
mg_mtl_seg_kind kind;
vector_float2 p[4];
} mg_mtl_path_elt;
typedef struct mg_mtl_segment
{
int pathIndex;
mg_mtl_seg_config config; //TODO pack these
int windingIncrement;
vector_float4 box;
} mg_mtl_segment;
typedef struct mg_mtl_path
{
mg_mtl_cmd cmd;
@ -49,5 +23,55 @@ typedef struct mg_mtl_path
} mg_mtl_path;
typedef enum {
MG_MTL_LINE = 1,
} mg_mtl_seg_kind;
typedef struct mg_mtl_path_elt
{
int pathIndex;
mg_mtl_seg_kind kind;
vector_float2 p[4];
} mg_mtl_path_elt;
typedef enum {
MG_MTL_BL, // curve on bottom left
MG_MTL_BR, // curve on bottom right
MG_MTL_TL, // curve on top left
MG_MTL_TR // curve on top right
} mg_mtl_seg_config;
typedef struct mg_mtl_segment
{
int pathIndex;
mg_mtl_seg_config config; //TODO pack these
int windingIncrement;
vector_float4 box;
} mg_mtl_segment;
typedef struct mg_mtl_path_queue
{
vector_int4 area;
int tileQueues;
} mg_mtl_path_queue;
#ifdef __METAL_VERSION__
using namespace metal;
#endif
typedef enum { MG_MTL_OP_SEGMENT } mg_mtl_tile_op_kind;
typedef struct mg_mtl_tile_op
{
mg_mtl_tile_op_kind kind;
int index;
int next;
} mg_mtl_tile_op;
typedef struct mg_mtl_tile_queue
{
atomic_int first;
} mg_mtl_tile_queue;
#endif //__MTL_RENDERER_H_

View File

@ -25,6 +25,7 @@ typedef struct mg_mtl_canvas_backend
mg_canvas_backend interface;
mg_surface surface;
id<MTLComputePipelineState> pathPipeline;
id<MTLComputePipelineState> segmentPipeline;
id<MTLComputePipelineState> rasterPipeline;
id<MTLRenderPipelineState> blitPipeline;
@ -35,6 +36,11 @@ typedef struct mg_mtl_canvas_backend
id<MTLBuffer> elementBuffer;
id<MTLBuffer> segmentCountBuffer;
id<MTLBuffer> segmentBuffer;
id<MTLBuffer> pathQueueBuffer;
id<MTLBuffer> tileQueueBuffer;
id<MTLBuffer> tileQueueCountBuffer;
id<MTLBuffer> tileOpBuffer;
id<MTLBuffer> tileOpCountBuffer;
} mg_mtl_canvas_backend;
@ -88,6 +94,9 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
vec2 p3 = mg_mat2x3_mul(primitive->attributes.transform, elt->p[0]);
currentPos = elt->p[0];
mg_update_path_extents(&pathExtents, p0);
mg_update_path_extents(&pathExtents, p3);
mg_mtl_path_elt* mtlElt = &elementBufferData[mtlEltCount];
mtlEltCount++;
@ -123,12 +132,39 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
mp_rect frame = mg_surface_get_frame(backend->surface);
f32 scale = surface->mtlLayer.contentsScale;
vec2 viewportSize = {frame.w * scale, frame.h * scale};
int tileSize = 16;
//NOTE: encode GPU commands
@autoreleasepool
{
mg_mtl_surface_acquire_command_buffer(surface);
//NOTE: clear counters
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
blitEncoder.label = @"clear counters";
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder endEncoding];
//NOTE: path setup pass
id<MTLComputeCommandEncoder> pathEncoder = [surface->commandBuffer computeCommandEncoder];
pathEncoder.label = @"path pass";
[pathEncoder setComputePipelineState: backend->pathPipeline];
[pathEncoder setBytes:&pathCount length:sizeof(int) atIndex:0];
[pathEncoder setBuffer:backend->pathBuffer offset:0 atIndex:1];
[pathEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:2];
[pathEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
[pathEncoder setBuffer:backend->tileQueueCountBuffer offset:0 atIndex:4];
[pathEncoder setBytes:&tileSize length:sizeof(int) atIndex:5];
MTLSize pathGridSize = MTLSizeMake(pathCount, 1, 1);
MTLSize pathGroupSize = MTLSizeMake(64, 1, 1);
[pathEncoder dispatchThreads: pathGridSize threadsPerThreadgroup: pathGroupSize];
[pathEncoder endEncoding];
//NOTE: segment setup pass
id<MTLComputeCommandEncoder> segmentEncoder = [surface->commandBuffer computeCommandEncoder];
segmentEncoder.label = @"segment pass";
@ -138,12 +174,16 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
[segmentEncoder setBuffer:backend->elementBuffer offset:0 atIndex:1];
[segmentEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2];
[segmentEncoder setBuffer:backend->segmentBuffer offset:0 atIndex:3];
[segmentEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:4];
[segmentEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5];
[segmentEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6];
[segmentEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:7];
[segmentEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
MTLSize segmentGridSize = MTLSizeMake(mtlEltCount, 1, 1);
MTLSize segmentGroupSize = MTLSizeMake(64, 1, 1);
[segmentEncoder dispatchThreads: segmentGridSize threadsPerThreadgroup: segmentGroupSize];
[segmentEncoder endEncoding];
//NOTE: raster pass
@ -155,6 +195,10 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
[rasterEncoder setBuffer:backend->pathBuffer offset:0 atIndex:1];
[rasterEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2];
[rasterEncoder setBuffer:backend->segmentBuffer offset:0 atIndex:3];
[rasterEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:4];
[rasterEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5];
[rasterEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6];
[rasterEncoder setBytes:&tileSize length:sizeof(int) atIndex:7];
[rasterEncoder setTexture:backend->outTexture atIndex:0];
@ -195,18 +239,30 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface)
@autoreleasepool
{
[backend->pathPipeline release];
[backend->segmentPipeline release];
[backend->rasterPipeline release];
[backend->blitPipeline release];
[backend->pathBuffer release];
[backend->elementBuffer release];
[backend->segmentCountBuffer release];
[backend->segmentBuffer release];
[backend->tileQueueBuffer release];
[backend->tileQueueCountBuffer release];
[backend->tileOpBuffer release];
[backend->tileOpCountBuffer release];
}
free(backend);
}
const u32 MG_MTL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path),
MG_MTL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_elt),
MG_MTL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_segment);
const u32 MG_MTL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path),
MG_MTL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_elt),
MG_MTL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_segment),
MG_MTL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_queue),
MG_MTL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_queue),
MG_MTL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_op);
mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
{
@ -238,19 +294,25 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
LOG_ERROR("error : %s\n", errStr);
return(0);
}
id<MTLFunction> segmentFunction = [library newFunctionWithName:@"mtl_segment"];
id<MTLFunction> pathFunction = [library newFunctionWithName:@"mtl_path_setup"];
id<MTLFunction> segmentFunction = [library newFunctionWithName:@"mtl_segment_setup"];
id<MTLFunction> rasterFunction = [library newFunctionWithName:@"mtl_raster"];
id<MTLFunction> vertexFunction = [library newFunctionWithName:@"mtl_vertex_shader"];
id<MTLFunction> fragmentFunction = [library newFunctionWithName:@"mtl_fragment_shader"];
//NOTE: create pipelines
NSError* error = NULL;
backend->rasterPipeline = [metalSurface->device newComputePipelineStateWithFunction: rasterFunction
backend->pathPipeline = [metalSurface->device newComputePipelineStateWithFunction: pathFunction
error:&error];
backend->segmentPipeline = [metalSurface->device newComputePipelineStateWithFunction: segmentFunction
error:&error];
backend->rasterPipeline = [metalSurface->device newComputePipelineStateWithFunction: rasterFunction
error:&error];
MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
pipelineStateDescriptor.label = @"blit pipeline";
pipelineStateDescriptor.vertexFunction = vertexFunction;
@ -296,6 +358,22 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
backend->segmentCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int)
options: bufferOptions];
backend->pathQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_PATH_QUEUE_BUFFER_SIZE
options: bufferOptions];
backend->tileQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_TILE_QUEUE_BUFFER_SIZE
options: bufferOptions];
backend->tileQueueCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int)
options: bufferOptions];
backend->tileOpBuffer = [metalSurface->device newBufferWithLength: MG_MTL_TILE_OP_BUFFER_SIZE
options: bufferOptions];
backend->tileOpCountBuffer = [metalSurface->device newBufferWithLength: sizeof(int)
options: bufferOptions];
}
}

View File

@ -7,11 +7,89 @@
using namespace metal;
kernel void mtl_segment(constant int* elementCount [[buffer(0)]],
const device mg_mtl_path_elt* elementBuffer [[buffer(1)]],
device atomic_int* segmentCount [[buffer(2)]],
device mg_mtl_segment* segmentBuffer [[buffer(3)]],
uint eltIndex [[thread_position_in_grid]])
kernel void mtl_path_setup(constant int* pathCount [[buffer(0)]],
const device mg_mtl_path* pathBuffer [[buffer(1)]],
device mg_mtl_path_queue* pathQueueBuffer [[buffer(2)]],
device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
device atomic_int* tileQueueCount [[buffer(4)]],
constant int* tileSize [[buffer(5)]],
uint pathIndex [[thread_position_in_grid]])
{
const device mg_mtl_path* path = &pathBuffer[pathIndex];
int2 firstTile = int2(path->box.xy)/tileSize[0];
int2 lastTile = max(firstTile, int2(path->box.zw)/tileSize[0]);
int nTilesX = lastTile.x - firstTile.x + 1;
int nTilesY = lastTile.y - firstTile.y + 1;
int tileCount = nTilesX * nTilesY;
int tileQueuesIndex = atomic_fetch_add_explicit(tileQueueCount, tileCount, memory_order_relaxed);
pathQueueBuffer[pathIndex].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY);
pathQueueBuffer[pathIndex].tileQueues = tileQueuesIndex;
device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[tileQueuesIndex];
for(int i=0; i<tileCount; i++)
{
atomic_store_explicit(&tileQueues[i].first, -1, memory_order_relaxed);
}
}
bool mtl_is_left_of_segment(float2 p, const device mg_mtl_segment* seg)
{
//NOTE: test is p is on the left of a curve segment.
/*WARN: if p is outside the bounding box of segment, we still consider it left from
the segment if it is left of its diagonal. This is done so that we can test
if tile corners are on the same side of the curve during tiling (corner are
not necessarily inside the bounding box, even if the tile itself overlaps
the curve).
During fine rasterization, this function need to be guarded by a the following
check: if(p.y >= seg->box.y && p.y < seg->box.w) {...}
*/
bool isLeft = false;
//NOTE: if point is left of curve bounding box, it is left of curve
if(p.x < seg->box.x)
{
isLeft = true;
}
else if(p.x < seg->box.z)
{
/*NOTE: if point and curve are on opposite sides of diagonal and on the left of diagonal,
it is left from the curve
otherwise if point and curve are on the same side of diagonal, do implicit test
*/
float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x);
float ofs = seg->box.w - seg->box.y;
float dx = p.x - seg->box.x;
float dy = p.y - seg->box.y;
if( (seg->config == MG_MTL_BR && dy > alpha*dx)
||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx))
{
isLeft = true;
}
else if( !(seg->config == MG_MTL_TL && dy < alpha*dx)
&& !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx))
{
//Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now
}
}
return(isLeft);
}
kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]],
const device mg_mtl_path_elt* elementBuffer [[buffer(1)]],
device atomic_int* segmentCount [[buffer(2)]],
device mg_mtl_segment* segmentBuffer [[buffer(3)]],
const device mg_mtl_path_queue* pathQueueBuffer [[buffer(4)]],
device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]],
device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]],
device atomic_int* tileOpCount [[buffer(7)]],
constant int* tileSize [[buffer(8)]],
uint eltIndex [[thread_position_in_grid]])
{
const device mg_mtl_path_elt* elt = &elementBuffer[eltIndex];
float2 p0 = elt->p[0];
@ -40,6 +118,51 @@ kernel void mtl_segment(constant int* elementCount [[buffer(0)]],
}
seg->windingIncrement = (p3.y > p0.y)? 1 : -1;
//NOTE: add segment index to the queues of tiles it overlaps with
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[seg->pathIndex];
device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[pathQueue->tileQueues];
int4 coveredTiles = int4(seg->box)/tileSize[0];
int xMin = max(0, coveredTiles.x - pathQueue->area.x);
int yMin = max(0, coveredTiles.y - pathQueue->area.y);
int xMax = min(coveredTiles.z - pathQueue->area.x, pathQueue->area.z-1);
int yMax = min(coveredTiles.w - pathQueue->area.y, pathQueue->area.w-1);
for(int y = yMin; y <= yMax; y++)
{
for(int x = xMin ; x <= xMax; x++)
{
float4 tileBox = (float4){float(x + pathQueue->area.x),
float(y + pathQueue->area.y),
float(x + pathQueue->area.x + 1),
float(y + pathQueue->area.y + 1)} * float(tileSize[0]);
//NOTE: select two corners of tile box to test against the curve
float2 testPoint[2] = {{tileBox.x, tileBox.y},
{tileBox.z, tileBox.w}};
if(seg->config == MG_MTL_BR || seg->config == MG_MTL_TL)
{
testPoint[0] = (float2){tileBox.x, tileBox.w};
testPoint[1] = (float2){tileBox.z, tileBox.y};
}
bool test0 = mtl_is_left_of_segment(testPoint[0], seg);
bool test1 = mtl_is_left_of_segment(testPoint[1], seg);
//NOTE: the curve overlaps the tile only if test points are on opposite sides of segment
if(test0 != test1)
{
int tileOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
device mg_mtl_tile_op* op = &tileOpBuffer[tileOpIndex];
op->kind = MG_MTL_OP_SEGMENT;
op->index = segIndex;
int tileIndex = y*pathQueue->area.z + x;
op->next = atomic_exchange_explicit(&tileQueues[tileIndex].first, tileOpIndex, memory_order_relaxed);
}
}
}
}
}
@ -47,58 +170,96 @@ kernel void mtl_raster(constant int* pathCount [[buffer(0)]],
const device mg_mtl_path* pathBuffer [[buffer(1)]],
constant int* segCount [[buffer(2)]],
const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
const device mg_mtl_path_queue* pathQueueBuffer [[buffer(4)]],
const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]],
const device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]],
constant int* tileSize [[buffer(7)]],
texture2d<float, access::write> outTexture [[texture(0)]],
uint2 threadCoord [[thread_position_in_grid]])
uint2 threadCoord [[thread_position_in_grid]],
uint2 gridSize [[threads_per_grid]])
{
int2 pixelCoord = int2(threadCoord);
int2 tileCoord = pixelCoord / tileSize[0];
float4 color = float4(0, 0, 0, 0);
int currentPath = 0;
int winding = 0;
for(int segIndex = 0; segIndex < segCount[0]; segIndex++)
if( (pixelCoord.x % tileSize[0] == 0)
||(pixelCoord.y % tileSize[0] == 0))
{
const device mg_mtl_segment* seg = &segmentBuffer[segIndex];
outTexture.write(float4(0, 0, 0, 1), uint2(pixelCoord));
return;
}
if(seg->pathIndex != currentPath)
for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
{
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
int2 pathTileCoord = tileCoord - pathQueue->area.xy;
if( pathTileCoord.x >= 0
&& pathTileCoord.x < pathQueue->area.z
&& pathTileCoord.y >= 0
&& pathTileCoord.y < pathQueue->area.w)
{
//depending on winding number, update color
if(winding & 1)
{
float4 pathColor = pathBuffer[currentPath].color;
pathColor.rgb *= pathColor.a;
color = color*(1-pathColor.a) + pathColor;
}
currentPath = seg->pathIndex;
winding = 0;
}
int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
if(pixelCoord.y >= seg->box.y && pixelCoord.y < seg->box.w)
{
if(pixelCoord.x < seg->box.x)
int opIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed);
while(opIndex != -1)
{
winding += seg->windingIncrement;
}
else if(pixelCoord.x < seg->box.z)
{
/*TODO: if pixel is on opposite size of diagonal as curve on the right, increment
otherwise if not on same size of diagonal as curve, do implicit test
*/
float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x);
float ofs = seg->box.w - seg->box.y;
float dx = pixelCoord.x - seg->box.x;
float dy = pixelCoord.y - seg->box.y;
//outTexture.write(float4(0, 0, 1, 1), uint2(pixelCoord));
//return;
if( (seg->config == MG_MTL_BR && dy > alpha*dx)
||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx))
const device mg_mtl_tile_op* op = &tileOpBuffer[opIndex];
if(op->kind == MG_MTL_OP_SEGMENT)
{
winding += seg->windingIncrement;
}
else if( !(seg->config == MG_MTL_TL && dy < alpha*dx)
&& !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx))
{
//Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now
const device mg_mtl_segment* seg = &segmentBuffer[op->index];
if(seg->pathIndex != currentPath)
{
//depending on winding number, update color
if(winding & 1)
{
float4 pathColor = pathBuffer[currentPath].color;
pathColor.rgb *= pathColor.a;
color = color*(1-pathColor.a) + pathColor;
}
currentPath = seg->pathIndex;
winding = 0;
}
if(pixelCoord.y >= seg->box.y && pixelCoord.y < seg->box.w)
{
if(pixelCoord.x < seg->box.x)
{
winding += seg->windingIncrement;
}
else if(pixelCoord.x < seg->box.z)
{
/*TODO: if pixel is on opposite size of diagonal as curve on the right, increment
otherwise if not on same size of diagonal as curve, do implicit test
*/
float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x);
float ofs = seg->box.w - seg->box.y;
float dx = pixelCoord.x - seg->box.x;
float dy = pixelCoord.y - seg->box.y;
if( (seg->config == MG_MTL_BR && dy > alpha*dx)
||(seg->config == MG_MTL_TR && dy < ofs - alpha*dx))
{
winding += seg->windingIncrement;
}
else if( !(seg->config == MG_MTL_TL && dy < alpha*dx)
&& !(seg->config == MG_MTL_BL && dy > ofs - alpha*dx))
{
//Need implicit test, but for lines, we only have config BR or TR, so the test is always negative for now
}
}
}
}
opIndex = op->next;
}
}
}