diff --git a/src/mtl_renderer.m b/src/mtl_renderer.m
index abbe27e..ad9531d 100644
--- a/src/mtl_renderer.m
+++ b/src/mtl_renderer.m
@@ -68,6 +68,9 @@ typedef struct mg_mtl_canvas_backend
 	vec4 pathScreenExtents;
 	vec4 pathUserExtents;
 
+	int maxTileQueueCount;
+	int maxSegmentCount;
+
 } mg_mtl_canvas_backend;
 
 typedef struct mg_mtl_image_data
@@ -145,16 +148,19 @@ void mg_mtl_canvas_encode_element(mg_mtl_canvas_backend* backend, mg_path_elt_ty
 	switch(kind)
 	{
 		case MG_PATH_LINE:
+			backend->maxSegmentCount += 1;
 			elt->kind = MG_MTL_LINE;
 			count = 2;
 			break;
 
 		case MG_PATH_QUADRATIC:
+			backend->maxSegmentCount += 3;
 			elt->kind = MG_MTL_QUADRATIC;
 			count = 3;
 			break;
 
 		case MG_PATH_CUBIC:
+			backend->maxSegmentCount += 7;
 			elt->kind = MG_MTL_CUBIC;
 			count = 4;
 			break;
@@ -244,6 +250,10 @@ void mg_mtl_encode_path(mg_mtl_canvas_backend* backend, mg_primitive* primitive,
 		                             simd_make_float3(uvTransform.m[1]/scale, uvTransform.m[4]/scale, 0),
 		                             simd_make_float3(uvTransform.m[2], uvTransform.m[5], 1));
 	}
+
+	int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_MTL_TILE_SIZE + 1;
+	int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_MTL_TILE_SIZE + 1;
+	backend->maxTileQueueCount += (nTilesX * nTilesY);
 }
 
 bool mg_intersect_hull_legs(vec2 p0, vec2 p1, vec2 p2, vec2 p3, vec2* intersection)
@@ -899,6 +909,38 @@ void mg_mtl_render_stroke(mg_mtl_canvas_backend* backend,
 
 }
 
+//NOTE: Make sure *buffer is at least wantedSize bytes long, replacing it with a bigger buffer if it is not.
+//      - backend:    canvas backend; the new buffer is created on backend->surface->device.
+//      - buffer:     in/out pointer to the buffer to check. When growth happens it is overwritten with
+//                    a freshly created buffer; the old contents are NOT copied over.
+//      - wantedSize: minimum required length, in bytes.
+//      If the device fails to create the bigger buffer, *buffer is left untouched.
+void mg_mtl_grow_buffer_if_needed(mg_mtl_canvas_backend* backend, id* buffer, u64 wantedSize)
+{
+	u64 bufferSize = [(*buffer) length];
+	if(bufferSize < wantedSize)
+	{
+		//NOTE: keep the size in 64 bits and add 20% of headroom with integer math, so that big requests
+		//      are neither truncated to int nor rounded through a double
+		u64 newSize = wantedSize + wantedSize/5;
+
+		@autoreleasepool
+		{
+			id device = backend->surface->device;
+			MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
+
+			//NOTE: create the replacement first, and only drop the old buffer once we know we have a new one
+			id newBuffer = [device newBufferWithLength: newSize options: bufferOptions];
+			if(newBuffer != nil)
+			{
+				//NOTE: MTLBuffers are retained by the command buffer, so we don't risk deallocating while the buffer is in use
+				[*buffer release];
+				*buffer = newBuffer;
+			}
+		}
+	}
+}
+
 void mg_mtl_render_batch(mg_mtl_canvas_backend* 
backend, mg_mtl_surface* surface, mg_image_data* image, @@ -910,13 +941,21 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend, { int pathBufferOffset = backend->pathBatchStart * sizeof(mg_mtl_path); int elementBufferOffset = backend->eltBatchStart * sizeof(mg_mtl_path_elt); - int pathCount = backend->pathCount - backend->pathBatchStart; - int eltCount = backend->eltCount - backend->eltBatchStart; + int pathCount = backend->pathCount - backend->pathBatchStart; + int eltCount = backend->eltCount - backend->eltBatchStart; + + //NOTE: update intermediate buffers sizes if needed + + mg_mtl_grow_buffer_if_needed(backend, &backend->pathQueueBuffer, pathCount * sizeof(mg_mtl_path_queue)); + mg_mtl_grow_buffer_if_needed(backend, &backend->tileQueueBuffer, backend->maxTileQueueCount * sizeof(mg_mtl_tile_queue)); + mg_mtl_grow_buffer_if_needed(backend, &backend->segmentBuffer, backend->maxSegmentCount * sizeof(mg_mtl_segment)); + mg_mtl_grow_buffer_if_needed(backend, &backend->screenTilesBuffer, nTilesX * nTilesY * sizeof(mg_mtl_screen_tile)); + mg_mtl_grow_buffer_if_needed(backend, &backend->tileOpBuffer, backend->maxSegmentCount * 30 * sizeof(mg_mtl_tile_op)); //NOTE: encode GPU commands @autoreleasepool { - //NOTE: create output texture + //NOTE: clear output texture MTLRenderPassDescriptor* clearDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; clearDescriptor.colorAttachments[0].texture = backend->outTexture; clearDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear; @@ -941,13 +980,16 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend, pathEncoder.label = @"path pass"; [pathEncoder setComputePipelineState: backend->pathPipeline]; + int tileQueueMax = [backend->tileQueueBuffer length] / sizeof(mg_mtl_tile_queue); + [pathEncoder setBytes:&pathCount length:sizeof(int) atIndex:0]; [pathEncoder setBuffer:backend->pathBuffer[backend->bufferIndex] offset:pathBufferOffset atIndex:1]; [pathEncoder setBuffer:backend->pathQueueBuffer offset:0 
atIndex:2];
 		[pathEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
 		[pathEncoder setBuffer:backend->tileQueueCountBuffer offset:0 atIndex:4];
-		[pathEncoder setBytes:&tileSize length:sizeof(int) atIndex:5];
-		[pathEncoder setBytes:&scale length:sizeof(int) atIndex:6];
+		[pathEncoder setBytes:&tileQueueMax length:sizeof(int) atIndex:5];
+		[pathEncoder setBytes:&tileSize length:sizeof(int) atIndex:6];
+		[pathEncoder setBytes:&scale length:sizeof(int) atIndex:7];
 
 		MTLSize pathGridSize = MTLSizeMake(pathCount, 1, 1);
 		MTLSize pathGroupSize = MTLSizeMake([backend->pathPipeline maxTotalThreadsPerThreadgroup], 1, 1);
@@ -960,6 +1002,9 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
 		segmentEncoder.label = @"segment pass";
 		[segmentEncoder setComputePipelineState: backend->segmentPipeline];
 
+		int tileOpMax = [backend->tileOpBuffer length] / sizeof(mg_mtl_tile_op); //NOTE: capacity of tileOpBuffer, in ops
+		int segmentMax = [backend->segmentBuffer length] / sizeof(mg_mtl_segment); //NOTE: capacity of segmentBuffer, in segments
+
 		[segmentEncoder setBytes:&eltCount length:sizeof(int) atIndex:0];
 		[segmentEncoder setBuffer:backend->elementBuffer[backend->bufferIndex] offset:elementBufferOffset atIndex:1];
 		[segmentEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2];
@@ -968,10 +1013,12 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
 		[segmentEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5];
 		[segmentEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6];
 		[segmentEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:7];
-		[segmentEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
-		[segmentEncoder setBytes:&scale length:sizeof(int) atIndex:9];
-		[segmentEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
-		[segmentEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
+		[segmentEncoder setBytes:&segmentMax length:sizeof(int) atIndex:8]; //NOTE: must match mtl_segment_setup's segmentMax [[buffer(8)]]
+		[segmentEncoder setBytes:&tileOpMax length:sizeof(int) atIndex:9]; //NOTE: must match mtl_segment_setup's tileOpMax [[buffer(9)]]
+		[segmentEncoder setBytes:&tileSize length:sizeof(int) atIndex:10];
+		[segmentEncoder setBytes:&scale length:sizeof(int) atIndex:11];
+		[segmentEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:12];
+		[segmentEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:13];
 
 		MTLSize segmentGridSize = MTLSizeMake(eltCount, 1, 1);
 		MTLSize segmentGroupSize = MTLSizeMake([backend->segmentPipeline maxTotalThreadsPerThreadgroup], 1, 1);
@@ -1008,10 +1055,11 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
 		[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
 		[mergeEncoder setBuffer:backend->rasterDispatchBuffer offset:0 atIndex:6];
 		[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:7];
-		[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
-		[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:9];
-		[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
-		[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
+		[mergeEncoder setBytes:&tileOpMax length:sizeof(int) atIndex:8];
+		[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:9];
+		[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:10];
+		[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:11];
+		[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:12];
 
 		MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
 		MTLSize mergeGroupSize = MTLSizeMake(MG_MTL_TILE_SIZE, MG_MTL_TILE_SIZE, 1);
@@ -1075,6 +1123,9 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
 
 	backend->pathBatchStart = backend->pathCount;
 	backend->eltBatchStart = backend->eltCount;
+
+	backend->maxSegmentCount = 0;
+	backend->maxTileQueueCount = 0;
 }
 
 void mg_mtl_canvas_resize(mg_mtl_canvas_backend* backend, vec2 size)
@@ -1168,6 +1219,8 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, 
backend->pathBatchStart = 0; backend->eltCount = 0; backend->eltBatchStart = 0; + backend->maxSegmentCount = 0; + backend->maxTileQueueCount = 0; //NOTE: encode and render batches vec2 currentPos = {0}; @@ -1350,12 +1403,13 @@ void mg_mtl_canvas_image_upload_region(mg_canvas_backend* backendInterface, mg_i bytesPerRow: 4 * region.w]; }} -const u32 MG_MTL_DEFAULT_PATH_BUFFER_LEN = (4<<10), - MG_MTL_DEFAULT_ELT_BUFFER_LEN = (4<<10), - MG_MTL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_segment), - MG_MTL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_queue), - MG_MTL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_queue), - MG_MTL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_op); +const u32 MG_MTL_DEFAULT_PATH_BUFFER_LEN = (4<<10), + MG_MTL_DEFAULT_ELT_BUFFER_LEN = (4<<10), + + MG_MTL_DEFAULT_SEGMENT_BUFFER_LEN = (4<<10), + MG_MTL_DEFAULT_PATH_QUEUE_BUFFER_LEN = (4<<10), + MG_MTL_DEFAULT_TILE_QUEUE_BUFFER_LEN = (4<<10), + MG_MTL_DEFAULT_TILE_OP_BUFFER_LEN = (4<<14); mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface) { @@ -1461,22 +1515,22 @@ mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface) } bufferOptions = MTLResourceStorageModePrivate; - backend->segmentBuffer = [surface->device newBufferWithLength: MG_MTL_SEGMENT_BUFFER_SIZE + backend->segmentBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_SEGMENT_BUFFER_LEN * sizeof(mg_mtl_segment) options: bufferOptions]; backend->segmentCountBuffer = [surface->device newBufferWithLength: sizeof(int) options: bufferOptions]; - backend->pathQueueBuffer = [surface->device newBufferWithLength: MG_MTL_PATH_QUEUE_BUFFER_SIZE + backend->pathQueueBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_PATH_QUEUE_BUFFER_LEN * sizeof(mg_mtl_path_queue) options: bufferOptions]; - backend->tileQueueBuffer = [surface->device newBufferWithLength: MG_MTL_TILE_QUEUE_BUFFER_SIZE + backend->tileQueueBuffer = [surface->device newBufferWithLength: 
MG_MTL_DEFAULT_TILE_QUEUE_BUFFER_LEN * sizeof(mg_mtl_tile_queue) options: bufferOptions]; backend->tileQueueCountBuffer = [surface->device newBufferWithLength: sizeof(int) options: bufferOptions]; - backend->tileOpBuffer = [surface->device newBufferWithLength: MG_MTL_TILE_OP_BUFFER_SIZE + backend->tileOpBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_TILE_OP_BUFFER_LEN * sizeof(mg_mtl_tile_op) options: bufferOptions]; backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int) diff --git a/src/mtl_renderer.metal b/src/mtl_renderer.metal index 08276b6..1013f85 100644 --- a/src/mtl_renderer.metal +++ b/src/mtl_renderer.metal @@ -231,8 +231,9 @@ kernel void mtl_path_setup(constant int* pathCount [[buffer(0)]], device mg_mtl_path_queue* pathQueueBuffer [[buffer(2)]], device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]], device atomic_int* tileQueueCount [[buffer(4)]], - constant int* tileSize [[buffer(5)]], - constant float* scale [[buffer(6)]], + constant int* tileQueueMax [[buffer(5)]], + constant int* tileSize [[buffer(6)]], + constant float* scale [[buffer(7)]], uint pathIndex [[thread_position_in_grid]]) { const device mg_mtl_path* path = &pathBuffer[pathIndex]; @@ -254,16 +255,24 @@ kernel void mtl_path_setup(constant int* pathCount [[buffer(0)]], int tileQueuesIndex = atomic_fetch_add_explicit(tileQueueCount, tileCount, memory_order_relaxed); - pathQueueBuffer[pathIndex].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY); - pathQueueBuffer[pathIndex].tileQueues = tileQueuesIndex; - - device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[tileQueuesIndex]; - - for(int i=0; i= tileQueueMax[0]) { - atomic_store_explicit(&tileQueues[i].first, -1, memory_order_relaxed); - tileQueues[i].last = -1; - atomic_store_explicit(&tileQueues[i].windingOffset, 0, memory_order_relaxed); + pathQueueBuffer[pathIndex].area = int4(0); + pathQueueBuffer[pathIndex].tileQueues = 0; + } + else + { + pathQueueBuffer[pathIndex].area = 
int4(firstTile.x, firstTile.y, nTilesX, nTilesY); + pathQueueBuffer[pathIndex].tileQueues = tileQueuesIndex; + + device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[tileQueuesIndex]; + + for(int i=0; itileOpCount, 1, memory_order_relaxed); - device mg_mtl_tile_op* op = &context->tileOpBuffer[tileOpIndex]; - op->kind = MG_MTL_OP_SEGMENT; - op->index = segIndex; - op->crossRight = false; - op->next = -1; - - int tileIndex = y*pathArea.z + x; - device mg_mtl_tile_queue* tile = &context->tileQueues[tileIndex]; - op->next = atomic_exchange_explicit(&tile->first, tileOpIndex, memory_order_relaxed); - if(op->next == -1) + if(tileOpIndex < context->tileOpMax) { - tile->last = tileOpIndex; - } + device mg_mtl_tile_op* op = &context->tileOpBuffer[tileOpIndex]; - //NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset - if(crossB) - { - mtl_log(context->log, "cross bottom boundary, increment "); - mtl_log_f32(context->log, seg->windingIncrement); - mtl_log(context->log, "\n"); - atomic_fetch_add_explicit(&tile->windingOffset, seg->windingIncrement, memory_order_relaxed); - } + op->kind = MG_MTL_OP_SEGMENT; + op->index = segIndex; + op->crossRight = false; + op->next = -1; - //NOTE: if the segment crosses the right boundary, mark it. 
We reuse one of the previous tests - if(crossR) - { - op->crossRight = true; + int tileIndex = y*pathArea.z + x; + device mg_mtl_tile_queue* tile = &context->tileQueues[tileIndex]; + op->next = atomic_exchange_explicit(&tile->first, tileOpIndex, memory_order_relaxed); + if(op->next == -1) + { + tile->last = tileOpIndex; + } + + //NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset + if(crossB) + { + mtl_log(context->log, "cross bottom boundary, increment "); + mtl_log_f32(context->log, seg->windingIncrement); + mtl_log(context->log, "\n"); + atomic_fetch_add_explicit(&tile->windingOffset, seg->windingIncrement, memory_order_relaxed); + } + + //NOTE: if the segment crosses the right boundary, mark it. We reuse one of the previous tests + if(crossR) + { + op->crossRight = true; + } } } } @@ -508,54 +524,60 @@ device mg_mtl_segment* mtl_segment_push(thread mtl_segment_setup_context* contex } break; } + device mg_mtl_segment* seg = 0; + int segIndex = atomic_fetch_add_explicit(context->segmentCount, 1, memory_order_relaxed); - device mg_mtl_segment* seg = &context->segmentBuffer[segIndex]; - bool goingUp = e.y >= s.y; - bool goingRight = e.x >= s.x; - - seg->kind = kind; - seg->pathIndex = context->pathIndex; - seg->windingIncrement = goingUp? 1 : -1; - - seg->box = (vector_float4){min(s.x, e.x), - min(s.y, e.y), - max(s.x, e.x), - max(s.y, e.y)}; - - float dx = c.x - seg->box.x; - float dy = c.y - seg->box.y; - float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x); - float ofs = seg->box.w - seg->box.y; - - if(goingUp == goingRight) + if(segIndex < context->segmentMax) { - if(seg->kind == MG_MTL_LINE) + seg = &context->segmentBuffer[segIndex]; + + bool goingUp = e.y >= s.y; + bool goingRight = e.x >= s.x; + + seg->kind = kind; + seg->pathIndex = context->pathIndex; + seg->windingIncrement = goingUp? 
1 : -1; + + seg->box = (vector_float4){min(s.x, e.x), + min(s.y, e.y), + max(s.x, e.x), + max(s.y, e.y)}; + + float dx = c.x - seg->box.x; + float dy = c.y - seg->box.y; + float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x); + float ofs = seg->box.w - seg->box.y; + + if(goingUp == goingRight) { - seg->config = MG_MTL_BR; - } - else if(dy > alpha*dx) - { - seg->config = MG_MTL_TL; + if(seg->kind == MG_MTL_LINE) + { + seg->config = MG_MTL_BR; + } + else if(dy > alpha*dx) + { + seg->config = MG_MTL_TL; + } + else + { + seg->config = MG_MTL_BR; + } } else { - seg->config = MG_MTL_BR; - } - } - else - { - if(seg->kind == MG_MTL_LINE) - { - seg->config = MG_MTL_TR; - } - else if(dy < ofs - alpha*dx) - { - seg->config = MG_MTL_BL; - } - else - { - seg->config = MG_MTL_TR; + if(seg->kind == MG_MTL_LINE) + { + seg->config = MG_MTL_TR; + } + else if(dy < ofs - alpha*dx) + { + seg->config = MG_MTL_BL; + } + else + { + seg->config = MG_MTL_TR; + } } } return(seg); @@ -567,8 +589,11 @@ device mg_mtl_segment* mtl_segment_push(thread mtl_segment_setup_context* contex void mtl_line_setup(thread mtl_segment_setup_context* context, float2 p[2]) { device mg_mtl_segment* seg = mtl_segment_push(context, p, MG_MTL_LINE); - seg->hullVertex = p[0]; - mtl_segment_bin_to_tiles(context, seg); + if(seg) + { + seg->hullVertex = p[0]; + mtl_segment_bin_to_tiles(context, seg); + } } float2 mtl_quadratic_blossom(float2 p[3], float u, float v) @@ -636,26 +661,29 @@ void mtl_quadratic_emit(thread mtl_segment_setup_context* context, { device mg_mtl_segment* seg = mtl_segment_push(context, p, MG_MTL_QUADRATIC); - //NOTE: compute implicit equation matrix - float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); + if(seg) + { + //NOTE: compute implicit equation matrix + float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); - float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y); - float b = p[1].x - p[0].x + 0.5*(p[0].x - 
p[2].x); - float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y); - float d = p[0].y - p[1].y; - float e = p[1].x - p[0].x; - float f = p[0].x*p[1].y - p[1].x*p[0].y; + float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y); + float b = p[1].x - p[0].x + 0.5*(p[0].x - p[2].x); + float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y); + float d = p[0].y - p[1].y; + float e = p[1].x - p[0].x; + float f = p[0].x*p[1].y - p[1].x*p[0].y; - float flip = (seg->config == MG_MTL_TL || seg->config == MG_MTL_BL)? -1 : 1; - float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y)); + float flip = (seg->config == MG_MTL_TL || seg->config == MG_MTL_BL)? -1 : 1; + float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y)); - seg->implicitMatrix = (1/det)*matrix_float3x3({a, d, 0.}, + seg->implicitMatrix = (1/det)*matrix_float3x3({a, d, 0.}, {b, e, 0.}, {c, f, g}); - seg->hullVertex = p[1]; + seg->hullVertex = p[1]; - mtl_segment_bin_to_tiles(context, seg); + mtl_segment_bin_to_tiles(context, seg); + } } void mtl_quadratic_setup(thread mtl_segment_setup_context* context, thread float2* p) @@ -1044,70 +1072,73 @@ void mtl_cubic_emit(thread mtl_segment_setup_context* context, mtl_cubic_info cu { device mg_mtl_segment* seg = mtl_segment_push(context, sp, MG_MTL_CUBIC); - float2 v0 = p[0]; - float2 v1 = p[3]; - float2 v2; - matrix_float3x3 K; - - float sqrNorm0 = length_squared(p[1]-p[0]); - float sqrNorm1 = length_squared(p[2]-p[3]); - - //TODO: should not be the local sub-curve, but the global curve!!! 
- if(length_squared(p[0]-p[3]) > 1e-5) + if(seg) { - if(sqrNorm0 >= sqrNorm1) - { - v2 = p[1]; - K = {curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz}; + float2 v0 = p[0]; + float2 v1 = p[3]; + float2 v2; + matrix_float3x3 K; + + float sqrNorm0 = length_squared(p[1]-p[0]); + float sqrNorm1 = length_squared(p[2]-p[3]); + + //TODO: should not be the local sub-curve, but the global curve!!! + if(length_squared(p[0]-p[3]) > 1e-5) + { + if(sqrNorm0 >= sqrNorm1) + { + v2 = p[1]; + K = {curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz}; + } + else + { + v2 = p[2]; + K = {curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz}; + } } else { + v1 = p[1]; v2 = p[2]; - K = {curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz}; + K = {curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz}; } - } - else - { - v1 = p[1]; - v2 = p[2]; - K = {curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz}; - } - //NOTE: set matrices + //NOTE: set matrices - //TODO: should we compute matrix relative to a base point to avoid loss of precision - // when computing barycentric matrix? + //TODO: should we compute matrix relative to a base point to avoid loss of precision + // when computing barycentric matrix? - matrix_float3x3 B = mtl_barycentric_matrix(v0, v1, v2); - seg->implicitMatrix = K*B; - seg->hullVertex = mtl_select_hull_vertex(sp[0], sp[1], sp[2], sp[3], context->log); + matrix_float3x3 B = mtl_barycentric_matrix(v0, v1, v2); + seg->implicitMatrix = K*B; + seg->hullVertex = mtl_select_hull_vertex(sp[0], sp[1], sp[2], sp[3], context->log); - //NOTE: compute sign flip - seg->sign = 1; + //NOTE: compute sign flip + seg->sign = 1; - if(curve.kind == MTL_CUBIC_SERPENTINE - || curve.kind == MTL_CUBIC_CUSP) - { - seg->sign = (curve.d1 < 0)? -1 : 1; + if(curve.kind == MTL_CUBIC_SERPENTINE + || curve.kind == MTL_CUBIC_CUSP) + { + seg->sign = (curve.d1 < 0)? 
-1 : 1; + } + else if(curve.kind == MTL_CUBIC_LOOP) + { + float d1 = curve.d1; + float d2 = curve.d2; + float d3 = curve.d3; + + float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0); + float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1); + float H = (abs(H0) > abs(H1)) ? H0 : H1; + seg->sign = (H*d1 > 0) ? -1 : 1; + } + + if(sp[3].y > sp[0].y) + { + seg->sign *= -1; + } + + //NOTE: bin to tiles + mtl_segment_bin_to_tiles(context, seg); } - else if(curve.kind == MTL_CUBIC_LOOP) - { - float d1 = curve.d1; - float d2 = curve.d2; - float d3 = curve.d3; - - float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0); - float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1); - float H = (abs(H0) > abs(H1)) ? H0 : H1; - seg->sign = (H*d1 > 0) ? -1 : 1; - } - - if(sp[3].y > sp[0].y) - { - seg->sign *= -1; - } - - //NOTE: bin to tiles - mtl_segment_bin_to_tiles(context, seg); } void mtl_cubic_setup(thread mtl_segment_setup_context* context, float2 p[4]) @@ -1229,11 +1260,13 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]], device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]], device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]], device atomic_int* tileOpCount [[buffer(7)]], - constant int* tileSize [[buffer(8)]], - constant float* scale [[buffer(9)]], + constant int* segmentMax [[buffer(8)]], + constant int* tileOpMax [[buffer(9)]], + constant int* tileSize [[buffer(10)]], + constant float* scale [[buffer(11)]], - device char* logBuffer [[buffer(10)]], - device atomic_int* logOffsetBuffer [[buffer(11)]], + device char* logBuffer [[buffer(12)]], + device atomic_int* logOffsetBuffer [[buffer(13)]], uint eltIndex [[thread_position_in_grid]]) { const device mg_mtl_path_elt* elt = &elementBuffer[eltIndex]; @@ -1247,10 +1280,12 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]], .tileQueues = tileQueues, .tileOpBuffer = tileOpBuffer, .tileOpCount = tileOpCount, + .tileOpMax = tileOpMax[0], + 
.segmentMax = segmentMax[0], .tileSize = tileSize[0], .log.buffer = logBuffer, .log.offset = logOffsetBuffer, - .log.enabled = false}; + .log.enabled = false,}; switch(elt->kind) { @@ -1327,10 +1362,11 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]], device atomic_int* tileOpCount [[buffer(5)]], device MTLDispatchThreadgroupsIndirectArguments* dispatchBuffer [[buffer(6)]], device mg_mtl_screen_tile* screenTilesBuffer [[buffer(7)]], - constant int* tileSize [[buffer(8)]], - constant float* scale [[buffer(9)]], - device char* logBuffer [[buffer(10)]], - device atomic_int* logOffsetBuffer [[buffer(11)]], + constant int* tileOpMax [[buffer(8)]], + constant int* tileSize [[buffer(9)]], + constant float* scale [[buffer(10)]], + device char* logBuffer [[buffer(11)]], + device atomic_int* logOffsetBuffer [[buffer(12)]], uint2 threadCoord [[thread_position_in_grid]], uint2 gridSize [[threads_per_grid]]) { @@ -1393,6 +1429,12 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]], //NOTE: tile is full covered. Add path start op (with winding offset). // Additionally if color is opaque and tile is fully inside clip, trim tile list. 
int pathOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed); + + if(pathOpIndex >= tileOpMax[0]) + { + return; + } + device mg_mtl_tile_op* pathOp = &tileOpBuffer[pathOpIndex]; pathOp->kind = MG_MTL_OP_CLIP_FILL; pathOp->next = -1; @@ -1421,6 +1463,11 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]], { //NOTE: add path start op (with winding offset) int startOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed); + if(startOpIndex >= tileOpMax[0]) + { + return; + } + device mg_mtl_tile_op* startOp = &tileOpBuffer[startOpIndex]; startOp->kind = MG_MTL_OP_START; startOp->next = -1; @@ -1439,6 +1486,11 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]], //NOTE: add path end op int endOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed); + if(endOpIndex >= tileOpMax[0]) + { + return; + } + device mg_mtl_tile_op* endOp = &tileOpBuffer[endOpIndex]; endOp->kind = MG_MTL_OP_END; endOp->next = -1; @@ -1446,7 +1498,6 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]], *nextLink = endOpIndex; nextLink = &endOp->next; - } } }