[osx, canvas]

- Pass buffer lengths to kernels and bounds-check when allocating from buffers
- Dynamically compute/guess the size of intermediate buffers and grow them if needed (see the sketch below)
Martin Fouilleul 2023-07-11 20:03:17 +02:00
parent 98a516ff0a
commit 025ebd91d5
2 changed files with 282 additions and 177 deletions
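The scheme behind both points is roughly the following (a minimal C sketch with illustrative names — gpu_buffer, grow_buffer_if_needed, push_op — not the actual backend API): the host tracks conservative upper bounds while encoding, grows the intermediate buffers before dispatch, and the kernels bump-allocate with an atomic counter but skip any write that would land past the end of a buffer.

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct gpu_buffer { void* ptr; uint64_t length; } gpu_buffer;

    // Host side: reallocate with ~20% headroom when the wanted size outgrows
    // the current buffer (stands in for releasing and re-creating an MTLBuffer).
    void grow_buffer_if_needed(gpu_buffer* buf, uint64_t wantedSize)
    {
        if(buf->length < wantedSize)
        {
            uint64_t newSize = wantedSize + wantedSize/5;
            buf->ptr = realloc(buf->ptr, newSize);
            buf->length = newSize;
        }
    }

    // Device side (expressed in plain C): bump-allocate a slot with a counter,
    // but only use it if it falls inside the buffer; anything past the end is
    // dropped for this batch and the host grows the buffer afterwards.
    int push_op(int* opCount, int opMax)
    {
        int index = (*opCount)++;            // atomic_fetch_add_explicit in the Metal kernels
        return (index < opMax) ? index : -1; // -1 means "skip the write"
    }

In the diff below, the host half corresponds to mg_mtl_grow_buffer_if_needed and the maxSegmentCount / maxTileQueueCount estimates; the device half corresponds to the tileQueueMax, segmentMax and tileOpMax checks added to the kernels.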

View File

@@ -68,6 +68,9 @@ typedef struct mg_mtl_canvas_backend
     vec4 pathScreenExtents;
     vec4 pathUserExtents;
+    int maxTileQueueCount;
+    int maxSegmentCount;
 } mg_mtl_canvas_backend;

 typedef struct mg_mtl_image_data
@@ -145,16 +148,19 @@ void mg_mtl_canvas_encode_element(mg_mtl_canvas_backend* backend, mg_path_elt_ty
     switch(kind)
     {
         case MG_PATH_LINE:
+            backend->maxSegmentCount += 1;
             elt->kind = MG_MTL_LINE;
             count = 2;
             break;

         case MG_PATH_QUADRATIC:
+            backend->maxSegmentCount += 3;
             elt->kind = MG_MTL_QUADRATIC;
             count = 3;
             break;

         case MG_PATH_CUBIC:
+            backend->maxSegmentCount += 7;
             elt->kind = MG_MTL_CUBIC;
             count = 4;
             break;
@@ -244,6 +250,10 @@ void mg_mtl_encode_path(mg_mtl_canvas_backend* backend, mg_primitive* primitive,
                                       simd_make_float3(uvTransform.m[1]/scale, uvTransform.m[4]/scale, 0),
                                       simd_make_float3(uvTransform.m[2], uvTransform.m[5], 1));
     }
+
+    int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_MTL_TILE_SIZE + 1;
+    int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_MTL_TILE_SIZE + 1;
+    backend->maxTileQueueCount += (nTilesX * nTilesY);
 }

 bool mg_intersect_hull_legs(vec2 p0, vec2 p1, vec2 p2, vec2 p3, vec2* intersection)
@@ -899,6 +909,27 @@ void mg_mtl_render_stroke(mg_mtl_canvas_backend* backend,
 }

+void mg_mtl_grow_buffer_if_needed(mg_mtl_canvas_backend* backend, id<MTLBuffer>* buffer, u64 wantedSize)
+{
+    u64 bufferSize = [(*buffer) length];
+    if(bufferSize < wantedSize)
+    {
+        int newSize = wantedSize * 1.2;
+        @autoreleasepool
+        {
+            //NOTE: MTLBuffers are retained by the command buffer, so we don't risk deallocating while the buffer is in use
+            [*buffer release];
+            *buffer = nil;
+
+            id<MTLDevice> device = backend->surface->device;
+            MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
+            *buffer = [device newBufferWithLength: newSize options: bufferOptions];
+        }
+    }
+}
+
 void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
                          mg_mtl_surface* surface,
                          mg_image_data* image,
@@ -910,13 +941,21 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
 {
     int pathBufferOffset = backend->pathBatchStart * sizeof(mg_mtl_path);
     int elementBufferOffset = backend->eltBatchStart * sizeof(mg_mtl_path_elt);
     int pathCount = backend->pathCount - backend->pathBatchStart;
     int eltCount = backend->eltCount - backend->eltBatchStart;

+    //NOTE: update intermediate buffers sizes if needed
+    mg_mtl_grow_buffer_if_needed(backend, &backend->pathQueueBuffer, pathCount * sizeof(mg_mtl_path_queue));
+    mg_mtl_grow_buffer_if_needed(backend, &backend->tileQueueBuffer, backend->maxTileQueueCount * sizeof(mg_mtl_tile_queue));
+    mg_mtl_grow_buffer_if_needed(backend, &backend->segmentBuffer, backend->maxSegmentCount * sizeof(mg_mtl_segment));
+    mg_mtl_grow_buffer_if_needed(backend, &backend->screenTilesBuffer, nTilesX * nTilesY * sizeof(mg_mtl_screen_tile));
+    mg_mtl_grow_buffer_if_needed(backend, &backend->tileOpBuffer, backend->maxSegmentCount * 30 * sizeof(mg_mtl_tile_op));
+
     //NOTE: encode GPU commands
     @autoreleasepool
     {
-        //NOTE: create output texture
+        //NOTE: clear output texture
         MTLRenderPassDescriptor* clearDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
         clearDescriptor.colorAttachments[0].texture = backend->outTexture;
         clearDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
@@ -941,13 +980,16 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
         pathEncoder.label = @"path pass";
         [pathEncoder setComputePipelineState: backend->pathPipeline];
+
+        int tileQueueMax = [backend->tileQueueBuffer length] / sizeof(mg_mtl_tile_queue);
         [pathEncoder setBytes:&pathCount length:sizeof(int) atIndex:0];
         [pathEncoder setBuffer:backend->pathBuffer[backend->bufferIndex] offset:pathBufferOffset atIndex:1];
         [pathEncoder setBuffer:backend->pathQueueBuffer offset:0 atIndex:2];
         [pathEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
         [pathEncoder setBuffer:backend->tileQueueCountBuffer offset:0 atIndex:4];
-        [pathEncoder setBytes:&tileSize length:sizeof(int) atIndex:5];
-        [pathEncoder setBytes:&scale length:sizeof(int) atIndex:6];
+        [pathEncoder setBytes:&tileQueueMax length:sizeof(int) atIndex:5];
+        [pathEncoder setBytes:&tileSize length:sizeof(int) atIndex:6];
+        [pathEncoder setBytes:&scale length:sizeof(int) atIndex:7];

         MTLSize pathGridSize = MTLSizeMake(pathCount, 1, 1);
         MTLSize pathGroupSize = MTLSizeMake([backend->pathPipeline maxTotalThreadsPerThreadgroup], 1, 1);
@@ -960,6 +1002,9 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
         segmentEncoder.label = @"segment pass";
         [segmentEncoder setComputePipelineState: backend->segmentPipeline];
+
+        int tileOpMax = [backend->tileOpBuffer length] / sizeof(mg_mtl_tile_op);
+        int segmentMax = [backend->segmentBuffer length] / sizeof(mg_mtl_segment);
         [segmentEncoder setBytes:&eltCount length:sizeof(int) atIndex:0];
         [segmentEncoder setBuffer:backend->elementBuffer[backend->bufferIndex] offset:elementBufferOffset atIndex:1];
         [segmentEncoder setBuffer:backend->segmentCountBuffer offset:0 atIndex:2];
@@ -968,10 +1013,12 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
         [segmentEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:5];
         [segmentEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:6];
         [segmentEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:7];
-        [segmentEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
-        [segmentEncoder setBytes:&scale length:sizeof(int) atIndex:9];
-        [segmentEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
-        [segmentEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
+        [segmentEncoder setBytes:&tileOpMax length:sizeof(int) atIndex:8];
+        [segmentEncoder setBytes:&segmentMax length:sizeof(int) atIndex:9];
+        [segmentEncoder setBytes:&tileSize length:sizeof(int) atIndex:10];
+        [segmentEncoder setBytes:&scale length:sizeof(int) atIndex:11];
+        [segmentEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:12];
+        [segmentEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:13];

         MTLSize segmentGridSize = MTLSizeMake(eltCount, 1, 1);
         MTLSize segmentGroupSize = MTLSizeMake([backend->segmentPipeline maxTotalThreadsPerThreadgroup], 1, 1);
@@ -1008,10 +1055,11 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
         [mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
         [mergeEncoder setBuffer:backend->rasterDispatchBuffer offset:0 atIndex:6];
         [mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:7];
-        [mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
-        [mergeEncoder setBytes:&scale length:sizeof(float) atIndex:9];
-        [mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
-        [mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
+        [mergeEncoder setBytes:&tileOpMax length:sizeof(int) atIndex:8];
+        [mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:9];
+        [mergeEncoder setBytes:&scale length:sizeof(float) atIndex:10];
+        [mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:11];
+        [mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:12];

         MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
         MTLSize mergeGroupSize = MTLSizeMake(MG_MTL_TILE_SIZE, MG_MTL_TILE_SIZE, 1);
@@ -1075,6 +1123,9 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
     backend->pathBatchStart = backend->pathCount;
     backend->eltBatchStart = backend->eltCount;
+
+    backend->maxSegmentCount = 0;
+    backend->maxTileQueueCount = 0;
 }

 void mg_mtl_canvas_resize(mg_mtl_canvas_backend* backend, vec2 size)
@@ -1168,6 +1219,8 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
     backend->pathBatchStart = 0;
     backend->eltCount = 0;
     backend->eltBatchStart = 0;
+    backend->maxSegmentCount = 0;
+    backend->maxTileQueueCount = 0;

     //NOTE: encode and render batches
     vec2 currentPos = {0};
@@ -1350,12 +1403,13 @@ void mg_mtl_canvas_image_upload_region(mg_canvas_backend* backendInterface, mg_i
                        bytesPerRow: 4 * region.w];
     }}

 const u32 MG_MTL_DEFAULT_PATH_BUFFER_LEN = (4<<10),
           MG_MTL_DEFAULT_ELT_BUFFER_LEN = (4<<10),
-          MG_MTL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_segment),
-          MG_MTL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_path_queue),
-          MG_MTL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_queue),
-          MG_MTL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_mtl_tile_op);
+          MG_MTL_DEFAULT_SEGMENT_BUFFER_LEN = (4<<10),
+          MG_MTL_DEFAULT_PATH_QUEUE_BUFFER_LEN = (4<<10),
+          MG_MTL_DEFAULT_TILE_QUEUE_BUFFER_LEN = (4<<10),
+          MG_MTL_DEFAULT_TILE_OP_BUFFER_LEN = (4<<14);

 mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface)
 {
@@ -1461,22 +1515,22 @@ mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface)
     }

     bufferOptions = MTLResourceStorageModePrivate;
-    backend->segmentBuffer = [surface->device newBufferWithLength: MG_MTL_SEGMENT_BUFFER_SIZE
+    backend->segmentBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_SEGMENT_BUFFER_LEN * sizeof(mg_mtl_segment)
                                                options: bufferOptions];

     backend->segmentCountBuffer = [surface->device newBufferWithLength: sizeof(int)
                                                    options: bufferOptions];

-    backend->pathQueueBuffer = [surface->device newBufferWithLength: MG_MTL_PATH_QUEUE_BUFFER_SIZE
+    backend->pathQueueBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_PATH_QUEUE_BUFFER_LEN * sizeof(mg_mtl_path_queue)
                                                  options: bufferOptions];

-    backend->tileQueueBuffer = [surface->device newBufferWithLength: MG_MTL_TILE_QUEUE_BUFFER_SIZE
+    backend->tileQueueBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_TILE_QUEUE_BUFFER_LEN * sizeof(mg_mtl_tile_queue)
                                                  options: bufferOptions];

     backend->tileQueueCountBuffer = [surface->device newBufferWithLength: sizeof(int)
                                                      options: bufferOptions];

-    backend->tileOpBuffer = [surface->device newBufferWithLength: MG_MTL_TILE_OP_BUFFER_SIZE
+    backend->tileOpBuffer = [surface->device newBufferWithLength: MG_MTL_DEFAULT_TILE_OP_BUFFER_LEN * sizeof(mg_mtl_tile_op)
                                                options: bufferOptions];

     backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int)

View File

@@ -231,8 +231,9 @@ kernel void mtl_path_setup(constant int* pathCount [[buffer(0)]],
                            device mg_mtl_path_queue* pathQueueBuffer [[buffer(2)]],
                            device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
                            device atomic_int* tileQueueCount [[buffer(4)]],
-                           constant int* tileSize [[buffer(5)]],
-                           constant float* scale [[buffer(6)]],
+                           constant int* tileQueueMax [[buffer(5)]],
+                           constant int* tileSize [[buffer(6)]],
+                           constant float* scale [[buffer(7)]],
                            uint pathIndex [[thread_position_in_grid]])
 {
     const device mg_mtl_path* path = &pathBuffer[pathIndex];
@@ -254,16 +255,24 @@ kernel void mtl_path_setup(constant int* pathCount [[buffer(0)]],
     int tileQueuesIndex = atomic_fetch_add_explicit(tileQueueCount, tileCount, memory_order_relaxed);

+    if(tileQueuesIndex + tileCount >= tileQueueMax[0])
+    {
+        pathQueueBuffer[pathIndex].area = int4(0);
+        pathQueueBuffer[pathIndex].tileQueues = 0;
+    }
+    else
+    {
         pathQueueBuffer[pathIndex].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY);
         pathQueueBuffer[pathIndex].tileQueues = tileQueuesIndex;

         device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[tileQueuesIndex];
         for(int i=0; i<tileCount; i++)
         {
             atomic_store_explicit(&tileQueues[i].first, -1, memory_order_relaxed);
             tileQueues[i].last = -1;
             atomic_store_explicit(&tileQueues[i].windingOffset, 0, memory_order_relaxed);
         }
+    }
 }
@@ -376,6 +385,9 @@ typedef struct mtl_segment_setup_context
     int pathIndex;
+
+    int tileOpMax;
+    int segmentMax;
 } mtl_segment_setup_context;

 void mtl_segment_bin_to_tiles(thread mtl_segment_setup_context* context, device mg_mtl_segment* seg)
@@ -439,34 +451,38 @@ void mtl_segment_bin_to_tiles(thread mtl_segment_setup_context* context, device
             if(crossL || crossR || crossT || crossB || s0Inside || s1Inside)
             {
                 int tileOpIndex = atomic_fetch_add_explicit(context->tileOpCount, 1, memory_order_relaxed);
+
+                if(tileOpIndex < context->tileOpMax)
+                {
                     device mg_mtl_tile_op* op = &context->tileOpBuffer[tileOpIndex];

                     op->kind = MG_MTL_OP_SEGMENT;
                     op->index = segIndex;
                     op->crossRight = false;
                     op->next = -1;

                     int tileIndex = y*pathArea.z + x;
                     device mg_mtl_tile_queue* tile = &context->tileQueues[tileIndex];
                     op->next = atomic_exchange_explicit(&tile->first, tileOpIndex, memory_order_relaxed);
                     if(op->next == -1)
                     {
                         tile->last = tileOpIndex;
                     }

                     //NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset
                     if(crossB)
                     {
                         mtl_log(context->log, "cross bottom boundary, increment ");
                         mtl_log_f32(context->log, seg->windingIncrement);
                         mtl_log(context->log, "\n");
                         atomic_fetch_add_explicit(&tile->windingOffset, seg->windingIncrement, memory_order_relaxed);
                     }
                     //NOTE: if the segment crosses the right boundary, mark it. We reuse one of the previous tests
                     if(crossR)
                     {
                         op->crossRight = true;
                     }
+                }
             }
         }
     }
@@ -508,54 +524,60 @@ device mg_mtl_segment* mtl_segment_push(thread mtl_segment_setup_context* contex
         } break;
     }

+    device mg_mtl_segment* seg = 0;
     int segIndex = atomic_fetch_add_explicit(context->segmentCount, 1, memory_order_relaxed);
-    device mg_mtl_segment* seg = &context->segmentBuffer[segIndex];

+    if(segIndex < context->segmentMax)
+    {
+        seg = &context->segmentBuffer[segIndex];
+
         bool goingUp = e.y >= s.y;
         bool goingRight = e.x >= s.x;

         seg->kind = kind;
         seg->pathIndex = context->pathIndex;
         seg->windingIncrement = goingUp? 1 : -1;
         seg->box = (vector_float4){min(s.x, e.x),
                                    min(s.y, e.y),
                                    max(s.x, e.x),
                                    max(s.y, e.y)};

         float dx = c.x - seg->box.x;
         float dy = c.y - seg->box.y;
         float alpha = (seg->box.w - seg->box.y)/(seg->box.z - seg->box.x);
         float ofs = seg->box.w - seg->box.y;

         if(goingUp == goingRight)
         {
             if(seg->kind == MG_MTL_LINE)
             {
                 seg->config = MG_MTL_BR;
             }
             else if(dy > alpha*dx)
             {
                 seg->config = MG_MTL_TL;
             }
             else
             {
                 seg->config = MG_MTL_BR;
             }
         }
         else
         {
             if(seg->kind == MG_MTL_LINE)
             {
                 seg->config = MG_MTL_TR;
             }
             else if(dy < ofs - alpha*dx)
             {
                 seg->config = MG_MTL_BL;
             }
             else
             {
                 seg->config = MG_MTL_TR;
             }
         }
+    }

     return(seg);
@@ -567,8 +589,11 @@ device mg_mtl_segment* mtl_segment_push(thread mtl_segment_setup_context* contex
 void mtl_line_setup(thread mtl_segment_setup_context* context, float2 p[2])
 {
     device mg_mtl_segment* seg = mtl_segment_push(context, p, MG_MTL_LINE);
+    if(seg)
+    {
         seg->hullVertex = p[0];
         mtl_segment_bin_to_tiles(context, seg);
+    }
 }

 float2 mtl_quadratic_blossom(float2 p[3], float u, float v)
@@ -636,26 +661,29 @@ void mtl_quadratic_emit(thread mtl_segment_setup_context* context,
 {
     device mg_mtl_segment* seg = mtl_segment_push(context, p, MG_MTL_QUADRATIC);

+    if(seg)
+    {
         //NOTE: compute implicit equation matrix
         float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y);

         float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y);
         float b = p[1].x - p[0].x + 0.5*(p[0].x - p[2].x);
         float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y);

         float d = p[0].y - p[1].y;
         float e = p[1].x - p[0].x;
         float f = p[0].x*p[1].y - p[1].x*p[0].y;

         float flip = (seg->config == MG_MTL_TL || seg->config == MG_MTL_BL)? -1 : 1;
         float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y));

         seg->implicitMatrix = (1/det)*matrix_float3x3({a, d, 0.},
                                                       {b, e, 0.},
                                                       {c, f, g});
         seg->hullVertex = p[1];

         mtl_segment_bin_to_tiles(context, seg);
+    }
 }

 void mtl_quadratic_setup(thread mtl_segment_setup_context* context, thread float2* p)
@@ -1044,70 +1072,73 @@ void mtl_cubic_emit(thread mtl_segment_setup_context* context, mtl_cubic_info cu
 {
     device mg_mtl_segment* seg = mtl_segment_push(context, sp, MG_MTL_CUBIC);

+    if(seg)
+    {
         float2 v0 = p[0];
         float2 v1 = p[3];
         float2 v2;
         matrix_float3x3 K;

         float sqrNorm0 = length_squared(p[1]-p[0]);
         float sqrNorm1 = length_squared(p[2]-p[3]);

         //TODO: should not be the local sub-curve, but the global curve!!!
         if(length_squared(p[0]-p[3]) > 1e-5)
         {
             if(sqrNorm0 >= sqrNorm1)
             {
                 v2 = p[1];
                 K = {curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz};
             }
             else
             {
                 v2 = p[2];
                 K = {curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz};
             }
         }
         else
         {
             v1 = p[1];
             v2 = p[2];
             K = {curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz};
         }

         //NOTE: set matrices
         //TODO: should we compute matrix relative to a base point to avoid loss of precision
         //      when computing barycentric matrix?
         matrix_float3x3 B = mtl_barycentric_matrix(v0, v1, v2);
         seg->implicitMatrix = K*B;
         seg->hullVertex = mtl_select_hull_vertex(sp[0], sp[1], sp[2], sp[3], context->log);

         //NOTE: compute sign flip
         seg->sign = 1;

         if(curve.kind == MTL_CUBIC_SERPENTINE
         || curve.kind == MTL_CUBIC_CUSP)
         {
             seg->sign = (curve.d1 < 0)? -1 : 1;
         }
         else if(curve.kind == MTL_CUBIC_LOOP)
         {
             float d1 = curve.d1;
             float d2 = curve.d2;
             float d3 = curve.d3;
             float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0);
             float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1);
             float H = (abs(H0) > abs(H1)) ? H0 : H1;
             seg->sign = (H*d1 > 0) ? -1 : 1;
         }

         if(sp[3].y > sp[0].y)
         {
             seg->sign *= -1;
         }

         //NOTE: bin to tiles
         mtl_segment_bin_to_tiles(context, seg);
+    }
 }

 void mtl_cubic_setup(thread mtl_segment_setup_context* context, float2 p[4])
@@ -1229,11 +1260,13 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]],
                               device mg_mtl_tile_queue* tileQueueBuffer [[buffer(5)]],
                               device mg_mtl_tile_op* tileOpBuffer [[buffer(6)]],
                               device atomic_int* tileOpCount [[buffer(7)]],
-                              constant int* tileSize [[buffer(8)]],
-                              constant float* scale [[buffer(9)]],
-                              device char* logBuffer [[buffer(10)]],
-                              device atomic_int* logOffsetBuffer [[buffer(11)]],
+                              constant int* segmentMax [[buffer(8)]],
+                              constant int* tileOpMax [[buffer(9)]],
+                              constant int* tileSize [[buffer(10)]],
+                              constant float* scale [[buffer(11)]],
+                              device char* logBuffer [[buffer(12)]],
+                              device atomic_int* logOffsetBuffer [[buffer(13)]],
                               uint eltIndex [[thread_position_in_grid]])
 {
     const device mg_mtl_path_elt* elt = &elementBuffer[eltIndex];
@@ -1247,10 +1280,12 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]],
                                          .tileQueues = tileQueues,
                                          .tileOpBuffer = tileOpBuffer,
                                          .tileOpCount = tileOpCount,
+                                         .tileOpMax = tileOpMax[0],
+                                         .segmentMax = segmentMax[0],
                                          .tileSize = tileSize[0],
                                          .log.buffer = logBuffer,
                                          .log.offset = logOffsetBuffer,
-                                         .log.enabled = false};
+                                         .log.enabled = false,};

     switch(elt->kind)
     {
@@ -1327,10 +1362,11 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
                       device atomic_int* tileOpCount [[buffer(5)]],
                       device MTLDispatchThreadgroupsIndirectArguments* dispatchBuffer [[buffer(6)]],
                       device mg_mtl_screen_tile* screenTilesBuffer [[buffer(7)]],
-                      constant int* tileSize [[buffer(8)]],
-                      constant float* scale [[buffer(9)]],
-                      device char* logBuffer [[buffer(10)]],
-                      device atomic_int* logOffsetBuffer [[buffer(11)]],
+                      constant int* tileOpMax [[buffer(8)]],
+                      constant int* tileSize [[buffer(9)]],
+                      constant float* scale [[buffer(10)]],
+                      device char* logBuffer [[buffer(11)]],
+                      device atomic_int* logOffsetBuffer [[buffer(12)]],
                       uint2 threadCoord [[thread_position_in_grid]],
                       uint2 gridSize [[threads_per_grid]])
 {
@@ -1393,6 +1429,12 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
             //NOTE: tile is full covered. Add path start op (with winding offset).
             //      Additionally if color is opaque and tile is fully inside clip, trim tile list.
             int pathOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
+
+            if(pathOpIndex >= tileOpMax[0])
+            {
+                return;
+            }
+
             device mg_mtl_tile_op* pathOp = &tileOpBuffer[pathOpIndex];
             pathOp->kind = MG_MTL_OP_CLIP_FILL;
             pathOp->next = -1;
@@ -1421,6 +1463,11 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
         {
             //NOTE: add path start op (with winding offset)
             int startOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
+
+            if(startOpIndex >= tileOpMax[0])
+            {
+                return;
+            }
+
             device mg_mtl_tile_op* startOp = &tileOpBuffer[startOpIndex];
             startOp->kind = MG_MTL_OP_START;
             startOp->next = -1;
@@ -1439,6 +1486,11 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
             //NOTE: add path end op
             int endOpIndex = atomic_fetch_add_explicit(tileOpCount, 1, memory_order_relaxed);
+
+            if(endOpIndex >= tileOpMax[0])
+            {
+                return;
+            }
+
             device mg_mtl_tile_op* endOp = &tileOpBuffer[endOpIndex];
             endOp->kind = MG_MTL_OP_END;
             endOp->next = -1;
@@ -1446,7 +1498,6 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
             *nextLink = endOpIndex;
             nextLink = &endOp->next;
         }
-        }
     }
 }