diff --git a/examples/canvas/main.c b/examples/canvas/main.c index b5dca42..7632e3d 100644 --- a/examples/canvas/main.c +++ b/examples/canvas/main.c @@ -162,6 +162,7 @@ int main() // head mg_set_color_rgba(1, 1, 0, 1); + mg_circle_fill(x, y, 200); // smile diff --git a/examples/tiger/main.c b/examples/tiger/main.c index 0ecdab2..830dee4 100644 --- a/examples/tiger/main.c +++ b/examples/tiger/main.c @@ -82,6 +82,11 @@ int main() mp_window_bring_to_front(window); mp_window_focus(window); + bool tracked = false; + vec2 trackPoint = {0}; + f32 zoom = 1; + f32 startX = 0, startY = 0; + f64 frameTime = 0; while(!mp_should_quit()) @@ -99,8 +104,35 @@ int main() mp_request_quit(); } break; - case MP_EVENT_KEYBOARD_KEY: + case MP_EVENT_MOUSE_BUTTON: { + if(event.key.code == MP_MOUSE_LEFT) + { + if(event.key.action == MP_KEY_PRESS) + { + tracked = true; + vec2 mousePos = mp_mouse_position(); + trackPoint.x = mousePos.x/zoom - startX; + trackPoint.y = mousePos.y/zoom - startY; + } + else + { + tracked = false; + } + } + } break; + + case MP_EVENT_MOUSE_WHEEL: + { + vec2 mousePos = mp_mouse_position(); + f32 trackX = mousePos.x/zoom - startX; + f32 trackY = mousePos.y/zoom - startY; + + zoom *= 1 + event.move.deltaY * 0.01; + zoom = Clamp(zoom, 0.2, 10); + + startX = mousePos.x/zoom - trackX; + startY = mousePos.y/zoom - trackY; } break; default: @@ -108,13 +140,20 @@ int main() } } + if(tracked) + { + vec2 mousePos = mp_mouse_position(); + startX = mousePos.x/zoom - trackPoint.x; + startY = mousePos.y/zoom - trackPoint.y; + } + mg_surface_prepare(surface); mg_set_color_rgba(1, 0, 1, 1); mg_clear(); - mg_matrix_push((mg_mat2x3){1, 0, 300, - 0, 1, 200}); + mg_matrix_push((mg_mat2x3){zoom, 0, 300+startX*zoom, + 0, zoom, 200+startY*zoom}); draw_tiger(); diff --git a/src/graphics.c b/src/graphics.c index ee9c749..66e09f2 100644 --- a/src/graphics.c +++ b/src/graphics.c @@ -208,6 +208,7 @@ typedef struct mg_canvas_data mp_rect srcRegion; vec4 shapeExtents; + vec4 shapeScreenExtents; u32 nextShapeIndex; u32 vertexCount; u32 indexCount; @@ -789,6 +790,13 @@ void mg_finalize_shape(mg_canvas_data* canvas) mg_vertex_layout* layout = &canvas->backend->vertexLayout; *(mg_mat2x3*)(layout->uvTransformBuffer + index*layout->uvTransformStride) = uvTransform; + //TODO: transform extents before clipping + mp_rect clip = {maximum(canvas->clip.x, canvas->shapeScreenExtents.x), + maximum(canvas->clip.y, canvas->shapeScreenExtents.y), + minimum(canvas->clip.x + canvas->clip.w, canvas->shapeScreenExtents.z), + minimum(canvas->clip.y + canvas->clip.h, canvas->shapeScreenExtents.w)}; + + *(mp_rect*)(((char*)layout->clipBuffer) + index*layout->clipStride) = clip; } } @@ -799,17 +807,12 @@ u32 mg_next_shape(mg_canvas_data* canvas, mg_attributes* attributes) canvas->transform = attributes->transform; canvas->srcRegion = attributes->srcRegion; canvas->shapeExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX}; + canvas->shapeScreenExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX}; mg_vertex_layout* layout = &canvas->backend->vertexLayout; int index = canvas->nextShapeIndex; canvas->nextShapeIndex++; - mp_rect clip = {canvas->clip.x, - canvas->clip.y, - canvas->clip.x + canvas->clip.w, - canvas->clip.y + canvas->clip.h}; - - *(mp_rect*)(((char*)layout->clipBuffer) + index*layout->clipStride) = clip; *(mg_color*)(((char*)layout->colorBuffer) + index*layout->colorStride) = attributes->color; return(index); @@ -842,6 +845,11 @@ void mg_push_vertex_cubic(mg_canvas_data* canvas, vec2 pos, vec4 cubic) vec2 screenPos = mg_mat2x3_mul(canvas->transform, pos); + canvas->shapeScreenExtents.x = minimum(canvas->shapeScreenExtents.x, screenPos.x); + canvas->shapeScreenExtents.y = minimum(canvas->shapeScreenExtents.y, screenPos.y); + canvas->shapeScreenExtents.z = maximum(canvas->shapeScreenExtents.z, screenPos.x); + canvas->shapeScreenExtents.w = maximum(canvas->shapeScreenExtents.w, screenPos.y); + mg_vertex_layout* layout = &canvas->backend->vertexLayout; ASSERT(canvas->vertexCount < layout->maxVertexCount); ASSERT(canvas->nextShapeIndex > 0); diff --git a/src/mtl_canvas.m b/src/mtl_canvas.m index fa5ceca..5911829 100644 --- a/src/mtl_canvas.m +++ b/src/mtl_canvas.m @@ -34,9 +34,8 @@ typedef struct mg_mtl_canvas_backend mg_color clearColor; // permanent metal resources + id shapePipeline; id trianglePipeline; - id tilingPipeline; - id sortingPipeline; id computePipeline; id renderPipeline; @@ -53,9 +52,10 @@ typedef struct mg_mtl_canvas_backend id shapeBuffer[MG_MTL_MAX_BUFFER_AVAILABLE]; id vertexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE]; id indexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE]; - id tileCounters; - id tileArrayBuffer; + id shapeQueueBuffer; id triangleArray; + id arenaBuffer; + id arenaOffset; } mg_mtl_canvas_backend; @@ -212,15 +212,33 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image int triangleCount = indexCount/3; //----------------------------------------------------------- - //NOTE(martin): encode the clear counter + //NOTE(martin): encode the clear arena offset //----------------------------------------------------------- id blitEncoder = [surface->commandBuffer blitCommandEncoder]; - blitEncoder.label = @"clear counters"; - [blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0]; + blitEncoder.label = @"clear arena"; + [blitEncoder fillBuffer: backend->arenaOffset range: NSMakeRange(0, sizeof(int)) value: 0]; [blitEncoder endEncoding]; //----------------------------------------------------------- - //NOTE(martin): encode the triangle prepass + //NOTE(martin): encode the shape setup pass + //----------------------------------------------------------- + id shapeEncoder = [surface->commandBuffer computeCommandEncoder]; + shapeEncoder.label = @"shape pass"; + [shapeEncoder setComputePipelineState: backend->shapePipeline]; + [shapeEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 0]; + [shapeEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 1]; + [shapeEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 2]; + [shapeEncoder setBuffer: backend->arenaOffset offset:0 atIndex: 3]; + [shapeEncoder setBytes: &scale length: sizeof(float) atIndex: 4]; + + MTLSize shapeGroupSize = MTLSizeMake(backend->shapePipeline.maxTotalThreadsPerThreadgroup, 1, 1); + MTLSize shapeGridSize = MTLSizeMake(shapeCount, 1, 1); + + [shapeEncoder dispatchThreads: shapeGridSize threadsPerThreadgroup: shapeGroupSize]; + [shapeEncoder endEncoding]; + + //----------------------------------------------------------- + //NOTE(martin): encode the triangle setup and binning //----------------------------------------------------------- id triangleEncoder = [surface->commandBuffer computeCommandEncoder]; triangleEncoder.label = @"triangle pass"; @@ -229,8 +247,11 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image [triangleEncoder setBuffer: backend->indexBuffer[backend->bufferIndex] offset:backend->indexBufferOffset atIndex: 1]; [triangleEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 2]; [triangleEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3]; + [triangleEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 4]; + [triangleEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 5]; + [triangleEncoder setBuffer: backend->arenaOffset offset:0 atIndex: 6]; - [triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 4]; + [triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 7]; MTLSize triangleGroupSize = MTLSizeMake(backend->trianglePipeline.maxTotalThreadsPerThreadgroup, 1, 1); MTLSize triangleGridSize = MTLSizeMake(triangleCount, 1, 1); @@ -238,56 +259,15 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image [triangleEncoder dispatchThreads: triangleGridSize threadsPerThreadgroup: triangleGroupSize]; [triangleEncoder endEncoding]; - //----------------------------------------------------------- - //NOTE(martin): encode the tiling pass - //----------------------------------------------------------- - - id tileEncoder = [surface->commandBuffer computeCommandEncoder]; - tileEncoder.label = @"tiling pass"; - [tileEncoder setComputePipelineState: backend->tilingPipeline]; - [tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 0]; - [tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1]; - [tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2]; - - [tileEncoder setBytes: &triangleCount length:sizeof(int) atIndex: 3]; - [tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 4]; - [tileEncoder setBytes: &scale length: sizeof(float) atIndex: 5]; - - MTLSize tileGroupSize = MTLSizeMake(1, 1, 16); - MTLSize tileGridSize = MTLSizeMake(nTilesX, nTilesY, 16); - - [tileEncoder dispatchThreads: tileGridSize threadsPerThreadgroup: tileGroupSize]; - [tileEncoder endEncoding]; - - //----------------------------------------------------------- - //NOTE(martin): encode the sorting pass - //----------------------------------------------------------- -/* - id sortEncoder = [surface->commandBuffer computeCommandEncoder]; - sortEncoder.label = @"sorting pass"; - [sortEncoder setComputePipelineState: backend->sortingPipeline]; - [sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 0]; - [sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1]; - [sortEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2]; - - u32 nTilesX = (viewportSize.x + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE; - u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE; - - MTLSize sortGroupSize = MTLSizeMake(backend->sortingPipeline.maxTotalThreadsPerThreadgroup, 1, 1); - MTLSize sortGridSize = MTLSizeMake(nTilesX*nTilesY, 1, 1); - - [sortEncoder dispatchThreads: sortGridSize threadsPerThreadgroup: sortGroupSize]; - [sortEncoder endEncoding]; -*/ //----------------------------------------------------------- //NOTE(martin): encode drawing pass //----------------------------------------------------------- id drawEncoder = [surface->commandBuffer computeCommandEncoder]; drawEncoder.label = @"drawing pass"; [drawEncoder setComputePipelineState:backend->computePipeline]; - [drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 0]; - [drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 1]; - [drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 2]; + [drawEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 0]; + [drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 1]; + [drawEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 2]; [drawEncoder setTexture: backend->outTexture atIndex: 0]; int useTexture = 0; @@ -298,8 +278,9 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image useTexture = 1; } - [drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 3]; - [drawEncoder setBytes: &scale length: sizeof(float) atIndex: 4]; + [drawEncoder setBytes: &shapeCount length:sizeof(int) atIndex: 3]; + [drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 4]; + [drawEncoder setBytes: &scale length: sizeof(float) atIndex: 5]; //TODO: check that we don't exceed maxTotalThreadsPerThreadgroup DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup); @@ -391,8 +372,13 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface) } //NOTE: semaphore does not have a destructor? - [backend->tileArrayBuffer release]; + [backend->shapeQueueBuffer release]; [backend->triangleArray release]; + [backend->arenaBuffer release]; + [backend->arenaOffset release]; + + ////////////////////////////////////////// + //TODO release all pipelines [backend->computePipeline release]; } } @@ -524,18 +510,18 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) options: bufferOptions]; } - backend->tileArrayBuffer = [metalSurface->device newBufferWithLength: RENDERER_TILE_BUFFER_SIZE*sizeof(int)*RENDERER_MAX_TILES - options: MTLResourceStorageModePrivate]; - backend->triangleArray = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_triangle_data) options: MTLResourceStorageModePrivate]; - //TODO(martin): retain ? - //----------------------------------------------------------- - //NOTE(martin): create and initialize tile counters - //----------------------------------------------------------- - backend->tileCounters = [metalSurface->device newBufferWithLength: RENDERER_MAX_TILES*sizeof(uint) - options: MTLResourceStorageModePrivate]; + backend->shapeQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_shape_queue) + options: MTLResourceStorageModePrivate]; + + backend->arenaBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_queue_elt) + options: MTLResourceStorageModePrivate]; + + backend->arenaOffset = [metalSurface->device newBufferWithLength: sizeof(int) + options: MTLResourceStorageModePrivate]; + //----------------------------------------------------------- //NOTE(martin): load the library //----------------------------------------------------------- @@ -551,9 +537,8 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) LOG_ERROR("error : %s\n", errStr); return(0); } + id shapeFunction = [library newFunctionWithName:@"ShapeSetup"]; id triangleFunction = [library newFunctionWithName:@"TriangleKernel"]; - id tilingFunction = [library newFunctionWithName:@"TileKernel"]; - id sortingFunction = [library newFunctionWithName:@"SortKernel"]; id computeFunction = [library newFunctionWithName:@"RenderKernel"]; id vertexFunction = [library newFunctionWithName:@"VertexShader"]; id fragmentFunction = [library newFunctionWithName:@"FragmentShader"]; @@ -566,6 +551,14 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) error:&error]; ASSERT(backend->computePipeline); + MTLComputePipelineDescriptor* shapePipelineDesc = [[MTLComputePipelineDescriptor alloc] init]; + shapePipelineDesc.computeFunction = shapeFunction; + + backend->shapePipeline = [metalSurface->device newComputePipelineStateWithDescriptor: shapePipelineDesc + options: MTLPipelineOptionNone + reflection: nil + error: &error]; + MTLComputePipelineDescriptor* trianglePipelineDesc = [[MTLComputePipelineDescriptor alloc] init]; trianglePipelineDesc.computeFunction = triangleFunction; @@ -574,22 +567,6 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface) reflection: nil error: &error]; - MTLComputePipelineDescriptor* tilingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init]; - tilingPipelineDesc.computeFunction = tilingFunction; - - backend->tilingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: tilingPipelineDesc - options: MTLPipelineOptionNone - reflection: nil - error: &error]; - - MTLComputePipelineDescriptor* sortingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init]; - sortingPipelineDesc.computeFunction = sortingFunction; - - backend->sortingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: sortingPipelineDesc - options: MTLPipelineOptionNone - reflection: nil - error: &error]; - //----------------------------------------------------------- //NOTE(martin): setup our render pipeline state //----------------------------------------------------------- diff --git a/src/mtl_shader.h b/src/mtl_shader.h index e1ff8c5..abfa981 100644 --- a/src/mtl_shader.h +++ b/src/mtl_shader.h @@ -59,4 +59,32 @@ typedef struct mg_triangle_data } mg_triangle_data; +#ifndef __METAL_VERSION__ +#define device +#else +using namespace metal; +#endif + +typedef struct mg_tile_elt +{ + int triangleIndex; + int next; +} mg_queue_elt; + +typedef struct mg_tile_queue +{ + atomic_int first; +} mg_tile_queue; + +typedef struct mg_shape_queue +{ + vector_int4 area; + device mg_tile_queue* tileQueues; +} mg_shape_queue; + +#ifndef __METAL_VERSION__ +#undef device +#endif + + #endif //__MTL_RENDERER_H_ diff --git a/src/mtl_shader.metal b/src/mtl_shader.metal index 1a69deb..81399d3 100644 --- a/src/mtl_shader.metal +++ b/src/mtl_shader.metal @@ -46,13 +46,60 @@ int orient2d(int2 a, int2 b, int2 c) return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x)); } +device uchar* arena_allocate(device uchar* arenaBuffer, + device volatile atomic_uint* arenaOffset, + uint size) +{ + uint index = atomic_fetch_add_explicit(arenaOffset, size, memory_order_relaxed); + return(&arenaBuffer[index]); +} + +//NOTE: shape setup allocates tile queues for each shape + +kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]], + device mg_shape_queue* shapeQueueBuffer [[buffer(1)]], + device uchar* arenaBuffer [[buffer(2)]], + device volatile atomic_uint* arenaOffset [[buffer(3)]], + constant float* scaling [[buffer(4)]], + uint gid [[thread_position_in_grid]]) +{ + + float4 box = shapeBuffer[gid].clip * scaling[0]; + + int2 firstTile = int2(box.xy)/RENDERER_TILE_SIZE; + + //WARN: the following can result in a 1x1 tile allocated even for empty boxes. But if we didn't allocate + // any tile queue, the tileQueues pointer for that shape would alias the tileQueues pointer of another + // shape, and we would have to detect that in the tiling and drawing kernels. Instead, just accept some + // waste and keep the other kernels more uniforms for now... + int nTilesX = int(box.z)/RENDERER_TILE_SIZE - firstTile.x + 1; + int nTilesY = int(box.w)/RENDERER_TILE_SIZE - firstTile.y + 1; + + int tileCount = nTilesX * nTilesY; + int tileArraySize = tileCount * sizeof(mg_tile_queue); + + shapeQueueBuffer[gid].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY); + shapeQueueBuffer[gid].tileQueues = (device mg_tile_queue*)arena_allocate(arenaBuffer, arenaOffset, tileArraySize); + + for(int i=0; i 0 ? 1 : -1; triangleArray[gid].cw = cw; @@ -103,92 +149,45 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2; triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2; - triangleArray[gid].tileBox = int4(fbox)/RENDERER_TILE_SIZE; -} + int4 tileBox = int4(fbox)/RENDERER_TILE_SIZE; + triangleArray[gid].tileBox = tileBox; -kernel void TileKernel(const device mg_triangle_data* triangleArray [[buffer(0)]], - device uint* tileCounters [[buffer(1)]], - device uint* tileArrayBuffer [[buffer(2)]], - constant int* triangleCount [[buffer(3)]], - constant uint2* viewport [[buffer(4)]], - constant float* scaling [[buffer(5)]], - uint3 gid [[thread_position_in_grid]]) -{ - uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1; - int nTilesX = tilesMatrixDim.x; - int tileX = gid.x; - int tileY = gid.y; - int tileIndex = tileY * nTilesX + tileX; - int groupIndex = gid.z; + //NOTE: bucket triangle into tiles + device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex]; - const int groupSize = 16; - int count = 0; - int mask = 0xffff>>(16-groupIndex); + int xMin = max(0, tileBox.x - shapeQueue->area.x); + int yMin = max(0, tileBox.y - shapeQueue->area.y); + int xMax = min(tileBox.z - shapeQueue->area.x, shapeQueue->area.z-1); + int yMax = min(tileBox.w - shapeQueue->area.y, shapeQueue->area.w-1); - for(int triangleBatchIndex=0; triangleBatchIndex= box.x && tileX <= box.z - && tileY >= box.y && tileY <= box.w) - { - active = true; - } - */ - } + int tileIndex = y*shapeQueue->area.z + x; - int vote = uint64_t(simd_ballot(active)); - if(active) - { - int batchOffset = popcount(vote & mask); - tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + count + batchOffset] = triangleIndex; - } - count += popcount(vote); - } - if(groupIndex == 0) - { - tileCounters[tileIndex] = count; - } -} + device mg_tile_queue* tileQueue = &shapeQueue->tileQueues[tileIndex]; + device mg_queue_elt* elt = (device mg_queue_elt*)arena_allocate(arenaBuffer, arenaOffset, sizeof(mg_queue_elt)); + int eltIndex = (device uchar*)elt - arenaBuffer; -kernel void SortKernel(constant mg_triangle_data* triangleArray [[buffer(0)]], - const device uint* tileCounters [[buffer(1)]], - device uint* tileArrayBuffer [[buffer(2)]], - uint gid [[thread_position_in_grid]]) -{ - uint tileIndex = gid; - uint tileArrayOffset = tileIndex * RENDERER_TILE_BUFFER_SIZE; - uint tileArrayCount = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE); + elt->next = atomic_exchange_explicit(&tileQueue->first, eltIndex, memory_order_relaxed); - for(uint tileArrayIndex=1; tileArrayIndex < tileArrayCount; tileArrayIndex++) - { - for(uint sortIndex = tileArrayIndex; sortIndex > 0; sortIndex--) - { - int shapeIndex = triangleArray[tileArrayBuffer[tileArrayOffset + sortIndex]].shapeIndex; - int prevShapeIndex = triangleArray[tileArrayBuffer[tileArrayOffset + sortIndex - 1]].shapeIndex; - - if(shapeIndex >= prevShapeIndex) - { - break; - } - uint tmp = tileArrayBuffer[tileArrayOffset + sortIndex]; - tileArrayBuffer[tileArrayOffset + sortIndex] = tileArrayBuffer[tileArrayOffset + sortIndex - 1]; - tileArrayBuffer[tileArrayOffset + sortIndex - 1] = tmp; + elt->triangleIndex = gid; } } } -kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]], - const device uint* tileArrayBuffer [[buffer(1)]], - const device mg_triangle_data* triangleArray [[buffer(2)]], +kernel void RenderKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(0)]], + const device mg_triangle_data* triangleArray [[buffer(1)]], + const device uchar* arenaBuffer [[buffer(2)]], - constant int* useTexture [[buffer(3)]], - constant float* scaling [[buffer(4)]], + constant int* shapeCount [[buffer(3)]], + constant int* useTexture [[buffer(4)]], + constant float* scaling [[buffer(5)]], texture2d outTexture [[texture(0)]], texture2d texAtlas [[texture(1)]], @@ -200,37 +199,7 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]], { //TODO: guard against thread group size not equal to tile size? const int2 pixelCoord = int2(gid); - const uint2 tileCoord = uint2(pixelCoord)/ RENDERER_TILE_SIZE; - const uint2 tilesMatrixDim = (gridSize - 1) / RENDERER_TILE_SIZE + 1; - const uint tileIndex = tileCoord.y * tilesMatrixDim.x + tileCoord.x; - const uint tileCounter = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE); - -#ifdef RENDERER_DEBUG_TILES - //NOTE(martin): color code debug values and show the tile grid - { - float4 fragColor = float4(0); - - if( pixelCoord.x % 16 == 0 - ||pixelCoord.y % 16 == 0) - { - fragColor = float4(0, 0, 0, 1); - } - else if(tileCounters[tileIndex] == 0xffffu) - { - fragColor = float4(1, 0, 1, 1); - } - else if(tileCounter != 0u) - { - fragColor = float4(0, 1, 0, 1); - } - else - { - fragColor = float4(1, 0, 0, 1); - } - outTexture.write(fragColor, gid); - return; - } -#endif + const int2 tileCoord = pixelCoord/ RENDERER_TILE_SIZE; const int subPixelFactor = 16; const int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor); @@ -258,89 +227,108 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]], currentColor[i] = float4(0, 0, 0, 0); } - for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++) + for(int shapeIndex = 0; shapeIndex < shapeCount[0]; shapeIndex++) { - int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex]; - const device mg_triangle_data* triangle = &triangleArray[triangleIndex]; + const device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex]; - int2 p0 = triangle->p0; - int2 p1 = triangle->p1; - int2 p2 = triangle->p2; - - int cw = triangle->cw; - - int bias0 = triangle->bias0; - int bias1 = triangle->bias1; - int bias2 = triangle->bias2; - - float4 cubic0 = triangle->cubic0; - float4 cubic1 = triangle->cubic1; - float4 cubic2 = triangle->cubic2; - - int shapeIndex = triangle->shapeIndex; - float4 color = triangle->color; - color.rgb *= color.a; - - int4 clip = triangle->box; - - matrix_float3x3 uvTransform = triangle->uvTransform; - - for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) + // get the tile queue that corresponds to our tile in the shape area + int2 tileQueueCoord = tileCoord - shapeQueue->area.xy; + if( tileQueueCoord.x >= 0 + && tileQueueCoord.y >= 0 + && tileQueueCoord.x < shapeQueue->area.z + && tileQueueCoord.y < shapeQueue->area.w) { - int2 samplePoint = samplePoints[sampleIndex]; + int tileQueueIndex = tileQueueCoord.y * shapeQueue->area.z + tileQueueCoord.x; + device mg_tile_queue* tileQueue = &shapeQueue->tileQueues[tileQueueIndex]; - if( samplePoint.x < clip.x - || samplePoint.x > clip.z - || samplePoint.y < clip.y - || samplePoint.y > clip.w) + int firstEltIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed); + device mg_queue_elt* elt = 0; + + for(int eltIndex = firstEltIndex; eltIndex >= 0; eltIndex = elt->next) { - continue; - } + elt = (device mg_queue_elt*)(arenaBuffer + eltIndex); + const device mg_triangle_data* triangle = &triangleArray[elt->triangleIndex]; - int w0 = cw*orient2d(p1, p2, samplePoint); - int w1 = cw*orient2d(p2, p0, samplePoint); - int w2 = cw*orient2d(p0, p1, samplePoint); + int2 p0 = triangle->p0; + int2 p1 = triangle->p1; + int2 p2 = triangle->p2; - if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0) - { - float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2); + int cw = triangle->cw; - float eps = 0.0001; - if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps) + int bias0 = triangle->bias0; + int bias1 = triangle->bias1; + int bias2 = triangle->bias2; + + float4 cubic0 = triangle->cubic0; + float4 cubic1 = triangle->cubic1; + float4 cubic2 = triangle->cubic2; + + int shapeIndex = triangle->shapeIndex; + float4 color = triangle->color; + color.rgb *= color.a; + + int4 clip = triangle->box; + + matrix_float3x3 uvTransform = triangle->uvTransform; + + for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) { - if(shapeIndex == currentShapeIndex[sampleIndex]) + int2 samplePoint = samplePoints[sampleIndex]; + + if( samplePoint.x < clip.x + || samplePoint.x > clip.z + || samplePoint.y < clip.y + || samplePoint.y > clip.w) { - flipCount[sampleIndex]++; + continue; } - else + + int w0 = cw*orient2d(p1, p2, samplePoint); + int w1 = cw*orient2d(p2, p0, samplePoint); + int w2 = cw*orient2d(p0, p1, samplePoint); + + if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0) { - if(flipCount[sampleIndex] & 0x01) + float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2); + + float eps = 0.0001; + if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps) { - sampleColor[sampleIndex] = currentColor[sampleIndex]; + if(shapeIndex == currentShapeIndex[sampleIndex]) + { + flipCount[sampleIndex]++; + } + else + { + if(flipCount[sampleIndex] & 0x01) + { + sampleColor[sampleIndex] = currentColor[sampleIndex]; + } + + float4 nextColor = color; + + if(useTexture[0]) + { + float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1); + float2 uv = (uvTransform * sampleFP).xy; + + constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear); + float4 texColor = texAtlas.sample(smp, uv); + + texColor.rgb *= texColor.a; + nextColor *= texColor; + } + + currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor; + currentShapeIndex[sampleIndex] = shapeIndex; + flipCount[sampleIndex] = 1; + } } - - float4 nextColor = color; - - if(useTexture[0]) - { - float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1); - float2 uv = (uvTransform * sampleFP).xy; - - constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear); - float4 texColor = texAtlas.sample(smp, uv); - - texColor.rgb *= texColor.a; - nextColor *= texColor; - } - - currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor; - currentShapeIndex[sampleIndex] = shapeIndex; - flipCount[sampleIndex] = 1; } } } } - } + } float4 pixelColor = float4(0); for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) @@ -353,4 +341,5 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]], } outTexture.write(pixelColor/float(sampleCount), gid); + }