[mtl canvas, exp] try tiling per tile and avoiding sorting pass (worse on simple shapes and text, but degrades slower with complex scenes (eg tiger). Not great though)
This commit is contained in:
parent
92f4909d63
commit
11113f597c
|
@ -34,6 +34,7 @@ typedef struct mg_mtl_canvas_backend
|
||||||
mg_color clearColor;
|
mg_color clearColor;
|
||||||
|
|
||||||
// permanent metal resources
|
// permanent metal resources
|
||||||
|
id<MTLComputePipelineState> trianglePipeline;
|
||||||
id<MTLComputePipelineState> tilingPipeline;
|
id<MTLComputePipelineState> tilingPipeline;
|
||||||
id<MTLComputePipelineState> sortingPipeline;
|
id<MTLComputePipelineState> sortingPipeline;
|
||||||
id<MTLComputePipelineState> computePipeline;
|
id<MTLComputePipelineState> computePipeline;
|
||||||
|
@ -206,6 +207,9 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
{
|
{
|
||||||
f32 scale = surface->mtlLayer.contentsScale;
|
f32 scale = surface->mtlLayer.contentsScale;
|
||||||
vector_uint2 viewportSize = {backend->viewPort.w * scale, backend->viewPort.h * scale};
|
vector_uint2 viewportSize = {backend->viewPort.w * scale, backend->viewPort.h * scale};
|
||||||
|
u32 nTilesX = (viewportSize.x + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
||||||
|
u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
||||||
|
int triangleCount = indexCount/3;
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the clear counter
|
//NOTE(martin): encode the clear counter
|
||||||
|
@ -215,6 +219,25 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
[blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
|
[blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
|
||||||
[blitEncoder endEncoding];
|
[blitEncoder endEncoding];
|
||||||
|
|
||||||
|
//-----------------------------------------------------------
|
||||||
|
//NOTE(martin): encode the triangle prepass
|
||||||
|
//-----------------------------------------------------------
|
||||||
|
id<MTLComputeCommandEncoder> triangleEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
|
triangleEncoder.label = @"triangle pass";
|
||||||
|
[triangleEncoder setComputePipelineState: backend->trianglePipeline];
|
||||||
|
[triangleEncoder setBuffer: backend->vertexBuffer[backend->bufferIndex] offset:backend->vertexBufferOffset atIndex: 0];
|
||||||
|
[triangleEncoder setBuffer: backend->indexBuffer[backend->bufferIndex] offset:backend->indexBufferOffset atIndex: 1];
|
||||||
|
[triangleEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 2];
|
||||||
|
[triangleEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3];
|
||||||
|
|
||||||
|
[triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
|
||||||
|
|
||||||
|
MTLSize triangleGroupSize = MTLSizeMake(backend->trianglePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
||||||
|
MTLSize triangleGridSize = MTLSizeMake(triangleCount, 1, 1);
|
||||||
|
|
||||||
|
[triangleEncoder dispatchThreads: triangleGridSize threadsPerThreadgroup: triangleGroupSize];
|
||||||
|
[triangleEncoder endEncoding];
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the tiling pass
|
//NOTE(martin): encode the tiling pass
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
@ -222,18 +245,16 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
tileEncoder.label = @"tiling pass";
|
tileEncoder.label = @"tiling pass";
|
||||||
[tileEncoder setComputePipelineState: backend->tilingPipeline];
|
[tileEncoder setComputePipelineState: backend->tilingPipeline];
|
||||||
[tileEncoder setBuffer: backend->vertexBuffer[backend->bufferIndex] offset:backend->vertexBufferOffset atIndex: 0];
|
[tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 0];
|
||||||
[tileEncoder setBuffer: backend->indexBuffer[backend->bufferIndex] offset:backend->indexBufferOffset atIndex: 1];
|
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
|
||||||
[tileEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 2];
|
[tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2];
|
||||||
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
|
||||||
[tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
|
||||||
[tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 5];
|
|
||||||
|
|
||||||
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 6];
|
[tileEncoder setBytes: &triangleCount length:sizeof(int) atIndex: 3];
|
||||||
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 7];
|
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 4];
|
||||||
|
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 5];
|
||||||
|
|
||||||
MTLSize tileGroupSize = MTLSizeMake(backend->tilingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
MTLSize tileGroupSize = MTLSizeMake(1, 1, 16);
|
||||||
MTLSize tileGridSize = MTLSizeMake(indexCount/3, 1, 1);
|
MTLSize tileGridSize = MTLSizeMake(nTilesX, nTilesY, 16);
|
||||||
|
|
||||||
[tileEncoder dispatchThreads: tileGridSize threadsPerThreadgroup: tileGroupSize];
|
[tileEncoder dispatchThreads: tileGridSize threadsPerThreadgroup: tileGroupSize];
|
||||||
[tileEncoder endEncoding];
|
[tileEncoder endEncoding];
|
||||||
|
@ -241,7 +262,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the sorting pass
|
//NOTE(martin): encode the sorting pass
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
/*
|
||||||
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
sortEncoder.label = @"sorting pass";
|
sortEncoder.label = @"sorting pass";
|
||||||
[sortEncoder setComputePipelineState: backend->sortingPipeline];
|
[sortEncoder setComputePipelineState: backend->sortingPipeline];
|
||||||
|
@ -257,7 +278,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
|
|
||||||
[sortEncoder dispatchThreads: sortGridSize threadsPerThreadgroup: sortGroupSize];
|
[sortEncoder dispatchThreads: sortGridSize threadsPerThreadgroup: sortGroupSize];
|
||||||
[sortEncoder endEncoding];
|
[sortEncoder endEncoding];
|
||||||
|
*/
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode drawing pass
|
//NOTE(martin): encode drawing pass
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
@ -530,6 +551,7 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
LOG_ERROR("error : %s\n", errStr);
|
LOG_ERROR("error : %s\n", errStr);
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
id<MTLFunction> triangleFunction = [library newFunctionWithName:@"TriangleKernel"];
|
||||||
id<MTLFunction> tilingFunction = [library newFunctionWithName:@"TileKernel"];
|
id<MTLFunction> tilingFunction = [library newFunctionWithName:@"TileKernel"];
|
||||||
id<MTLFunction> sortingFunction = [library newFunctionWithName:@"SortKernel"];
|
id<MTLFunction> sortingFunction = [library newFunctionWithName:@"SortKernel"];
|
||||||
id<MTLFunction> computeFunction = [library newFunctionWithName:@"RenderKernel"];
|
id<MTLFunction> computeFunction = [library newFunctionWithName:@"RenderKernel"];
|
||||||
|
@ -544,9 +566,16 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
error:&error];
|
error:&error];
|
||||||
ASSERT(backend->computePipeline);
|
ASSERT(backend->computePipeline);
|
||||||
|
|
||||||
|
MTLComputePipelineDescriptor* trianglePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||||
|
trianglePipelineDesc.computeFunction = triangleFunction;
|
||||||
|
|
||||||
|
backend->trianglePipeline = [metalSurface->device newComputePipelineStateWithDescriptor: trianglePipelineDesc
|
||||||
|
options: MTLPipelineOptionNone
|
||||||
|
reflection: nil
|
||||||
|
error: &error];
|
||||||
|
|
||||||
MTLComputePipelineDescriptor* tilingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
MTLComputePipelineDescriptor* tilingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||||
tilingPipelineDesc.computeFunction = tilingFunction;
|
tilingPipelineDesc.computeFunction = tilingFunction;
|
||||||
// tilingPipelineDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
|
||||||
|
|
||||||
backend->tilingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: tilingPipelineDesc
|
backend->tilingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: tilingPipelineDesc
|
||||||
options: MTLPipelineOptionNone
|
options: MTLPipelineOptionNone
|
||||||
|
@ -555,7 +584,6 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
|
|
||||||
MTLComputePipelineDescriptor* sortingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
MTLComputePipelineDescriptor* sortingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||||
sortingPipelineDesc.computeFunction = sortingFunction;
|
sortingPipelineDesc.computeFunction = sortingFunction;
|
||||||
// sortingPipelineDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
|
||||||
|
|
||||||
backend->sortingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: sortingPipelineDesc
|
backend->sortingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: sortingPipelineDesc
|
||||||
options: MTLPipelineOptionNone
|
options: MTLPipelineOptionNone
|
||||||
|
|
|
@ -43,6 +43,7 @@ typedef struct mg_triangle_data
|
||||||
vector_float4 cubic2;
|
vector_float4 cubic2;
|
||||||
|
|
||||||
vector_int4 box;
|
vector_int4 box;
|
||||||
|
vector_int4 tileBox;
|
||||||
|
|
||||||
vector_int2 p0;
|
vector_int2 p0;
|
||||||
vector_int2 p1;
|
vector_int2 p1;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
#include<metal_stdlib>
|
#include<metal_stdlib>
|
||||||
#include<simd/simd.h>
|
#include<simd/simd.h>
|
||||||
|
#include<metal_simdgroup>
|
||||||
|
|
||||||
#include"mtl_shader.h"
|
#include"mtl_shader.h"
|
||||||
|
|
||||||
|
@ -45,21 +46,13 @@ int orient2d(int2 a, int2 b, int2 c)
|
||||||
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));
|
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
|
||||||
constant uint* indexBuffer [[buffer(1)]],
|
constant uint* indexBuffer [[buffer(1)]],
|
||||||
constant mg_shape* shapeBuffer [[buffer(2)]],
|
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||||
device volatile atomic_uint* tileCounters [[buffer(3)]],
|
device mg_triangle_data* triangleArray [[buffer(3)]],
|
||||||
device uint* tileArrayBuffer [[buffer(4)]],
|
constant float* scaling [[buffer(4)]],
|
||||||
device mg_triangle_data* triangleArray [[buffer(5)]],
|
|
||||||
constant uint2* viewport [[buffer(6)]],
|
|
||||||
constant float* scaling [[buffer(7)]],
|
|
||||||
uint gid [[thread_position_in_grid]])
|
uint gid [[thread_position_in_grid]])
|
||||||
{
|
{
|
||||||
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
|
|
||||||
int nTilesX = tilesMatrixDim.x;
|
|
||||||
int nTilesY = tilesMatrixDim.y;
|
|
||||||
|
|
||||||
uint triangleIndex = gid * 3;
|
uint triangleIndex = gid * 3;
|
||||||
|
|
||||||
uint i0 = indexBuffer[triangleIndex];
|
uint i0 = indexBuffer[triangleIndex];
|
||||||
|
@ -110,26 +103,56 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2;
|
triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2;
|
||||||
triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2;
|
triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2;
|
||||||
|
|
||||||
//NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax
|
triangleArray[gid].tileBox = int4(fbox)/RENDERER_TILE_SIZE;
|
||||||
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
|
}
|
||||||
int4 tileBox = int4(fbox)/RENDERER_TILE_SIZE;
|
|
||||||
|
|
||||||
int xMin = max(0, tileBox.x);
|
kernel void TileKernel(const device mg_triangle_data* triangleArray [[buffer(0)]],
|
||||||
int yMin = max(0, tileBox.y);
|
device uint* tileCounters [[buffer(1)]],
|
||||||
int xMax = min(tileBox.z, nTilesX-1);
|
device uint* tileArrayBuffer [[buffer(2)]],
|
||||||
int yMax = min(tileBox.w, nTilesY-1);
|
constant int* triangleCount [[buffer(3)]],
|
||||||
|
constant uint2* viewport [[buffer(4)]],
|
||||||
|
constant float* scaling [[buffer(5)]],
|
||||||
|
uint3 gid [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
|
||||||
|
int nTilesX = tilesMatrixDim.x;
|
||||||
|
|
||||||
for(int y = yMin; y <= yMax; y++)
|
int tileX = gid.x;
|
||||||
|
int tileY = gid.y;
|
||||||
|
int tileIndex = tileY * nTilesX + tileX;
|
||||||
|
int groupIndex = gid.z;
|
||||||
|
|
||||||
|
const int groupSize = 16;
|
||||||
|
int count = 0;
|
||||||
|
int mask = 0xffff>>(16-groupIndex);
|
||||||
|
|
||||||
|
for(int triangleBatchIndex=0; triangleBatchIndex<triangleCount[0]; triangleBatchIndex += groupSize)
|
||||||
{
|
{
|
||||||
for(int x = xMin ; x <= xMax; x++)
|
int triangleIndex = triangleBatchIndex + groupIndex;
|
||||||
|
bool active = false;
|
||||||
|
// if(triangleIndex + groupIndex < triangleCount[0])
|
||||||
{
|
{
|
||||||
int tileIndex = y*nTilesX + x;
|
int4 box = triangleArray[triangleIndex].tileBox;
|
||||||
uint counter = atomic_fetch_add_explicit(&(tileCounters[tileIndex]), 1, memory_order_relaxed);
|
/*
|
||||||
if(counter < RENDERER_TILE_BUFFER_SIZE)
|
if( tileX >= box.x && tileX <= box.z
|
||||||
|
&& tileY >= box.y && tileY <= box.w)
|
||||||
{
|
{
|
||||||
tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + counter] = gid;
|
active = true;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int vote = uint64_t(simd_ballot(active));
|
||||||
|
if(active)
|
||||||
|
{
|
||||||
|
int batchOffset = popcount(vote & mask);
|
||||||
|
tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + count + batchOffset] = triangleIndex;
|
||||||
|
}
|
||||||
|
count += popcount(vote);
|
||||||
|
}
|
||||||
|
if(groupIndex == 0)
|
||||||
|
{
|
||||||
|
tileCounters[tileIndex] = count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue