[osx, canvas] Only dispatch raster shader for tiles that are overlapped by at least one path.

This commit is contained in:
Martin Fouilleul 2023-07-10 20:20:18 +02:00
parent bfc7530bcf
commit 9e8a0f5f69
3 changed files with 67 additions and 29 deletions

View File

@ -97,4 +97,11 @@ typedef struct mg_mtl_tile_queue
} mg_mtl_tile_queue;
typedef struct mg_mtl_screen_tile
{
vector_uint2 tileCoord;
int first;
} mg_mtl_screen_tile;
#endif //__MTL_RENDERER_H_

View File

@ -52,6 +52,7 @@ typedef struct mg_mtl_canvas_backend
id<MTLBuffer> tileOpBuffer;
id<MTLBuffer> tileOpCountBuffer;
id<MTLBuffer> screenTilesBuffer;
id<MTLBuffer> rasterDispatchBuffer;
int msaaCount;
vec2 frameSize;
@ -815,12 +816,24 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
//NOTE: encode GPU commands
@autoreleasepool
{
//NOTE: create output texture
MTLRenderPassDescriptor* clearDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
clearDescriptor.colorAttachments[0].texture = backend->outTexture;
clearDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
clearDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 0);
clearDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
id<MTLRenderCommandEncoder> clearEncoder = [surface->commandBuffer renderCommandEncoderWithDescriptor:clearDescriptor];
clearEncoder.label = @"clear out texture pass";
[clearEncoder endEncoding];
//NOTE: clear counters
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
blitEncoder.label = @"clear counters";
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->rasterDispatchBuffer range: NSMakeRange(0, sizeof(MTLDispatchThreadgroupsIndirectArguments)) value: 0];
[blitEncoder endEncoding];
//NOTE: path setup pass
@ -893,11 +906,12 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
[mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
[mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4];
[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:6];
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:7];
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:8];
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:9];
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:10];
[mergeEncoder setBuffer:backend->rasterDispatchBuffer offset:0 atIndex:6];
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:7];
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:9];
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1);
@ -933,7 +947,11 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
MTLSize rasterGroupSize = MTLSizeMake(16, 16, 1);
[rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
// [rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
[rasterEncoder dispatchThreadgroupsWithIndirectBuffer: backend->rasterDispatchBuffer
indirectBufferOffset: 0
threadsPerThreadgroup: rasterGroupSize];
[rasterEncoder endEncoding];
@ -970,7 +988,7 @@ void mg_mtl_canvas_resize(mg_mtl_canvas_backend* backend, vec2 size)
int nTilesX = (int)(size.x + tileSize - 1)/tileSize;
int nTilesY = (int)(size.y + tileSize - 1)/tileSize;
MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
options: bufferOptions];
if(backend->outTexture)
@ -1435,10 +1453,13 @@ mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface)
backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int)
options: bufferOptions];
backend->rasterDispatchBuffer = [surface->device newBufferWithLength: sizeof(MTLDispatchThreadgroupsIndirectArguments)
options: bufferOptions];
int tileSize = MG_MTL_TILE_SIZE;
int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
options: bufferOptions];
bufferOptions = MTLResourceStorageModeShared;

View File

@ -1325,24 +1325,27 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
device mg_mtl_tile_op* tileOpBuffer [[buffer(4)]],
device atomic_int* tileOpCount [[buffer(5)]],
device int* screenTilesBuffer [[buffer(6)]],
constant int* tileSize [[buffer(7)]],
constant float* scale [[buffer(8)]],
device char* logBuffer [[buffer(9)]],
device atomic_int* logOffsetBuffer [[buffer(10)]],
device MTLDispatchThreadgroupsIndirectArguments* dispatchBuffer [[buffer(6)]],
device mg_mtl_screen_tile* screenTilesBuffer [[buffer(7)]],
constant int* tileSize [[buffer(8)]],
constant float* scale [[buffer(9)]],
device char* logBuffer [[buffer(10)]],
device atomic_int* logOffsetBuffer [[buffer(11)]],
uint2 threadCoord [[thread_position_in_grid]],
uint2 gridSize [[threads_per_grid]])
{
int2 tileCoord = int2(threadCoord);
int tileIndex = tileCoord.y * gridSize.x + tileCoord.x;
device int* nextLink = &screenTilesBuffer[tileIndex];
*nextLink = -1;
int tileIndex = -1;
device int* nextLink = 0;
/*
mtl_log_context log = {.buffer = logBuffer,
.offset = logOffsetBuffer,
.enabled = true};
*/
dispatchBuffer[0].threadgroupsPerGrid[1] = 1;
dispatchBuffer[0].threadgroupsPerGrid[2] = 1;
for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
{
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
@ -1350,14 +1353,22 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
const device mg_mtl_path* path = &pathBuffer[pathIndex];
float xMax = min(path->box.z, path->clip.z);
int tileMax = xMax * scale[0] / tileSize[0];
int pathTileMax = tileMax - pathQueue->area.x;
int tileMaxX = xMax * scale[0] / tileSize[0];
int pathTileMaxX = tileMaxX - pathQueue->area.x;
if( pathTileCoord.x >= 0
&& pathTileCoord.x <= pathTileMax
&& pathTileCoord.x <= pathTileMaxX
&& pathTileCoord.y >= 0
&& pathTileCoord.y < pathQueue->area.w)
{
if(tileIndex < 0)
{
tileIndex = atomic_fetch_add_explicit((device atomic_uint*)&dispatchBuffer[0].threadgroupsPerGrid[0], 1, memory_order_relaxed);
screenTilesBuffer[tileIndex].tileCoord = uint2(tileCoord);
nextLink = &screenTilesBuffer[tileIndex].first;
*nextLink = -1;
}
int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
@ -1399,7 +1410,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
if(pathBuffer[pathIndex].color.a == 1)
{
screenTilesBuffer[tileIndex] = pathOpIndex;
screenTilesBuffer[tileIndex].first = pathOpIndex;
}
}
nextLink = &pathOp->next;
@ -1441,7 +1452,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
}
}
kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buffer(0)]],
const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]],
const device mg_mtl_path* pathBuffer [[buffer(2)]],
const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
@ -1453,18 +1464,19 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
constant int* useTexture [[buffer(9)]],
texture2d<float, access::write> outTexture [[texture(0)]],
texture2d<float> srcTexture [[texture(1)]],
uint2 threadCoord [[thread_position_in_grid]],
uint2 gridSize [[threads_per_grid]])
uint2 threadGroupCoord [[threadgroup_position_in_grid]],
uint2 localCoord [[thread_position_in_threadgroup]])
{
/*
mtl_log_context log = {.buffer = logBuffer,
.offset = logOffsetBuffer,
.enabled = true};
*/
uint2 pixelCoord = threadCoord;
int2 tileCoord = int2(pixelCoord) / tileSize[0];
int nTilesX = (int(gridSize.x) + tileSize[0] - 1)/tileSize[0];
int tileIndex = tileCoord.y * nTilesX + tileCoord.x;
int tileIndex = int(threadGroupCoord.x);
uint2 tileCoord = screenTilesBuffer[tileIndex].tileCoord;
uint2 pixelCoord = tileCoord*tileSize[0] + localCoord.xy;
int opIndex = screenTilesBuffer[tileIndex].first;
const int MG_MTL_MAX_SAMPLE_COUNT = 8;
float2 sampleCoords[MG_MTL_MAX_SAMPLE_COUNT];
@ -1500,7 +1512,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
float4 color = {0};
int winding[MG_MTL_MAX_SAMPLE_COUNT] = {0};
int opIndex = screenTilesBuffer[tileIndex];
while(opIndex != -1)
{
@ -1600,7 +1611,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
}
opIndex = op->next;
}
outTexture.write(color, pixelCoord);
}