[osx, canvas] Only dispatch raster shader for tiles that are overlapped by at least one path.

This commit is contained in:
Martin Fouilleul 2023-07-10 20:20:18 +02:00
parent bfc7530bcf
commit 9e8a0f5f69
3 changed files with 67 additions and 29 deletions

View File

@ -97,4 +97,11 @@ typedef struct mg_mtl_tile_queue
} mg_mtl_tile_queue; } mg_mtl_tile_queue;
//NOTE: one entry of the screen tiles buffer. The merge kernel claims an entry for a screen
//      tile the first time it finds a path overlapping that tile, and the raster kernel then
//      looks entries up by threadgroup index, so only claimed tiles get rasterized.
//      The screen tiles buffer is allocated with room for nTilesX*nTilesY of these entries.
typedef struct mg_mtl_screen_tile
{
//NOTE: position of the tile in the screen tile grid (in tiles, not pixels). The raster kernel
//      multiplies it by the tile size to recover the pixel origin of the tile.
vector_uint2 tileCoord;
//NOTE: index of the first op of this tile's op list, or -1 when the list is empty. The raster
//      kernel walks the list through each op's 'next' index until it reaches -1.
//      (presumably an index into the tile op buffer -- TODO confirm against the raster loop body)
int first;
} mg_mtl_screen_tile;
#endif //__MTL_RENDERER_H_ #endif //__MTL_RENDERER_H_

View File

@ -52,6 +52,7 @@ typedef struct mg_mtl_canvas_backend
id<MTLBuffer> tileOpBuffer; id<MTLBuffer> tileOpBuffer;
id<MTLBuffer> tileOpCountBuffer; id<MTLBuffer> tileOpCountBuffer;
id<MTLBuffer> screenTilesBuffer; id<MTLBuffer> screenTilesBuffer;
id<MTLBuffer> rasterDispatchBuffer;
int msaaCount; int msaaCount;
vec2 frameSize; vec2 frameSize;
@ -815,12 +816,24 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
//NOTE: encode GPU commands //NOTE: encode GPU commands
@autoreleasepool @autoreleasepool
{ {
//NOTE: create output texture
MTLRenderPassDescriptor* clearDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
clearDescriptor.colorAttachments[0].texture = backend->outTexture;
clearDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
clearDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 0);
clearDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
id<MTLRenderCommandEncoder> clearEncoder = [surface->commandBuffer renderCommandEncoderWithDescriptor:clearDescriptor];
clearEncoder.label = @"clear out texture pass";
[clearEncoder endEncoding];
//NOTE: clear counters //NOTE: clear counters
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder]; id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
blitEncoder.label = @"clear counters"; blitEncoder.label = @"clear counters";
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0]; [blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0]; [blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0]; [blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
[blitEncoder fillBuffer: backend->rasterDispatchBuffer range: NSMakeRange(0, sizeof(MTLDispatchThreadgroupsIndirectArguments)) value: 0];
[blitEncoder endEncoding]; [blitEncoder endEncoding];
//NOTE: path setup pass //NOTE: path setup pass
@ -893,11 +906,12 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
[mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3]; [mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
[mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4]; [mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4];
[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5]; [mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:6]; [mergeEncoder setBuffer:backend->rasterDispatchBuffer offset:0 atIndex:6];
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:7]; [mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:7];
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:8]; [mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:9]; [mergeEncoder setBytes:&scale length:sizeof(float) atIndex:9];
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:10]; [mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1); MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1); MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1);
@ -933,7 +947,11 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1); MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
MTLSize rasterGroupSize = MTLSizeMake(16, 16, 1); MTLSize rasterGroupSize = MTLSizeMake(16, 16, 1);
[rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize]; // [rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
[rasterEncoder dispatchThreadgroupsWithIndirectBuffer: backend->rasterDispatchBuffer
indirectBufferOffset: 0
threadsPerThreadgroup: rasterGroupSize];
[rasterEncoder endEncoding]; [rasterEncoder endEncoding];
@ -970,7 +988,7 @@ void mg_mtl_canvas_resize(mg_mtl_canvas_backend* backend, vec2 size)
int nTilesX = (int)(size.x + tileSize - 1)/tileSize; int nTilesX = (int)(size.x + tileSize - 1)/tileSize;
int nTilesY = (int)(size.y + tileSize - 1)/tileSize; int nTilesY = (int)(size.y + tileSize - 1)/tileSize;
MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate; MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int) backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
options: bufferOptions]; options: bufferOptions];
if(backend->outTexture) if(backend->outTexture)
@ -1435,10 +1453,13 @@ mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface)
backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int) backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int)
options: bufferOptions]; options: bufferOptions];
backend->rasterDispatchBuffer = [surface->device newBufferWithLength: sizeof(MTLDispatchThreadgroupsIndirectArguments)
options: bufferOptions];
int tileSize = MG_MTL_TILE_SIZE; int tileSize = MG_MTL_TILE_SIZE;
int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize; int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize; int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int) backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
options: bufferOptions]; options: bufferOptions];
bufferOptions = MTLResourceStorageModeShared; bufferOptions = MTLResourceStorageModeShared;

View File

@ -1325,24 +1325,27 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]], const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
device mg_mtl_tile_op* tileOpBuffer [[buffer(4)]], device mg_mtl_tile_op* tileOpBuffer [[buffer(4)]],
device atomic_int* tileOpCount [[buffer(5)]], device atomic_int* tileOpCount [[buffer(5)]],
device int* screenTilesBuffer [[buffer(6)]], device MTLDispatchThreadgroupsIndirectArguments* dispatchBuffer [[buffer(6)]],
constant int* tileSize [[buffer(7)]], device mg_mtl_screen_tile* screenTilesBuffer [[buffer(7)]],
constant float* scale [[buffer(8)]], constant int* tileSize [[buffer(8)]],
device char* logBuffer [[buffer(9)]], constant float* scale [[buffer(9)]],
device atomic_int* logOffsetBuffer [[buffer(10)]], device char* logBuffer [[buffer(10)]],
device atomic_int* logOffsetBuffer [[buffer(11)]],
uint2 threadCoord [[thread_position_in_grid]], uint2 threadCoord [[thread_position_in_grid]],
uint2 gridSize [[threads_per_grid]]) uint2 gridSize [[threads_per_grid]])
{ {
int2 tileCoord = int2(threadCoord); int2 tileCoord = int2(threadCoord);
int tileIndex = tileCoord.y * gridSize.x + tileCoord.x; int tileIndex = -1;
device int* nextLink = &screenTilesBuffer[tileIndex]; device int* nextLink = 0;
*nextLink = -1;
/* /*
mtl_log_context log = {.buffer = logBuffer, mtl_log_context log = {.buffer = logBuffer,
.offset = logOffsetBuffer, .offset = logOffsetBuffer,
.enabled = true}; .enabled = true};
*/ */
dispatchBuffer[0].threadgroupsPerGrid[1] = 1;
dispatchBuffer[0].threadgroupsPerGrid[2] = 1;
for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++) for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
{ {
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex]; const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
@ -1350,14 +1353,22 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
const device mg_mtl_path* path = &pathBuffer[pathIndex]; const device mg_mtl_path* path = &pathBuffer[pathIndex];
float xMax = min(path->box.z, path->clip.z); float xMax = min(path->box.z, path->clip.z);
int tileMax = xMax * scale[0] / tileSize[0]; int tileMaxX = xMax * scale[0] / tileSize[0];
int pathTileMax = tileMax - pathQueue->area.x; int pathTileMaxX = tileMaxX - pathQueue->area.x;
if( pathTileCoord.x >= 0 if( pathTileCoord.x >= 0
&& pathTileCoord.x <= pathTileMax && pathTileCoord.x <= pathTileMaxX
&& pathTileCoord.y >= 0 && pathTileCoord.y >= 0
&& pathTileCoord.y < pathQueue->area.w) && pathTileCoord.y < pathQueue->area.w)
{ {
if(tileIndex < 0)
{
tileIndex = atomic_fetch_add_explicit((device atomic_uint*)&dispatchBuffer[0].threadgroupsPerGrid[0], 1, memory_order_relaxed);
screenTilesBuffer[tileIndex].tileCoord = uint2(tileCoord);
nextLink = &screenTilesBuffer[tileIndex].first;
*nextLink = -1;
}
int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x; int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex]; const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
@ -1399,7 +1410,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
if(pathBuffer[pathIndex].color.a == 1) if(pathBuffer[pathIndex].color.a == 1)
{ {
screenTilesBuffer[tileIndex] = pathOpIndex; screenTilesBuffer[tileIndex].first = pathOpIndex;
} }
} }
nextLink = &pathOp->next; nextLink = &pathOp->next;
@ -1441,7 +1452,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
} }
} }
kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]], kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buffer(0)]],
const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]], const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]],
const device mg_mtl_path* pathBuffer [[buffer(2)]], const device mg_mtl_path* pathBuffer [[buffer(2)]],
const device mg_mtl_segment* segmentBuffer [[buffer(3)]], const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
@ -1453,18 +1464,19 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
constant int* useTexture [[buffer(9)]], constant int* useTexture [[buffer(9)]],
texture2d<float, access::write> outTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(0)]],
texture2d<float> srcTexture [[texture(1)]], texture2d<float> srcTexture [[texture(1)]],
uint2 threadCoord [[thread_position_in_grid]], uint2 threadGroupCoord [[threadgroup_position_in_grid]],
uint2 gridSize [[threads_per_grid]]) uint2 localCoord [[thread_position_in_threadgroup]])
{ {
/* /*
mtl_log_context log = {.buffer = logBuffer, mtl_log_context log = {.buffer = logBuffer,
.offset = logOffsetBuffer, .offset = logOffsetBuffer,
.enabled = true}; .enabled = true};
*/ */
uint2 pixelCoord = threadCoord; int tileIndex = int(threadGroupCoord.x);
int2 tileCoord = int2(pixelCoord) / tileSize[0]; uint2 tileCoord = screenTilesBuffer[tileIndex].tileCoord;
int nTilesX = (int(gridSize.x) + tileSize[0] - 1)/tileSize[0]; uint2 pixelCoord = tileCoord*tileSize[0] + localCoord.xy;
int tileIndex = tileCoord.y * nTilesX + tileCoord.x;
int opIndex = screenTilesBuffer[tileIndex].first;
const int MG_MTL_MAX_SAMPLE_COUNT = 8; const int MG_MTL_MAX_SAMPLE_COUNT = 8;
float2 sampleCoords[MG_MTL_MAX_SAMPLE_COUNT]; float2 sampleCoords[MG_MTL_MAX_SAMPLE_COUNT];
@ -1500,7 +1512,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
float4 color = {0}; float4 color = {0};
int winding[MG_MTL_MAX_SAMPLE_COUNT] = {0}; int winding[MG_MTL_MAX_SAMPLE_COUNT] = {0};
int opIndex = screenTilesBuffer[tileIndex];
while(opIndex != -1) while(opIndex != -1)
{ {
@ -1600,7 +1611,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
} }
opIndex = op->next; opIndex = op->next;
} }
outTexture.write(color, pixelCoord); outTexture.write(color, pixelCoord);
} }