[osx, canvas] Only dispatch raster shader for tiles that are overlapped by at least one path.
This commit is contained in:
parent
bfc7530bcf
commit
9e8a0f5f69
|
@ -97,4 +97,11 @@ typedef struct mg_mtl_tile_queue
|
|||
|
||||
} mg_mtl_tile_queue;
|
||||
|
||||
//NOTE: one entry of the screen tiles buffer. Entries are filled in by the mtl_merge kernel and read back by the mtl_raster kernel.
typedef struct mg_mtl_screen_tile
|
||||
{
|
||||
vector_uint2 tileCoord; // tile position in the tile grid; mtl_raster computes pixel coords as tileCoord*tileSize + thread position in threadgroup
|
||||
int first; // index of the first op mtl_raster processes for this tile; mtl_merge initializes it to -1, which terminates the op list
|
||||

||||
} mg_mtl_screen_tile;
|
||||
|
||||
#endif //__MTL_RENDERER_H_
|
||||
|
|
|
@ -52,6 +52,7 @@ typedef struct mg_mtl_canvas_backend
|
|||
id<MTLBuffer> tileOpBuffer;
|
||||
id<MTLBuffer> tileOpCountBuffer;
|
||||
id<MTLBuffer> screenTilesBuffer;
|
||||
id<MTLBuffer> rasterDispatchBuffer;
|
||||
|
||||
int msaaCount;
|
||||
vec2 frameSize;
|
||||
|
@ -815,12 +816,24 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
|
|||
//NOTE: encode GPU commands
|
||||
@autoreleasepool
|
||||
{
|
||||
//NOTE: clear output texture
|
||||
MTLRenderPassDescriptor* clearDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
|
||||
clearDescriptor.colorAttachments[0].texture = backend->outTexture;
|
||||
clearDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
|
||||
clearDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 0);
|
||||
clearDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
|
||||
|
||||
id<MTLRenderCommandEncoder> clearEncoder = [surface->commandBuffer renderCommandEncoderWithDescriptor:clearDescriptor];
|
||||
clearEncoder.label = @"clear out texture pass";
|
||||
[clearEncoder endEncoding];
|
||||
|
||||
//NOTE: clear counters
|
||||
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
|
||||
blitEncoder.label = @"clear counters";
|
||||
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||
[blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||
[blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||
[blitEncoder fillBuffer: backend->rasterDispatchBuffer range: NSMakeRange(0, sizeof(MTLDispatchThreadgroupsIndirectArguments)) value: 0];
|
||||
[blitEncoder endEncoding];
|
||||
|
||||
//NOTE: path setup pass
|
||||
|
@ -893,11 +906,12 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
|
|||
[mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
|
||||
[mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4];
|
||||
[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
|
||||
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:6];
|
||||
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:7];
|
||||
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:8];
|
||||
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:9];
|
||||
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:10];
|
||||
[mergeEncoder setBuffer:backend->rasterDispatchBuffer offset:0 atIndex:6];
|
||||
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:7];
|
||||
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
|
||||
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:9];
|
||||
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
|
||||
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
|
||||
|
||||
MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
|
||||
MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1);
|
||||
|
@ -933,7 +947,11 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
|
|||
|
||||
MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
|
||||
MTLSize rasterGroupSize = MTLSizeMake(16, 16, 1);
|
||||
[rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
|
||||
// [rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
|
||||
|
||||
[rasterEncoder dispatchThreadgroupsWithIndirectBuffer: backend->rasterDispatchBuffer
|
||||
indirectBufferOffset: 0
|
||||
threadsPerThreadgroup: rasterGroupSize];
|
||||
|
||||
[rasterEncoder endEncoding];
|
||||
|
||||
|
@ -970,7 +988,7 @@ void mg_mtl_canvas_resize(mg_mtl_canvas_backend* backend, vec2 size)
|
|||
int nTilesX = (int)(size.x + tileSize - 1)/tileSize;
|
||||
int nTilesY = (int)(size.y + tileSize - 1)/tileSize;
|
||||
MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
|
||||
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
|
||||
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
|
||||
options: bufferOptions];
|
||||
|
||||
if(backend->outTexture)
|
||||
|
@ -1435,10 +1453,13 @@ mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface)
|
|||
backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int)
|
||||
options: bufferOptions];
|
||||
|
||||
backend->rasterDispatchBuffer = [surface->device newBufferWithLength: sizeof(MTLDispatchThreadgroupsIndirectArguments)
|
||||
options: bufferOptions];
|
||||
|
||||
int tileSize = MG_MTL_TILE_SIZE;
|
||||
int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
|
||||
int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
|
||||
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
|
||||
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
|
||||
options: bufferOptions];
|
||||
|
||||
bufferOptions = MTLResourceStorageModeShared;
|
||||
|
|
|
@ -1325,24 +1325,27 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
|||
const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
|
||||
device mg_mtl_tile_op* tileOpBuffer [[buffer(4)]],
|
||||
device atomic_int* tileOpCount [[buffer(5)]],
|
||||
device int* screenTilesBuffer [[buffer(6)]],
|
||||
constant int* tileSize [[buffer(7)]],
|
||||
constant float* scale [[buffer(8)]],
|
||||
device char* logBuffer [[buffer(9)]],
|
||||
device atomic_int* logOffsetBuffer [[buffer(10)]],
|
||||
device MTLDispatchThreadgroupsIndirectArguments* dispatchBuffer [[buffer(6)]],
|
||||
device mg_mtl_screen_tile* screenTilesBuffer [[buffer(7)]],
|
||||
constant int* tileSize [[buffer(8)]],
|
||||
constant float* scale [[buffer(9)]],
|
||||
device char* logBuffer [[buffer(10)]],
|
||||
device atomic_int* logOffsetBuffer [[buffer(11)]],
|
||||
uint2 threadCoord [[thread_position_in_grid]],
|
||||
uint2 gridSize [[threads_per_grid]])
|
||||
{
|
||||
int2 tileCoord = int2(threadCoord);
|
||||
int tileIndex = tileCoord.y * gridSize.x + tileCoord.x;
|
||||
device int* nextLink = &screenTilesBuffer[tileIndex];
|
||||
*nextLink = -1;
|
||||
int tileIndex = -1;
|
||||
device int* nextLink = 0;
|
||||
|
||||
/*
|
||||
mtl_log_context log = {.buffer = logBuffer,
|
||||
.offset = logOffsetBuffer,
|
||||
.enabled = true};
|
||||
*/
|
||||
dispatchBuffer[0].threadgroupsPerGrid[1] = 1;
|
||||
dispatchBuffer[0].threadgroupsPerGrid[2] = 1;
|
||||
|
||||
for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
|
||||
{
|
||||
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
|
||||
|
@ -1350,14 +1353,22 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
|||
|
||||
const device mg_mtl_path* path = &pathBuffer[pathIndex];
|
||||
float xMax = min(path->box.z, path->clip.z);
|
||||
int tileMax = xMax * scale[0] / tileSize[0];
|
||||
int pathTileMax = tileMax - pathQueue->area.x;
|
||||
int tileMaxX = xMax * scale[0] / tileSize[0];
|
||||
int pathTileMaxX = tileMaxX - pathQueue->area.x;
|
||||
|
||||
if( pathTileCoord.x >= 0
|
||||
&& pathTileCoord.x <= pathTileMax
|
||||
&& pathTileCoord.x <= pathTileMaxX
|
||||
&& pathTileCoord.y >= 0
|
||||
&& pathTileCoord.y < pathQueue->area.w)
|
||||
{
|
||||
if(tileIndex < 0)
|
||||
{
|
||||
tileIndex = atomic_fetch_add_explicit((device atomic_uint*)&dispatchBuffer[0].threadgroupsPerGrid[0], 1, memory_order_relaxed);
|
||||
screenTilesBuffer[tileIndex].tileCoord = uint2(tileCoord);
|
||||
nextLink = &screenTilesBuffer[tileIndex].first;
|
||||
*nextLink = -1;
|
||||
}
|
||||
|
||||
int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
|
||||
const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
|
||||
|
||||
|
@ -1399,7 +1410,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
|||
|
||||
if(pathBuffer[pathIndex].color.a == 1)
|
||||
{
|
||||
screenTilesBuffer[tileIndex] = pathOpIndex;
|
||||
screenTilesBuffer[tileIndex].first = pathOpIndex;
|
||||
}
|
||||
}
|
||||
nextLink = &pathOp->next;
|
||||
|
@ -1441,7 +1452,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
|||
}
|
||||
}
|
||||
|
||||
kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
||||
kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buffer(0)]],
|
||||
const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]],
|
||||
const device mg_mtl_path* pathBuffer [[buffer(2)]],
|
||||
const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
|
||||
|
@ -1453,18 +1464,19 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
|||
constant int* useTexture [[buffer(9)]],
|
||||
texture2d<float, access::write> outTexture [[texture(0)]],
|
||||
texture2d<float> srcTexture [[texture(1)]],
|
||||
uint2 threadCoord [[thread_position_in_grid]],
|
||||
uint2 gridSize [[threads_per_grid]])
|
||||
uint2 threadGroupCoord [[threadgroup_position_in_grid]],
|
||||
uint2 localCoord [[thread_position_in_threadgroup]])
|
||||
{
|
||||
/*
|
||||
mtl_log_context log = {.buffer = logBuffer,
|
||||
.offset = logOffsetBuffer,
|
||||
.enabled = true};
|
||||
*/
|
||||
uint2 pixelCoord = threadCoord;
|
||||
int2 tileCoord = int2(pixelCoord) / tileSize[0];
|
||||
int nTilesX = (int(gridSize.x) + tileSize[0] - 1)/tileSize[0];
|
||||
int tileIndex = tileCoord.y * nTilesX + tileCoord.x;
|
||||
int tileIndex = int(threadGroupCoord.x);
|
||||
uint2 tileCoord = screenTilesBuffer[tileIndex].tileCoord;
|
||||
uint2 pixelCoord = tileCoord*tileSize[0] + localCoord.xy;
|
||||
|
||||
int opIndex = screenTilesBuffer[tileIndex].first;
|
||||
|
||||
const int MG_MTL_MAX_SAMPLE_COUNT = 8;
|
||||
float2 sampleCoords[MG_MTL_MAX_SAMPLE_COUNT];
|
||||
|
@ -1500,7 +1512,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
|||
|
||||
float4 color = {0};
|
||||
int winding[MG_MTL_MAX_SAMPLE_COUNT] = {0};
|
||||
int opIndex = screenTilesBuffer[tileIndex];
|
||||
|
||||
while(opIndex != -1)
|
||||
{
|
||||
|
@ -1600,7 +1611,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
|||
}
|
||||
opIndex = op->next;
|
||||
}
|
||||
|
||||
outTexture.write(color, pixelCoord);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue