[osx, canvas] Only dispatch raster shader for tiles that are overlapped by at least one path.
This commit is contained in:
parent
bfc7530bcf
commit
9e8a0f5f69
|
@ -97,4 +97,11 @@ typedef struct mg_mtl_tile_queue
|
||||||
|
|
||||||
} mg_mtl_tile_queue;
|
} mg_mtl_tile_queue;
|
||||||
|
|
||||||
|
//NOTE: entry of the screen tiles buffer. The merge kernel appends one entry the first time a tile
//      is found to be covered by a path, taking the slot index from an atomic counter stored in the
//      raster indirect dispatch arguments; the raster kernel then looks its entry up using its
//      threadgroup x position. The host allocates room for nTilesX*nTilesY entries.
typedef struct mg_mtl_screen_tile
|
||||||
|
{
|
||||||
|
vector_uint2 tileCoord; //NOTE: tile position in the tile grid; raster multiplies it by the tile size to get pixel coords
|
||||||
|
int first; //NOTE: index of the first op raster processes for this tile, -1 if none (presumably indexes the tile op buffer -- confirm)
|
||||||
|
|
||||||
|
} mg_mtl_screen_tile;
|
||||||
|
|
||||||
#endif //__MTL_RENDERER_H_
|
#endif //__MTL_RENDERER_H_
|
||||||
|
|
|
@ -52,6 +52,7 @@ typedef struct mg_mtl_canvas_backend
|
||||||
id<MTLBuffer> tileOpBuffer;
|
id<MTLBuffer> tileOpBuffer;
|
||||||
id<MTLBuffer> tileOpCountBuffer;
|
id<MTLBuffer> tileOpCountBuffer;
|
||||||
id<MTLBuffer> screenTilesBuffer;
|
id<MTLBuffer> screenTilesBuffer;
|
||||||
|
id<MTLBuffer> rasterDispatchBuffer;
|
||||||
|
|
||||||
int msaaCount;
|
int msaaCount;
|
||||||
vec2 frameSize;
|
vec2 frameSize;
|
||||||
|
@ -815,12 +816,24 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
|
||||||
//NOTE: encode GPU commands
|
//NOTE: encode GPU commands
|
||||||
@autoreleasepool
|
@autoreleasepool
|
||||||
{
|
{
|
||||||
|
//NOTE: create output texture
|
||||||
|
MTLRenderPassDescriptor* clearDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
|
||||||
|
clearDescriptor.colorAttachments[0].texture = backend->outTexture;
|
||||||
|
clearDescriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
|
||||||
|
clearDescriptor.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 0);
|
||||||
|
clearDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
|
||||||
|
|
||||||
|
id<MTLRenderCommandEncoder> clearEncoder = [surface->commandBuffer renderCommandEncoderWithDescriptor:clearDescriptor];
|
||||||
|
clearEncoder.label = @"clear out texture pass";
|
||||||
|
[clearEncoder endEncoding];
|
||||||
|
|
||||||
//NOTE: clear counters
|
//NOTE: clear counters
|
||||||
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
|
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
|
||||||
blitEncoder.label = @"clear counters";
|
blitEncoder.label = @"clear counters";
|
||||||
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
[blitEncoder fillBuffer: backend->segmentCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||||
[blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
[blitEncoder fillBuffer: backend->tileQueueCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||||
[blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
[blitEncoder fillBuffer: backend->tileOpCountBuffer range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||||
|
[blitEncoder fillBuffer: backend->rasterDispatchBuffer range: NSMakeRange(0, sizeof(MTLDispatchThreadgroupsIndirectArguments)) value: 0];
|
||||||
[blitEncoder endEncoding];
|
[blitEncoder endEncoding];
|
||||||
|
|
||||||
//NOTE: path setup pass
|
//NOTE: path setup pass
|
||||||
|
@ -893,11 +906,12 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
|
||||||
[mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
|
[mergeEncoder setBuffer:backend->tileQueueBuffer offset:0 atIndex:3];
|
||||||
[mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4];
|
[mergeEncoder setBuffer:backend->tileOpBuffer offset:0 atIndex:4];
|
||||||
[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
|
[mergeEncoder setBuffer:backend->tileOpCountBuffer offset:0 atIndex:5];
|
||||||
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:6];
|
[mergeEncoder setBuffer:backend->rasterDispatchBuffer offset:0 atIndex:6];
|
||||||
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:7];
|
[mergeEncoder setBuffer:backend->screenTilesBuffer offset:0 atIndex:7];
|
||||||
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:8];
|
[mergeEncoder setBytes:&tileSize length:sizeof(int) atIndex:8];
|
||||||
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:9];
|
[mergeEncoder setBytes:&scale length:sizeof(float) atIndex:9];
|
||||||
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:10];
|
[mergeEncoder setBuffer:backend->logBuffer[backend->bufferIndex] offset:0 atIndex:10];
|
||||||
|
[mergeEncoder setBuffer:backend->logOffsetBuffer[backend->bufferIndex] offset:0 atIndex:11];
|
||||||
|
|
||||||
MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
|
MTLSize mergeGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
|
||||||
MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1);
|
MTLSize mergeGroupSize = MTLSizeMake(16, 16, 1);
|
||||||
|
@ -933,7 +947,11 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
|
||||||
|
|
||||||
MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
|
MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
|
||||||
MTLSize rasterGroupSize = MTLSizeMake(16, 16, 1);
|
MTLSize rasterGroupSize = MTLSizeMake(16, 16, 1);
|
||||||
[rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
|
// [rasterEncoder dispatchThreads: rasterGridSize threadsPerThreadgroup: rasterGroupSize];
|
||||||
|
|
||||||
|
[rasterEncoder dispatchThreadgroupsWithIndirectBuffer: backend->rasterDispatchBuffer
|
||||||
|
indirectBufferOffset: 0
|
||||||
|
threadsPerThreadgroup: rasterGroupSize];
|
||||||
|
|
||||||
[rasterEncoder endEncoding];
|
[rasterEncoder endEncoding];
|
||||||
|
|
||||||
|
@ -970,7 +988,7 @@ void mg_mtl_canvas_resize(mg_mtl_canvas_backend* backend, vec2 size)
|
||||||
int nTilesX = (int)(size.x + tileSize - 1)/tileSize;
|
int nTilesX = (int)(size.x + tileSize - 1)/tileSize;
|
||||||
int nTilesY = (int)(size.y + tileSize - 1)/tileSize;
|
int nTilesY = (int)(size.y + tileSize - 1)/tileSize;
|
||||||
MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
|
MTLResourceOptions bufferOptions = MTLResourceStorageModePrivate;
|
||||||
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
|
backend->screenTilesBuffer = [backend->surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
|
||||||
options: bufferOptions];
|
options: bufferOptions];
|
||||||
|
|
||||||
if(backend->outTexture)
|
if(backend->outTexture)
|
||||||
|
@ -1435,10 +1453,13 @@ mg_canvas_backend* mtl_canvas_backend_create(mg_mtl_surface* surface)
|
||||||
backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int)
|
backend->tileOpCountBuffer = [surface->device newBufferWithLength: sizeof(int)
|
||||||
options: bufferOptions];
|
options: bufferOptions];
|
||||||
|
|
||||||
|
backend->rasterDispatchBuffer = [surface->device newBufferWithLength: sizeof(MTLDispatchThreadgroupsIndirectArguments)
|
||||||
|
options: bufferOptions];
|
||||||
|
|
||||||
int tileSize = MG_MTL_TILE_SIZE;
|
int tileSize = MG_MTL_TILE_SIZE;
|
||||||
int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
|
int nTilesX = (int)(frame.w * scale + tileSize - 1)/tileSize;
|
||||||
int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
|
int nTilesY = (int)(frame.h * scale + tileSize - 1)/tileSize;
|
||||||
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(int)
|
backend->screenTilesBuffer = [surface->device newBufferWithLength: nTilesX*nTilesY*sizeof(mg_mtl_screen_tile)
|
||||||
options: bufferOptions];
|
options: bufferOptions];
|
||||||
|
|
||||||
bufferOptions = MTLResourceStorageModeShared;
|
bufferOptions = MTLResourceStorageModeShared;
|
||||||
|
|
|
@ -1325,24 +1325,27 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
||||||
const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
|
const device mg_mtl_tile_queue* tileQueueBuffer [[buffer(3)]],
|
||||||
device mg_mtl_tile_op* tileOpBuffer [[buffer(4)]],
|
device mg_mtl_tile_op* tileOpBuffer [[buffer(4)]],
|
||||||
device atomic_int* tileOpCount [[buffer(5)]],
|
device atomic_int* tileOpCount [[buffer(5)]],
|
||||||
device int* screenTilesBuffer [[buffer(6)]],
|
device MTLDispatchThreadgroupsIndirectArguments* dispatchBuffer [[buffer(6)]],
|
||||||
constant int* tileSize [[buffer(7)]],
|
device mg_mtl_screen_tile* screenTilesBuffer [[buffer(7)]],
|
||||||
constant float* scale [[buffer(8)]],
|
constant int* tileSize [[buffer(8)]],
|
||||||
device char* logBuffer [[buffer(9)]],
|
constant float* scale [[buffer(9)]],
|
||||||
device atomic_int* logOffsetBuffer [[buffer(10)]],
|
device char* logBuffer [[buffer(10)]],
|
||||||
|
device atomic_int* logOffsetBuffer [[buffer(11)]],
|
||||||
uint2 threadCoord [[thread_position_in_grid]],
|
uint2 threadCoord [[thread_position_in_grid]],
|
||||||
uint2 gridSize [[threads_per_grid]])
|
uint2 gridSize [[threads_per_grid]])
|
||||||
{
|
{
|
||||||
int2 tileCoord = int2(threadCoord);
|
int2 tileCoord = int2(threadCoord);
|
||||||
int tileIndex = tileCoord.y * gridSize.x + tileCoord.x;
|
int tileIndex = -1;
|
||||||
device int* nextLink = &screenTilesBuffer[tileIndex];
|
device int* nextLink = 0;
|
||||||
*nextLink = -1;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
mtl_log_context log = {.buffer = logBuffer,
|
mtl_log_context log = {.buffer = logBuffer,
|
||||||
.offset = logOffsetBuffer,
|
.offset = logOffsetBuffer,
|
||||||
.enabled = true};
|
.enabled = true};
|
||||||
*/
|
*/
|
||||||
|
dispatchBuffer[0].threadgroupsPerGrid[1] = 1;
|
||||||
|
dispatchBuffer[0].threadgroupsPerGrid[2] = 1;
|
||||||
|
|
||||||
for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
|
for(int pathIndex = 0; pathIndex < pathCount[0]; pathIndex++)
|
||||||
{
|
{
|
||||||
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
|
const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[pathIndex];
|
||||||
|
@ -1350,14 +1353,22 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
||||||
|
|
||||||
const device mg_mtl_path* path = &pathBuffer[pathIndex];
|
const device mg_mtl_path* path = &pathBuffer[pathIndex];
|
||||||
float xMax = min(path->box.z, path->clip.z);
|
float xMax = min(path->box.z, path->clip.z);
|
||||||
int tileMax = xMax * scale[0] / tileSize[0];
|
int tileMaxX = xMax * scale[0] / tileSize[0];
|
||||||
int pathTileMax = tileMax - pathQueue->area.x;
|
int pathTileMaxX = tileMaxX - pathQueue->area.x;
|
||||||
|
|
||||||
if( pathTileCoord.x >= 0
|
if( pathTileCoord.x >= 0
|
||||||
&& pathTileCoord.x <= pathTileMax
|
&& pathTileCoord.x <= pathTileMaxX
|
||||||
&& pathTileCoord.y >= 0
|
&& pathTileCoord.y >= 0
|
||||||
&& pathTileCoord.y < pathQueue->area.w)
|
&& pathTileCoord.y < pathQueue->area.w)
|
||||||
{
|
{
|
||||||
|
if(tileIndex < 0)
|
||||||
|
{
|
||||||
|
tileIndex = atomic_fetch_add_explicit((device atomic_uint*)&dispatchBuffer[0].threadgroupsPerGrid[0], 1, memory_order_relaxed);
|
||||||
|
screenTilesBuffer[tileIndex].tileCoord = uint2(tileCoord);
|
||||||
|
nextLink = &screenTilesBuffer[tileIndex].first;
|
||||||
|
*nextLink = -1;
|
||||||
|
}
|
||||||
|
|
||||||
int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
|
int pathTileIndex = pathTileCoord.y * pathQueue->area.z + pathTileCoord.x;
|
||||||
const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
|
const device mg_mtl_tile_queue* tileQueue = &tileQueueBuffer[pathQueue->tileQueues + pathTileIndex];
|
||||||
|
|
||||||
|
@ -1399,7 +1410,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
||||||
|
|
||||||
if(pathBuffer[pathIndex].color.a == 1)
|
if(pathBuffer[pathIndex].color.a == 1)
|
||||||
{
|
{
|
||||||
screenTilesBuffer[tileIndex] = pathOpIndex;
|
screenTilesBuffer[tileIndex].first = pathOpIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nextLink = &pathOp->next;
|
nextLink = &pathOp->next;
|
||||||
|
@ -1441,7 +1452,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buffer(0)]],
|
||||||
const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]],
|
const device mg_mtl_tile_op* tileOpBuffer [[buffer(1)]],
|
||||||
const device mg_mtl_path* pathBuffer [[buffer(2)]],
|
const device mg_mtl_path* pathBuffer [[buffer(2)]],
|
||||||
const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
|
const device mg_mtl_segment* segmentBuffer [[buffer(3)]],
|
||||||
|
@ -1453,18 +1464,19 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
||||||
constant int* useTexture [[buffer(9)]],
|
constant int* useTexture [[buffer(9)]],
|
||||||
texture2d<float, access::write> outTexture [[texture(0)]],
|
texture2d<float, access::write> outTexture [[texture(0)]],
|
||||||
texture2d<float> srcTexture [[texture(1)]],
|
texture2d<float> srcTexture [[texture(1)]],
|
||||||
uint2 threadCoord [[thread_position_in_grid]],
|
uint2 threadGroupCoord [[threadgroup_position_in_grid]],
|
||||||
uint2 gridSize [[threads_per_grid]])
|
uint2 localCoord [[thread_position_in_threadgroup]])
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
mtl_log_context log = {.buffer = logBuffer,
|
mtl_log_context log = {.buffer = logBuffer,
|
||||||
.offset = logOffsetBuffer,
|
.offset = logOffsetBuffer,
|
||||||
.enabled = true};
|
.enabled = true};
|
||||||
*/
|
*/
|
||||||
uint2 pixelCoord = threadCoord;
|
int tileIndex = int(threadGroupCoord.x);
|
||||||
int2 tileCoord = int2(pixelCoord) / tileSize[0];
|
uint2 tileCoord = screenTilesBuffer[tileIndex].tileCoord;
|
||||||
int nTilesX = (int(gridSize.x) + tileSize[0] - 1)/tileSize[0];
|
uint2 pixelCoord = tileCoord*tileSize[0] + localCoord.xy;
|
||||||
int tileIndex = tileCoord.y * nTilesX + tileCoord.x;
|
|
||||||
|
int opIndex = screenTilesBuffer[tileIndex].first;
|
||||||
|
|
||||||
const int MG_MTL_MAX_SAMPLE_COUNT = 8;
|
const int MG_MTL_MAX_SAMPLE_COUNT = 8;
|
||||||
float2 sampleCoords[MG_MTL_MAX_SAMPLE_COUNT];
|
float2 sampleCoords[MG_MTL_MAX_SAMPLE_COUNT];
|
||||||
|
@ -1500,7 +1512,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
||||||
|
|
||||||
float4 color = {0};
|
float4 color = {0};
|
||||||
int winding[MG_MTL_MAX_SAMPLE_COUNT] = {0};
|
int winding[MG_MTL_MAX_SAMPLE_COUNT] = {0};
|
||||||
int opIndex = screenTilesBuffer[tileIndex];
|
|
||||||
|
|
||||||
while(opIndex != -1)
|
while(opIndex != -1)
|
||||||
{
|
{
|
||||||
|
@ -1600,7 +1611,6 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]],
|
||||||
}
|
}
|
||||||
opIndex = op->next;
|
opIndex = op->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
outTexture.write(color, pixelCoord);
|
outTexture.write(color, pixelCoord);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue