[mtl canvas] fixed bug where tiles to the left of and below the screen were all bucketed to the first row/column (same as gl version, see 4a8c77f02)

This commit is contained in:
Martin Fouilleul 2023-03-13 16:53:12 +01:00
parent 6da2494c81
commit e9d64166a3
3 changed files with 41 additions and 35 deletions

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
DEBUG_FLAGS="-g -DDEBUG -DLOG_COMPILE_DEBUG" DEBUG_FLAGS="-g -O1 -DDEBUG -DLOG_COMPILE_DEBUG"
#DEBUG_FLAGS="-O3" #DEBUG_FLAGS="-O3"
#-------------------------------------------------------------- #--------------------------------------------------------------

View File

@ -159,6 +159,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
//NOTE(martin): encode the clear counter //NOTE(martin): encode the clear counter
//----------------------------------------------------------- //-----------------------------------------------------------
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder]; id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
blitEncoder.label = @"clear counters";
[blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0]; [blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
[blitEncoder endEncoding]; [blitEncoder endEncoding];
@ -166,6 +167,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
//NOTE(martin): encode the boxing pass //NOTE(martin): encode the boxing pass
//----------------------------------------------------------- //-----------------------------------------------------------
id<MTLComputeCommandEncoder> boxEncoder = [surface->commandBuffer computeCommandEncoder]; id<MTLComputeCommandEncoder> boxEncoder = [surface->commandBuffer computeCommandEncoder];
boxEncoder.label = @"boxing pass";
[boxEncoder setComputePipelineState: backend->boxingPipeline]; [boxEncoder setComputePipelineState: backend->boxingPipeline];
[boxEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0]; [boxEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
@ -187,6 +189,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
//----------------------------------------------------------- //-----------------------------------------------------------
id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder]; id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder];
tileEncoder.label = @"tiling pass";
[tileEncoder setComputePipelineState: backend->tilingPipeline]; [tileEncoder setComputePipelineState: backend->tilingPipeline];
[tileEncoder setBuffer: backend->boxArray offset:0 atIndex: 0]; [tileEncoder setBuffer: backend->boxArray offset:0 atIndex: 0];
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1]; [tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
@ -201,6 +204,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
//----------------------------------------------------------- //-----------------------------------------------------------
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder]; id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
sortEncoder.label = @"sorting pass";
[sortEncoder setComputePipelineState: backend->sortingPipeline]; [sortEncoder setComputePipelineState: backend->sortingPipeline];
[sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 0]; [sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 0];
[sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 1]; [sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 1];
@ -223,6 +227,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
vector_float4 clearColorVec4 = {backend->clearColor.r, backend->clearColor.g, backend->clearColor.b, backend->clearColor.a}; vector_float4 clearColorVec4 = {backend->clearColor.r, backend->clearColor.g, backend->clearColor.b, backend->clearColor.a};
id<MTLComputeCommandEncoder> encoder = [surface->commandBuffer computeCommandEncoder]; id<MTLComputeCommandEncoder> encoder = [surface->commandBuffer computeCommandEncoder];
encoder.label = @"drawing pass";
[encoder setComputePipelineState:backend->computePipeline]; [encoder setComputePipelineState:backend->computePipeline];
[encoder setTexture: backend->outTexture atIndex: 0]; [encoder setTexture: backend->outTexture atIndex: 0];
int useTexture = 0; int useTexture = 0;
@ -269,6 +274,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
renderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore; renderPassDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
id<MTLRenderCommandEncoder> renderEncoder = [surface->commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; id<MTLRenderCommandEncoder> renderEncoder = [surface->commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor];
renderEncoder.label = @"blit pass";
[renderEncoder setViewport: viewport]; [renderEncoder setViewport: viewport];
[renderEncoder setRenderPipelineState: backend->renderPipeline]; [renderEncoder setRenderPipelineState: backend->renderPipeline];
[renderEncoder setFragmentTexture: backend->outTexture atIndex: 0]; [renderEncoder setFragmentTexture: backend->outTexture atIndex: 0];

View File

@ -102,24 +102,26 @@ kernel void TileKernel(const device float4* boxArray [[buffer(0)]],
uint gid [[thread_position_in_grid]]) uint gid [[thread_position_in_grid]])
{ {
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1; uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
uint nTilesX = tilesMatrixDim.x; int nTilesX = tilesMatrixDim.x;
uint nTilesY = tilesMatrixDim.y; int nTilesY = tilesMatrixDim.y;
uint triangleIndex = gid; uint triangleIndex = gid;
uint4 box = uint4(floor(boxArray[triangleIndex]))/RENDERER_TILE_SIZE; int4 box = int4(floor(boxArray[triangleIndex]))/RENDERER_TILE_SIZE;
uint xMin = max((uint)0, box.x); int xMin = max(0, box.x);
uint yMin = max((uint)0, box.y); int yMin = max(0, box.y);
uint xMax = min(box.z, nTilesX-1); int xMax = min(box.z, nTilesX-1);
uint yMax = min(box.w, nTilesY-1); int yMax = min(box.w, nTilesY-1);
for(uint y = yMin; y <= yMax; y++) for(int y = yMin; y <= yMax; y++)
{ {
for(uint x = xMin ; x <= xMax; x++) for(int x = xMin ; x <= xMax; x++)
{ {
uint tileIndex = y*nTilesX + x; int tileIndex = y*nTilesX + x;
device uint* tileBuffer = tilesArray + tileIndex*RENDERER_TILE_BUFFER_SIZE;
uint counter = atomic_fetch_add_explicit(&(tileCounters[tileIndex]), 1, memory_order_relaxed); uint counter = atomic_fetch_add_explicit(&(tileCounters[tileIndex]), 1, memory_order_relaxed);
tileBuffer[counter] = triangleIndex; if(counter < RENDERER_TILE_BUFFER_SIZE)
{
tilesArray[tileIndex*RENDERER_TILE_BUFFER_SIZE + counter] = triangleIndex;
}
} }
} }
} }
@ -241,7 +243,6 @@ kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)
for(uint tileBufferIndex=0; tileBufferIndex < tileBufferSize; tileBufferIndex++) for(uint tileBufferIndex=0; tileBufferIndex < tileBufferSize; tileBufferIndex++)
{ {
// float4 box = boxArray[tileBuffer[tileBufferIndex]];
const device mg_triangle_data* triangle = &triangleArray[tileBuffer[tileBufferIndex]]; const device mg_triangle_data* triangle = &triangleArray[tileBuffer[tileBufferIndex]];
int2 p0 = int2(triangle->p0 * subPixelFactor); int2 p0 = int2(triangle->p0 * subPixelFactor);
@ -269,9 +270,9 @@ kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)
{uvTransform2x3[1], uvTransform2x3[4], 0}, {uvTransform2x3[1], uvTransform2x3[4], 0},
{uvTransform2x3[2], uvTransform2x3[5], 1}}; {uvTransform2x3[2], uvTransform2x3[5], 1}};
for(int i=0; i<sampleCount; i++) for(int sampleIndex=0; sampleIndex<sampleCount; sampleIndex++)
{ {
int2 samplePoint = samplePoints[i]; int2 samplePoint = samplePoints[sampleIndex];
//NOTE(martin): cull if pixel is outside box //NOTE(martin): cull if pixel is outside box
/* /*
@ -290,17 +291,6 @@ kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)
{ {
float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2); float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
//float2 uv = (uv0*w0 + uv1*w1 + uv2*w2)/(w0+w1+w2);
float2 sampleFP = float2(samplePoint)/subPixelFactor;
float2 uv = (uvTransform*(float3(sampleFP/contentsScaling[0], 1))).xy;
float4 texColor = float4(1, 1, 1, 1);
if(*useTexture)
{
constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
texColor = texAtlas.sample(smp, uv);
texColor.rgb *= texColor.a;
}
//TODO(martin): this is a quick and dirty fix for solid polygons where we use //TODO(martin): this is a quick and dirty fix for solid polygons where we use
// cubic = (1, 1, 1, 1) on all vertices, which can cause small errors to // cubic = (1, 1, 1, 1) on all vertices, which can cause small errors to
// flip the sign. // flip the sign.
@ -312,22 +302,32 @@ kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)
float eps = 0.0001; float eps = 0.0001;
if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps) if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
{ {
if(shapeIndex == zIndices[i]) if(shapeIndex == zIndices[sampleIndex])
{ {
flipCounts[i]++; flipCounts[sampleIndex]++;
} }
else else
{ {
if(flipCounts[i] & 0x01) if(flipCounts[sampleIndex] & 0x01)
{ {
pixelColors[i] = nextColors[i]; pixelColors[sampleIndex] = nextColors[sampleIndex];
} }
float4 nextCol = color*texColor; float4 nextColor = color;
nextColors[i] = pixelColors[i]*(1-nextCol.a) +nextCol.a*nextCol; if(*useTexture)
{
float2 sampleFP = float2(samplePoint)/subPixelFactor;
float2 uv = (uvTransform*(float3(sampleFP/contentsScaling[0], 1))).xy;
zIndices[i] = shapeIndex; constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
flipCounts[i] = 1; float4 texColor = texAtlas.sample(smp, uv);
texColor.rgb *= texColor.a;
nextColor *= texColor;
}
nextColors[sampleIndex] = pixelColors[sampleIndex]*(1-nextColor.a) + nextColor;
zIndices[sampleIndex] = shapeIndex;
flipCounts[sampleIndex] = 1;
} }
} }
} }