[wip] trying to simplify metal shader
This commit is contained in:
parent
cee294d8ad
commit
fd5a4d4cd8
|
@ -45,7 +45,7 @@ typedef struct mg_mtl_canvas_backend
|
||||||
id<MTLBuffer> vertexBuffer;
|
id<MTLBuffer> vertexBuffer;
|
||||||
id<MTLBuffer> indexBuffer;
|
id<MTLBuffer> indexBuffer;
|
||||||
id<MTLBuffer> tileCounters;
|
id<MTLBuffer> tileCounters;
|
||||||
id<MTLBuffer> tilesArray;
|
id<MTLBuffer> tileArrayBuffer;
|
||||||
id<MTLBuffer> triangleArray;
|
id<MTLBuffer> triangleArray;
|
||||||
id<MTLBuffer> boxArray;
|
id<MTLBuffer> boxArray;
|
||||||
|
|
||||||
|
@ -163,6 +163,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
[blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
|
[blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
|
||||||
[blitEncoder endEncoding];
|
[blitEncoder endEncoding];
|
||||||
|
|
||||||
|
/*
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the boxing pass
|
//NOTE(martin): encode the boxing pass
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
@ -183,6 +184,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
|
|
||||||
[boxEncoder dispatchThreads: boxGridSize threadsPerThreadgroup: boxGroupSize];
|
[boxEncoder dispatchThreads: boxGridSize threadsPerThreadgroup: boxGroupSize];
|
||||||
[boxEncoder endEncoding];
|
[boxEncoder endEncoding];
|
||||||
|
*/
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the tiling pass
|
//NOTE(martin): encode the tiling pass
|
||||||
|
@ -191,12 +193,19 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
tileEncoder.label = @"tiling pass";
|
tileEncoder.label = @"tiling pass";
|
||||||
[tileEncoder setComputePipelineState: backend->tilingPipeline];
|
[tileEncoder setComputePipelineState: backend->tilingPipeline];
|
||||||
[tileEncoder setBuffer: backend->boxArray offset:0 atIndex: 0];
|
[tileEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
|
||||||
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
|
[tileEncoder setBuffer: backend->indexBuffer offset:backend->indexBufferOffset atIndex: 1];
|
||||||
[tileEncoder setBuffer: backend->tilesArray offset:0 atIndex: 2];
|
[tileEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 2];
|
||||||
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 3];
|
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
||||||
|
[tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
||||||
|
|
||||||
[tileEncoder dispatchThreads: boxGridSize threadsPerThreadgroup: boxGroupSize];
|
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 5];
|
||||||
|
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 6];
|
||||||
|
|
||||||
|
MTLSize tileGroupSize = MTLSizeMake(backend->tilingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
||||||
|
MTLSize tileGridSize = MTLSizeMake(indexCount/3, 1, 1);
|
||||||
|
|
||||||
|
[tileEncoder dispatchThreads: tileGridSize threadsPerThreadgroup: tileGroupSize];
|
||||||
[tileEncoder endEncoding];
|
[tileEncoder endEncoding];
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
@ -206,15 +215,16 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
sortEncoder.label = @"sorting pass";
|
sortEncoder.label = @"sorting pass";
|
||||||
[sortEncoder setComputePipelineState: backend->sortingPipeline];
|
[sortEncoder setComputePipelineState: backend->sortingPipeline];
|
||||||
[sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 0];
|
[sortEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
|
||||||
[sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 1];
|
[sortEncoder setBuffer: backend->indexBuffer offset:backend->indexBufferOffset atIndex: 1];
|
||||||
[sortEncoder setBuffer: backend->tilesArray offset:0 atIndex: 2];
|
[sortEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 2];
|
||||||
[sortEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 3];
|
[sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
||||||
|
[sortEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
||||||
|
|
||||||
u32 nTilesX = (viewportSize.x + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
u32 nTilesX = (viewportSize.x + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
||||||
u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
||||||
|
|
||||||
MTLSize sortGroupSize = MTLSizeMake(backend->boxingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
MTLSize sortGroupSize = MTLSizeMake(backend->sortingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
||||||
MTLSize sortGridSize = MTLSizeMake(nTilesX*nTilesY, 1, 1);
|
MTLSize sortGridSize = MTLSizeMake(nTilesX*nTilesY, 1, 1);
|
||||||
|
|
||||||
[sortEncoder dispatchThreads: sortGridSize threadsPerThreadgroup: sortGroupSize];
|
[sortEncoder dispatchThreads: sortGridSize threadsPerThreadgroup: sortGroupSize];
|
||||||
|
@ -226,35 +236,35 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
//TODO: remove that
|
//TODO: remove that
|
||||||
vector_float4 clearColorVec4 = {backend->clearColor.r, backend->clearColor.g, backend->clearColor.b, backend->clearColor.a};
|
vector_float4 clearColorVec4 = {backend->clearColor.r, backend->clearColor.g, backend->clearColor.b, backend->clearColor.a};
|
||||||
|
|
||||||
id<MTLComputeCommandEncoder> encoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> drawEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
encoder.label = @"drawing pass";
|
drawEncoder.label = @"drawing pass";
|
||||||
[encoder setComputePipelineState:backend->computePipeline];
|
[drawEncoder setComputePipelineState:backend->computePipeline];
|
||||||
[encoder setTexture: backend->outTexture atIndex: 0];
|
[drawEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
|
||||||
|
[drawEncoder setBuffer: backend->indexBuffer offset:backend->indexBufferOffset atIndex: 1];
|
||||||
|
[drawEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 2];
|
||||||
|
[drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
||||||
|
[drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
||||||
|
|
||||||
|
[drawEncoder setTexture: backend->outTexture atIndex: 0];
|
||||||
int useTexture = 0;
|
int useTexture = 0;
|
||||||
if(image)
|
if(image)
|
||||||
{
|
{
|
||||||
mg_mtl_image_data* mtlImage = (mg_mtl_image_data*)image;
|
mg_mtl_image_data* mtlImage = (mg_mtl_image_data*)image;
|
||||||
[encoder setTexture: mtlImage->texture atIndex: 1];
|
[drawEncoder setTexture: mtlImage->texture atIndex: 1];
|
||||||
useTexture = 1;
|
useTexture = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
[encoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
|
[drawEncoder setBytes: &clearColorVec4 length: sizeof(vector_float4) atIndex: 5];
|
||||||
[encoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 1];
|
[drawEncoder setBytes: &useTexture length:sizeof(int) atIndex:6];
|
||||||
[encoder setBuffer: backend->tileCounters offset:0 atIndex: 2];
|
[drawEncoder setBytes: &scale length: sizeof(float) atIndex: 7];
|
||||||
[encoder setBuffer: backend->tilesArray offset:0 atIndex: 3];
|
|
||||||
[encoder setBuffer: backend->triangleArray offset:0 atIndex: 4];
|
|
||||||
[encoder setBuffer: backend->boxArray offset:0 atIndex: 5];
|
|
||||||
[encoder setBytes: &clearColorVec4 length: sizeof(vector_float4) atIndex: 6];
|
|
||||||
[encoder setBytes: &useTexture length:sizeof(int) atIndex:7];
|
|
||||||
[encoder setBytes: &scale length: sizeof(float) atIndex: 8];
|
|
||||||
|
|
||||||
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
|
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
|
||||||
DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup);
|
DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup);
|
||||||
MTLSize threadGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
|
MTLSize threadGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
|
||||||
MTLSize threadGroupSize = MTLSizeMake(RENDERER_TILE_SIZE, RENDERER_TILE_SIZE, 1);
|
MTLSize threadGroupSize = MTLSizeMake(RENDERER_TILE_SIZE, RENDERER_TILE_SIZE, 1);
|
||||||
|
|
||||||
[encoder dispatchThreads: threadGridSize threadsPerThreadgroup:threadGroupSize];
|
[drawEncoder dispatchThreads: threadGridSize threadsPerThreadgroup:threadGroupSize];
|
||||||
[encoder endEncoding];
|
[drawEncoder endEncoding];
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): blit texture to framebuffer
|
//NOTE(martin): blit texture to framebuffer
|
||||||
|
@ -331,7 +341,7 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface)
|
||||||
[backend->outTexture release];
|
[backend->outTexture release];
|
||||||
[backend->vertexBuffer release];
|
[backend->vertexBuffer release];
|
||||||
[backend->indexBuffer release];
|
[backend->indexBuffer release];
|
||||||
[backend->tilesArray release];
|
[backend->tileArrayBuffer release];
|
||||||
[backend->triangleArray release];
|
[backend->triangleArray release];
|
||||||
[backend->boxArray release];
|
[backend->boxArray release];
|
||||||
[backend->computePipeline release];
|
[backend->computePipeline release];
|
||||||
|
@ -459,7 +469,7 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
backend->shapeBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_shape)
|
backend->shapeBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_shape)
|
||||||
options: bufferOptions];
|
options: bufferOptions];
|
||||||
|
|
||||||
backend->tilesArray = [metalSurface->device newBufferWithLength: RENDERER_TILE_BUFFER_SIZE*sizeof(int)*RENDERER_MAX_TILES
|
backend->tileArrayBuffer = [metalSurface->device newBufferWithLength: RENDERER_TILE_BUFFER_SIZE*sizeof(int)*RENDERER_MAX_TILES
|
||||||
options: MTLResourceStorageModePrivate];
|
options: MTLResourceStorageModePrivate];
|
||||||
|
|
||||||
backend->triangleArray = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_triangle_data)
|
backend->triangleArray = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_triangle_data)
|
||||||
|
@ -522,6 +532,7 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
reflection: nil
|
reflection: nil
|
||||||
error: &error];
|
error: &error];
|
||||||
|
|
||||||
|
/*
|
||||||
MTLComputePipelineDescriptor* boxingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
MTLComputePipelineDescriptor* boxingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||||
boxingPipelineDesc.computeFunction = boxingFunction;
|
boxingPipelineDesc.computeFunction = boxingFunction;
|
||||||
// boxingPipelineDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
// boxingPipelineDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
||||||
|
@ -530,6 +541,7 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
options: MTLPipelineOptionNone
|
options: MTLPipelineOptionNone
|
||||||
reflection: nil
|
reflection: nil
|
||||||
error: &error];
|
error: &error];
|
||||||
|
*/
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): setup our render pipeline state
|
//NOTE(martin): setup our render pipeline state
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
|
|
@ -31,7 +31,7 @@ bool is_top_left(float2 a, float2 b)
|
||||||
return( (a.y == b.y && b.x < a.x)
|
return( (a.y == b.y && b.x < a.x)
|
||||||
||(b.y < a.y));
|
||(b.y < a.y));
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
kernel void BoundingBoxKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
kernel void BoundingBoxKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
constant uint* indexBuffer [[buffer(1)]],
|
constant uint* indexBuffer [[buffer(1)]],
|
||||||
constant mg_shape* shapeBuffer [[buffer(2)]],
|
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||||
|
@ -94,19 +94,43 @@ kernel void BoundingBoxKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
triangleArray[triangleIndex].bias1 = bias1;
|
triangleArray[triangleIndex].bias1 = bias1;
|
||||||
triangleArray[triangleIndex].bias2 = bias2;
|
triangleArray[triangleIndex].bias2 = bias2;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
kernel void TileKernel(const device float4* boxArray [[buffer(0)]],
|
kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
device volatile atomic_uint* tileCounters [[buffer(1)]],
|
constant uint* indexBuffer [[buffer(1)]],
|
||||||
device uint* tilesArray [[buffer(2)]],
|
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||||
constant vector_uint2* viewport [[buffer(3)]],
|
device volatile atomic_uint* tileCounters [[buffer(3)]],
|
||||||
|
device uint* tileArrayBuffer [[buffer(4)]],
|
||||||
|
constant uint2* viewport [[buffer(5)]],
|
||||||
|
constant float* scaling [[buffer(6)]],
|
||||||
uint gid [[thread_position_in_grid]])
|
uint gid [[thread_position_in_grid]])
|
||||||
{
|
{
|
||||||
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
|
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
|
||||||
int nTilesX = tilesMatrixDim.x;
|
int nTilesX = tilesMatrixDim.x;
|
||||||
int nTilesY = tilesMatrixDim.y;
|
int nTilesY = tilesMatrixDim.y;
|
||||||
|
|
||||||
uint triangleIndex = gid;
|
uint triangleIndex = gid * 3;
|
||||||
int4 box = int4(floor(boxArray[triangleIndex]))/RENDERER_TILE_SIZE;
|
|
||||||
|
uint i0 = indexBuffer[triangleIndex];
|
||||||
|
uint i1 = indexBuffer[triangleIndex+1u];
|
||||||
|
uint i2 = indexBuffer[triangleIndex+2u];
|
||||||
|
|
||||||
|
float2 p0 = vertexBuffer[i0].pos * scaling[0];
|
||||||
|
float2 p1 = vertexBuffer[i1].pos * scaling[0];
|
||||||
|
float2 p2 = vertexBuffer[i2].pos * scaling[0];
|
||||||
|
|
||||||
|
int shapeIndex = vertexBuffer[i0].shapeIndex;
|
||||||
|
float4 clip = shapeBuffer[shapeIndex].clip * scaling[0];
|
||||||
|
|
||||||
|
float4 fbox = float4(max(min(min(p0.x, p1.x), p2.x), clip.x),
|
||||||
|
max(min(min(p0.y, p1.y), p2.y), clip.y),
|
||||||
|
min(max(max(p0.x, p1.x), p2.x), clip.z),
|
||||||
|
min(max(max(p0.y, p1.y), p2.y), clip.w));
|
||||||
|
|
||||||
|
int4 box = int4(floor(fbox))/int(RENDERER_TILE_SIZE);
|
||||||
|
|
||||||
|
//NOTE(martin): it's importat to do the computation with signed int, so that we can have negative xMax/yMax
|
||||||
|
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
|
||||||
int xMin = max(0, box.x);
|
int xMin = max(0, box.x);
|
||||||
int yMin = max(0, box.y);
|
int yMin = max(0, box.y);
|
||||||
int xMax = min(box.z, nTilesX-1);
|
int xMax = min(box.z, nTilesX-1);
|
||||||
|
@ -120,105 +144,125 @@ kernel void TileKernel(const device float4* boxArray [[buffer(0)]],
|
||||||
uint counter = atomic_fetch_add_explicit(&(tileCounters[tileIndex]), 1, memory_order_relaxed);
|
uint counter = atomic_fetch_add_explicit(&(tileCounters[tileIndex]), 1, memory_order_relaxed);
|
||||||
if(counter < RENDERER_TILE_BUFFER_SIZE)
|
if(counter < RENDERER_TILE_BUFFER_SIZE)
|
||||||
{
|
{
|
||||||
tilesArray[tileIndex*RENDERER_TILE_BUFFER_SIZE + counter] = triangleIndex;
|
tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + counter] = triangleIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void SortKernel(const device uint* tileCounters [[buffer(0)]],
|
kernel void SortKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
const device mg_triangle_data* triangleArray [[buffer(1)]],
|
constant uint* indexBuffer [[buffer(1)]],
|
||||||
device uint* tilesArray [[buffer(2)]],
|
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||||
constant vector_uint2* viewport [[buffer(3)]],
|
const device uint* tileCounters [[buffer(3)]],
|
||||||
|
device uint* tileArrayBuffer [[buffer(4)]],
|
||||||
uint gid [[thread_position_in_grid]])
|
uint gid [[thread_position_in_grid]])
|
||||||
{
|
{
|
||||||
uint tileIndex = gid;
|
uint tileIndex = gid;
|
||||||
device uint* tileBuffer = tilesArray + tileIndex*RENDERER_TILE_BUFFER_SIZE;
|
uint tileArrayOffset = tileIndex * RENDERER_TILE_BUFFER_SIZE;
|
||||||
uint tileBufferSize = tileCounters[tileIndex];
|
uint tileArrayCount = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE);
|
||||||
|
|
||||||
for(int eltIndex=0; eltIndex < (int)tileBufferSize; eltIndex++)
|
for(uint tileArrayIndex=1; tileArrayIndex < tileArrayCount; tileArrayIndex++)
|
||||||
{
|
{
|
||||||
uint elt = tileBuffer[eltIndex];
|
for(uint sortIndex = tileArrayIndex; sortIndex > 0u; sortIndex--)
|
||||||
uint eltZIndex = triangleArray[elt].shapeIndex;
|
|
||||||
|
|
||||||
int backIndex = eltIndex-1;
|
|
||||||
for(; backIndex >= 0; backIndex--)
|
|
||||||
{
|
{
|
||||||
uint backElt = tileBuffer[backIndex];
|
uint triangleIndex = indexBuffer[tileArrayBuffer[tileArrayOffset + sortIndex]];
|
||||||
uint backEltZIndex = triangleArray[backElt].shapeIndex;
|
uint prevTriangleIndex = indexBuffer[tileArrayBuffer[tileArrayOffset + sortIndex - 1]];
|
||||||
if(eltZIndex >= backEltZIndex)
|
|
||||||
|
int shapeIndex = vertexBuffer[triangleIndex].shapeIndex;
|
||||||
|
int prevShapeIndex = vertexBuffer[prevTriangleIndex].shapeIndex;
|
||||||
|
|
||||||
|
if(shapeIndex >= prevShapeIndex)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
uint tmp = tileArrayBuffer[tileArrayOffset + sortIndex];
|
||||||
{
|
tileArrayBuffer[tileArrayOffset + sortIndex] = tileArrayBuffer[tileArrayOffset + sortIndex - 1];
|
||||||
tileBuffer[backIndex+1] = backElt;
|
tileArrayBuffer[tileArrayOffset + sortIndex - 1] = tmp;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
tileBuffer[backIndex+1] = elt;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool is_top_left(int2 a, int2 b)
|
||||||
|
{
|
||||||
|
return( (a.y == b.y && b.x < a.x)
|
||||||
|
||(b.y < a.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
//TODO: we should do these computations on 64bits, because otherwise
|
||||||
|
// we might overflow for values > 2048.
|
||||||
|
// Unfortunately this is costly.
|
||||||
|
// Another way is to precompute triangle edges (b - a) in full precision
|
||||||
|
// once to avoid doing it all the time...
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
//TODO: coalesce
|
||||||
int orient2d(int2 a, int2 b, int2 c)
|
int orient2d(int2 a, int2 b, int2 c)
|
||||||
{
|
{
|
||||||
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));
|
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)]],
|
int is_clockwise(int2 p0, int2 p1, int2 p2)
|
||||||
|
{
|
||||||
|
return((p1 - p0).x*(p2 - p0).y - (p1 - p0).y*(p2 - p0).x);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
kernel void RenderKernel(const device mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
|
const device uint* indexBuffer [[buffer(1)]],
|
||||||
|
const device mg_shape* shapeBuffer [[buffer(2)]],
|
||||||
|
const device uint* tileCounters [[buffer(3)]],
|
||||||
|
const device uint* tileArrayBuffer [[buffer(4)]],
|
||||||
|
|
||||||
|
constant float4* clearColor [[buffer(5)]],
|
||||||
|
constant int* useTexture [[buffer(6)]],
|
||||||
|
constant float* scaling [[buffer(7)]],
|
||||||
|
|
||||||
|
texture2d<float, access::write> outTexture [[texture(0)]],
|
||||||
texture2d<float> texAtlas [[texture(1)]],
|
texture2d<float> texAtlas [[texture(1)]],
|
||||||
const device mg_vertex* vertexBuffer [[buffer(0)]],
|
|
||||||
const device mg_shape* shapeBuffer [[buffer(1)]],
|
|
||||||
device uint* tileCounters [[buffer(2)]],
|
|
||||||
const device uint* tilesArray [[buffer(3)]],
|
|
||||||
const device mg_triangle_data* triangleArray [[buffer(4)]],
|
|
||||||
const device float4* boxArray [[buffer(5)]],
|
|
||||||
constant vector_float4* clearColor [[buffer(6)]],
|
|
||||||
constant int* useTexture [[buffer(7)]],
|
|
||||||
constant float* contentsScaling [[buffer(8)]],
|
|
||||||
uint2 gid [[thread_position_in_grid]],
|
uint2 gid [[thread_position_in_grid]],
|
||||||
uint2 tgid [[threadgroup_position_in_grid]],
|
uint2 tgid [[threadgroup_position_in_grid]],
|
||||||
uint2 threadsPerThreadgroup [[threads_per_threadgroup]],
|
uint2 threadsPerThreadgroup [[threads_per_threadgroup]],
|
||||||
uint2 gridSize [[threads_per_grid]])
|
uint2 gridSize [[threads_per_grid]])
|
||||||
{
|
{
|
||||||
//TODO: guard against thread group size not equal to tile size?
|
//TODO: guard against thread group size not equal to tile size?
|
||||||
|
const int2 pixelCoord = int2(gid);
|
||||||
|
const uint2 tileCoord = uint2(pixelCoord)/ RENDERER_TILE_SIZE;
|
||||||
const uint2 tilesMatrixDim = (gridSize - 1) / RENDERER_TILE_SIZE + 1;
|
const uint2 tilesMatrixDim = (gridSize - 1) / RENDERER_TILE_SIZE + 1;
|
||||||
const uint2 tilePos = gid/RENDERER_TILE_SIZE;
|
const uint tileIndex = tileCoord.y * tilesMatrixDim.x + tileCoord.x;
|
||||||
const uint tileIndex = tilePos.y * tilesMatrixDim.x + tilePos.x;
|
const uint tileCounter = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE);
|
||||||
const device uint* tileBuffer = tilesArray + tileIndex * RENDERER_TILE_BUFFER_SIZE;
|
|
||||||
|
|
||||||
const uint tileBufferSize = tileCounters[tileIndex];
|
|
||||||
|
|
||||||
#ifdef RENDERER_DEBUG_TILES
|
#ifdef RENDERER_DEBUG_TILES
|
||||||
//NOTE(martin): color code debug values and show the tile grid
|
//NOTE(martin): color code debug values and show the tile grid
|
||||||
uint nTileX = tilesMatrixDim.x;
|
{
|
||||||
uint nTileY = tilesMatrixDim.y;
|
float4 fragColor = float4(0);
|
||||||
|
|
||||||
if(tilePos.x > nTileX || tilePos.y > nTileY)
|
if( pixelCoord.x % 16 == 0
|
||||||
{
|
||pixelCoord.y % 16 == 0)
|
||||||
outTexture.write(float4(0, 1, 1, 1), gid);
|
{
|
||||||
return;
|
fragColor = float4(0, 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
else if(tileCounters[tileIndex] == 0xffffu)
|
||||||
if((gid.x % RENDERER_TILE_SIZE == 0) || (gid.y % RENDERER_TILE_SIZE == 0))
|
{
|
||||||
{
|
fragColor = float4(1, 0, 1, 1);
|
||||||
outTexture.write(float4(0, 0, 0, 1), gid);
|
}
|
||||||
return;
|
else if(tileCounter != 0u)
|
||||||
}
|
{
|
||||||
if(tileBufferSize <= 0)
|
fragColor = float4(0, 1, 0, 1);
|
||||||
{
|
}
|
||||||
outTexture.write(float4(0, 1, 0, 1), gid);
|
else
|
||||||
return;
|
{
|
||||||
}
|
fragColor = float4(1, 0, 0, 1);
|
||||||
else
|
}
|
||||||
{
|
outTexture.write(fragColor, gid);
|
||||||
outTexture.write(float4(1, 0, 0, 1), gid);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int subPixelFactor = 16;
|
const int subPixelFactor = 16;
|
||||||
int2 pixelCoord = int2(gid);
|
const int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor);
|
||||||
int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor);
|
|
||||||
|
|
||||||
const int sampleCount = 8;
|
const int sampleCount = 8;
|
||||||
int2 samplePoints[sampleCount] = {centerPoint + int2(1, 3),
|
int2 samplePoints[sampleCount] = {centerPoint + int2(1, 3),
|
||||||
|
@ -229,59 +273,75 @@ kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)
|
||||||
centerPoint + int2(-7, 1),
|
centerPoint + int2(-7, 1),
|
||||||
centerPoint + int2(3, -7),
|
centerPoint + int2(3, -7),
|
||||||
centerPoint + int2(7, 7)};
|
centerPoint + int2(7, 7)};
|
||||||
int zIndices[sampleCount];
|
|
||||||
uint flipCounts[sampleCount];
|
|
||||||
float4 pixelColors[sampleCount];
|
|
||||||
float4 nextColors[sampleCount];
|
|
||||||
for(int i=0; i<sampleCount; i++)
|
|
||||||
{
|
|
||||||
zIndices[i] = -1;
|
|
||||||
flipCounts[i] = 0;
|
|
||||||
pixelColors[i] = float4(0, 0, 0, 0);
|
|
||||||
nextColors[i] = float4(0, 0, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(uint tileBufferIndex=0; tileBufferIndex < tileBufferSize; tileBufferIndex++)
|
float4 sampleColor[sampleCount];
|
||||||
{
|
float4 currentColor[sampleCount];
|
||||||
const device mg_triangle_data* triangle = &triangleArray[tileBuffer[tileBufferIndex]];
|
int currentShapeIndex[sampleCount];
|
||||||
|
int flipCount[sampleCount];
|
||||||
|
|
||||||
int2 p0 = int2(triangle->p0 * subPixelFactor);
|
for(int i=0; i<sampleCount; i++)
|
||||||
int2 p1 = int2(triangle->p1 * subPixelFactor);
|
{
|
||||||
int2 p2 = int2(triangle->p2 * subPixelFactor);
|
currentShapeIndex[i] = -1;
|
||||||
|
flipCount[i] = 0;
|
||||||
|
sampleColor[i] = float4(0, 0, 0, 0);
|
||||||
|
currentColor[i] = float4(0, 0, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
int bias0 = triangle->bias0;
|
for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++)
|
||||||
int bias1 = triangle->bias1;
|
{
|
||||||
int bias2 = triangle->bias2;
|
int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex];
|
||||||
|
|
||||||
const device mg_vertex* v0 = &(vertexBuffer[triangle->i0]);
|
uint i0 = indexBuffer[triangleIndex];
|
||||||
const device mg_vertex* v1 = &(vertexBuffer[triangle->i1]);
|
uint i1 = indexBuffer[triangleIndex+1];
|
||||||
const device mg_vertex* v2 = &(vertexBuffer[triangle->i2]);
|
uint i2 = indexBuffer[triangleIndex+2];
|
||||||
|
|
||||||
float4 cubic0 = v0->cubic;
|
int2 p0 = int2((vertexBuffer[i0].pos * scaling[0]) * subPixelFactor);
|
||||||
float4 cubic1 = v1->cubic;
|
int2 p1 = int2((vertexBuffer[i1].pos * scaling[0]) * subPixelFactor);
|
||||||
float4 cubic2 = v2->cubic;
|
int2 p2 = int2((vertexBuffer[i2].pos * scaling[0]) * subPixelFactor);
|
||||||
|
|
||||||
int shapeIndex = v0->shapeIndex;
|
int shapeIndex = vertexBuffer[i0].shapeIndex;
|
||||||
float4 color = shapeBuffer[shapeIndex].color;
|
float4 color = shapeBuffer[shapeIndex].color;
|
||||||
color.rgb *= color.a;
|
color.rgb *= color.a;
|
||||||
|
|
||||||
|
int4 clip = int4(round((shapeBuffer[shapeIndex].clip * scaling[0] + float4(0.5, 0.5, 0.5, 0.5)) * subPixelFactor));
|
||||||
|
|
||||||
const device float* uvTransform2x3 = shapeBuffer[shapeIndex].uvTransform;
|
const device float* uvTransform2x3 = shapeBuffer[shapeIndex].uvTransform;
|
||||||
matrix_float3x3 uvTransform = {{uvTransform2x3[0], uvTransform2x3[3], 0},
|
matrix_float3x3 uvTransform = {{uvTransform2x3[0], uvTransform2x3[3], 0},
|
||||||
{uvTransform2x3[1], uvTransform2x3[4], 0},
|
{uvTransform2x3[1], uvTransform2x3[4], 0},
|
||||||
{uvTransform2x3[2], uvTransform2x3[5], 1}};
|
{uvTransform2x3[2], uvTransform2x3[5], 1}};
|
||||||
|
|
||||||
for(int sampleIndex=0; sampleIndex<sampleCount; sampleIndex++)
|
//NOTE(martin): reorder triangle counter-clockwise and compute bias for each edge
|
||||||
|
int cw = is_clockwise(p0, p1, p2);
|
||||||
|
if(cw < 0)
|
||||||
|
{
|
||||||
|
uint tmpIndex = i1;
|
||||||
|
i1 = i2;
|
||||||
|
i2 = tmpIndex;
|
||||||
|
|
||||||
|
int2 tmpPoint = p1;
|
||||||
|
p1 = p2;
|
||||||
|
p2 = tmpPoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 cubic0 = vertexBuffer[i0].cubic;
|
||||||
|
float4 cubic1 = vertexBuffer[i1].cubic;
|
||||||
|
float4 cubic2 = vertexBuffer[i2].cubic;
|
||||||
|
|
||||||
|
int bias0 = is_top_left(p1, p2) ? 0 : -1;
|
||||||
|
int bias1 = is_top_left(p2, p0) ? 0 : -1;
|
||||||
|
int bias2 = is_top_left(p0, p1) ? 0 : -1;
|
||||||
|
|
||||||
|
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
||||||
{
|
{
|
||||||
int2 samplePoint = samplePoints[sampleIndex];
|
int2 samplePoint = samplePoints[sampleIndex];
|
||||||
|
|
||||||
//NOTE(martin): cull if pixel is outside box
|
if( samplePoint.x < clip.x
|
||||||
/*
|
|| samplePoint.x > clip.z
|
||||||
// if we use this, make sure box is in fixed points coords
|
|| samplePoint.y < clip.y
|
||||||
if(samplePoint.x < box.x || samplePoint.x > box.z || samplePoint.y < box.y || samplePoint.y > box.w)
|
|| samplePoint.y > clip.w)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
int w0 = orient2d(p1, p2, samplePoint);
|
int w0 = orient2d(p1, p2, samplePoint);
|
||||||
int w1 = orient2d(p2, p0, samplePoint);
|
int w1 = orient2d(p2, p0, samplePoint);
|
||||||
|
@ -291,57 +351,52 @@ kernel void RenderKernel(texture2d<float, access::write> outTexture [[texture(0)
|
||||||
{
|
{
|
||||||
float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
|
float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
|
||||||
|
|
||||||
//TODO(martin): this is a quick and dirty fix for solid polygons where we use
|
|
||||||
// cubic = (1, 1, 1, 1) on all vertices, which can cause small errors to
|
|
||||||
// flip the sign.
|
|
||||||
// We should really use another value that always lead to <= 0, but we must
|
|
||||||
// make sure we never share these vertices with bezier shapes.
|
|
||||||
// Alternatively, an ugly (but maybe less than this one) solution would be
|
|
||||||
// to check if uvs are equal on all vertices of the triangle and always render
|
|
||||||
// those triangles.
|
|
||||||
float eps = 0.0001;
|
float eps = 0.0001;
|
||||||
if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
|
if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
|
||||||
{
|
{
|
||||||
if(shapeIndex == zIndices[sampleIndex])
|
if(shapeIndex == currentShapeIndex[sampleIndex])
|
||||||
{
|
{
|
||||||
flipCounts[sampleIndex]++;
|
flipCount[sampleIndex]++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(flipCounts[sampleIndex] & 0x01)
|
if(flipCount[sampleIndex] & 0x01)
|
||||||
{
|
{
|
||||||
pixelColors[sampleIndex] = nextColors[sampleIndex];
|
sampleColor[sampleIndex] = currentColor[sampleIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
float4 nextColor = color;
|
float4 nextColor = color;
|
||||||
if(*useTexture)
|
|
||||||
|
if(useTexture[0])
|
||||||
{
|
{
|
||||||
float2 sampleFP = float2(samplePoint)/subPixelFactor;
|
float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1);
|
||||||
float2 uv = (uvTransform*(float3(sampleFP/contentsScaling[0], 1))).xy;
|
float2 uv = (uvTransform * sampleFP).xy;
|
||||||
|
|
||||||
constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
|
constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
|
||||||
float4 texColor = texAtlas.sample(smp, uv);
|
float4 texColor = texAtlas.sample(smp, uv);
|
||||||
|
|
||||||
texColor.rgb *= texColor.a;
|
texColor.rgb *= texColor.a;
|
||||||
nextColor *= texColor;
|
nextColor *= texColor;
|
||||||
}
|
}
|
||||||
|
|
||||||
nextColors[sampleIndex] = pixelColors[sampleIndex]*(1-nextColor.a) + nextColor;
|
currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor;
|
||||||
zIndices[sampleIndex] = shapeIndex;
|
currentShapeIndex[sampleIndex] = shapeIndex;
|
||||||
flipCounts[sampleIndex] = 1;
|
flipCount[sampleIndex] = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 pixelColor = float4(0);
|
||||||
|
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
||||||
|
{
|
||||||
|
if(flipCount[sampleIndex] & 0x01)
|
||||||
|
{
|
||||||
|
sampleColor[sampleIndex] = currentColor[sampleIndex];
|
||||||
|
}
|
||||||
|
pixelColor += sampleColor[sampleIndex];
|
||||||
}
|
}
|
||||||
float4 out = float4(0, 0, 0, 0);
|
|
||||||
for(int i=0; i<sampleCount; i++)
|
outTexture.write(pixelColor/float(sampleCount), gid);
|
||||||
{
|
|
||||||
if(flipCounts[i] & 0x01)
|
|
||||||
{
|
|
||||||
pixelColors[i] = nextColors[i];
|
|
||||||
}
|
|
||||||
out += pixelColors[i];
|
|
||||||
}
|
|
||||||
out = out/sampleCount;
|
|
||||||
outTexture.write(out, gid);
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue