[mtl canvas] trying fused boxing/tiling passes
This commit is contained in:
parent
fd5a4d4cd8
commit
ee4a667258
|
@ -198,9 +198,11 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
|||
[tileEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 2];
|
||||
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
||||
[tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
||||
[tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 5];
|
||||
[tileEncoder setBuffer: backend->boxArray offset:0 atIndex: 6];
|
||||
|
||||
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 5];
|
||||
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 6];
|
||||
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 7];
|
||||
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 8];
|
||||
|
||||
MTLSize tileGroupSize = MTLSizeMake(backend->tilingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
||||
MTLSize tileGridSize = MTLSizeMake(indexCount/3, 1, 1);
|
||||
|
@ -215,11 +217,9 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
|||
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||
sortEncoder.label = @"sorting pass";
|
||||
[sortEncoder setComputePipelineState: backend->sortingPipeline];
|
||||
[sortEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
|
||||
[sortEncoder setBuffer: backend->indexBuffer offset:backend->indexBufferOffset atIndex: 1];
|
||||
[sortEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 2];
|
||||
[sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
||||
[sortEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
||||
[sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 0];
|
||||
[sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
|
||||
[sortEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2];
|
||||
|
||||
u32 nTilesX = (viewportSize.x + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
||||
u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
||||
|
@ -239,11 +239,11 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
|||
id<MTLComputeCommandEncoder> drawEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||
drawEncoder.label = @"drawing pass";
|
||||
[drawEncoder setComputePipelineState:backend->computePipeline];
|
||||
[drawEncoder setBuffer: backend->vertexBuffer offset:backend->vertexBufferOffset atIndex: 0];
|
||||
[drawEncoder setBuffer: backend->indexBuffer offset:backend->indexBufferOffset atIndex: 1];
|
||||
[drawEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 2];
|
||||
[drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
|
||||
[drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
|
||||
[drawEncoder setBuffer: backend->shapeBuffer offset:backend->shapeBufferOffset atIndex: 0];
|
||||
[drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
|
||||
[drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2];
|
||||
[sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3];
|
||||
[tileEncoder setBuffer: backend->boxArray offset:0 atIndex: 4];
|
||||
|
||||
[drawEncoder setTexture: backend->outTexture atIndex: 0];
|
||||
int useTexture = 0;
|
||||
|
@ -255,7 +255,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
|||
}
|
||||
|
||||
[drawEncoder setBytes: &clearColorVec4 length: sizeof(vector_float4) atIndex: 5];
|
||||
[drawEncoder setBytes: &useTexture length:sizeof(int) atIndex:6];
|
||||
[drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 6];
|
||||
[drawEncoder setBytes: &scale length: sizeof(float) atIndex: 7];
|
||||
|
||||
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
|
||||
|
|
|
@ -35,9 +35,10 @@ typedef struct mg_shape
|
|||
|
||||
typedef struct mg_triangle_data
|
||||
{
|
||||
uint i0;
|
||||
uint i1;
|
||||
uint i2;
|
||||
vector_float4 cubic0;
|
||||
vector_float4 cubic1;
|
||||
vector_float4 cubic2;
|
||||
|
||||
uint shapeIndex;
|
||||
|
||||
vector_float2 p0;
|
||||
|
|
|
@ -101,8 +101,10 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
|||
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||
device volatile atomic_uint* tileCounters [[buffer(3)]],
|
||||
device uint* tileArrayBuffer [[buffer(4)]],
|
||||
constant uint2* viewport [[buffer(5)]],
|
||||
constant float* scaling [[buffer(6)]],
|
||||
device mg_triangle_data* triangleArray [[buffer(5)]],
|
||||
device float4* boxArray [[buffer(6)]],
|
||||
constant uint2* viewport [[buffer(7)]],
|
||||
constant float* scaling [[buffer(8)]],
|
||||
uint gid [[thread_position_in_grid]])
|
||||
{
|
||||
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
|
||||
|
@ -112,22 +114,57 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
|||
uint triangleIndex = gid * 3;
|
||||
|
||||
uint i0 = indexBuffer[triangleIndex];
|
||||
uint i1 = indexBuffer[triangleIndex+1u];
|
||||
uint i2 = indexBuffer[triangleIndex+2u];
|
||||
uint i1 = indexBuffer[triangleIndex+1];
|
||||
uint i2 = indexBuffer[triangleIndex+2];
|
||||
|
||||
float2 p0 = vertexBuffer[i0].pos * scaling[0];
|
||||
float2 p1 = vertexBuffer[i1].pos * scaling[0];
|
||||
float2 p2 = vertexBuffer[i2].pos * scaling[0];
|
||||
|
||||
//NOTE(martin): compute triangle bounding box
|
||||
float2 boxMin = min(min(p0, p1), p2);
|
||||
float2 boxMax = max(max(p0, p1), p2);
|
||||
|
||||
//NOTE(martin): clip bounding box against clip rect
|
||||
int shapeIndex = vertexBuffer[i0].shapeIndex;
|
||||
float4 clip = shapeBuffer[shapeIndex].clip * scaling[0];
|
||||
|
||||
float4 fbox = float4(max(min(min(p0.x, p1.x), p2.x), clip.x),
|
||||
max(min(min(p0.y, p1.y), p2.y), clip.y),
|
||||
min(max(max(p0.x, p1.x), p2.x), clip.z),
|
||||
min(max(max(p0.y, p1.y), p2.y), clip.w));
|
||||
vector_float4 clip = scaling[0]*shapeBuffer[shapeIndex].clip;
|
||||
|
||||
int4 box = int4(floor(fbox))/int(RENDERER_TILE_SIZE);
|
||||
//NOTE(martin): intersect with current clip
|
||||
boxMin = max(boxMin, clip.xy);
|
||||
boxMax = min(boxMax, clip.zw);
|
||||
|
||||
//NOTE(martin): reorder triangle counter-clockwise and compute bias for each edge
|
||||
float cw = (p1 - p0).x*(p2 - p0).y - (p1 - p0).y*(p2 - p0).x;
|
||||
if(cw < 0)
|
||||
{
|
||||
uint tmpIndex = i1;
|
||||
i1 = i2;
|
||||
i2 = tmpIndex;
|
||||
|
||||
float2 tmpPoint = p1;
|
||||
p1 = p2;
|
||||
p2 = tmpPoint;
|
||||
}
|
||||
int bias0 = is_top_left(p1, p2) ? 0 : -1;
|
||||
int bias1 = is_top_left(p2, p0) ? 0 : -1;
|
||||
int bias2 = is_top_left(p0, p1) ? 0 : -1;
|
||||
|
||||
//NOTE(martin): fill triangle data
|
||||
boxArray[gid] = float4(boxMin.x, boxMin.y, boxMax.x, boxMax.y);
|
||||
|
||||
triangleArray[gid].shapeIndex = shapeIndex;
|
||||
triangleArray[gid].cubic0 = vertexBuffer[i0].cubic;
|
||||
triangleArray[gid].cubic1 = vertexBuffer[i1].cubic;
|
||||
triangleArray[gid].cubic2 = vertexBuffer[i2].cubic;
|
||||
triangleArray[gid].p0 = p0;
|
||||
triangleArray[gid].p1 = p1;
|
||||
triangleArray[gid].p2 = p2;
|
||||
triangleArray[gid].bias0 = bias0;
|
||||
triangleArray[gid].bias1 = bias1;
|
||||
triangleArray[gid].bias2 = bias2;
|
||||
|
||||
int4 box = int4(floor(boxArray[gid]))/RENDERER_TILE_SIZE;
|
||||
|
||||
//NOTE(martin): it's importat to do the computation with signed int, so that we can have negative xMax/yMax
|
||||
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
|
||||
|
@ -144,17 +181,15 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
|||
uint counter = atomic_fetch_add_explicit(&(tileCounters[tileIndex]), 1, memory_order_relaxed);
|
||||
if(counter < RENDERER_TILE_BUFFER_SIZE)
|
||||
{
|
||||
tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + counter] = triangleIndex;
|
||||
tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + counter] = gid;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kernel void SortKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||
constant uint* indexBuffer [[buffer(1)]],
|
||||
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||
const device uint* tileCounters [[buffer(3)]],
|
||||
device uint* tileArrayBuffer [[buffer(4)]],
|
||||
kernel void SortKernel(const device mg_triangle_data* triangleArray [[buffer(0)]],
|
||||
const device uint* tileCounters [[buffer(1)]],
|
||||
device uint* tileArrayBuffer [[buffer(2)]],
|
||||
uint gid [[thread_position_in_grid]])
|
||||
{
|
||||
uint tileIndex = gid;
|
||||
|
@ -163,13 +198,10 @@ kernel void SortKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
|||
|
||||
for(uint tileArrayIndex=1; tileArrayIndex < tileArrayCount; tileArrayIndex++)
|
||||
{
|
||||
for(uint sortIndex = tileArrayIndex; sortIndex > 0u; sortIndex--)
|
||||
for(uint sortIndex = tileArrayIndex; sortIndex > 0; sortIndex--)
|
||||
{
|
||||
uint triangleIndex = indexBuffer[tileArrayBuffer[tileArrayOffset + sortIndex]];
|
||||
uint prevTriangleIndex = indexBuffer[tileArrayBuffer[tileArrayOffset + sortIndex - 1]];
|
||||
|
||||
int shapeIndex = vertexBuffer[triangleIndex].shapeIndex;
|
||||
int prevShapeIndex = vertexBuffer[prevTriangleIndex].shapeIndex;
|
||||
int shapeIndex = triangleArray[tileArrayBuffer[tileArrayOffset + sortIndex]].shapeIndex;
|
||||
int prevShapeIndex = triangleArray[tileArrayBuffer[tileArrayOffset + sortIndex - 1]].shapeIndex;
|
||||
|
||||
if(shapeIndex >= prevShapeIndex)
|
||||
{
|
||||
|
@ -209,11 +241,11 @@ int is_clockwise(int2 p0, int2 p1, int2 p2)
|
|||
}
|
||||
|
||||
|
||||
kernel void RenderKernel(const device mg_vertex* vertexBuffer [[buffer(0)]],
|
||||
const device uint* indexBuffer [[buffer(1)]],
|
||||
const device mg_shape* shapeBuffer [[buffer(2)]],
|
||||
const device uint* tileCounters [[buffer(3)]],
|
||||
const device uint* tileArrayBuffer [[buffer(4)]],
|
||||
kernel void RenderKernel(const device mg_shape* shapeBuffer [[buffer(0)]],
|
||||
device uint* tileCounters [[buffer(1)]],
|
||||
const device uint* tileArrayBuffer [[buffer(2)]],
|
||||
const device mg_triangle_data* triangleArray [[buffer(3)]],
|
||||
const device float4* boxArray [[buffer(4)]],
|
||||
|
||||
constant float4* clearColor [[buffer(5)]],
|
||||
constant int* useTexture [[buffer(6)]],
|
||||
|
@ -290,47 +322,31 @@ kernel void RenderKernel(const device mg_vertex* vertexBuffer [[buffer(0)]],
|
|||
for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++)
|
||||
{
|
||||
int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex];
|
||||
const device mg_triangle_data* triangle = &triangleArray[triangleIndex];
|
||||
|
||||
uint i0 = indexBuffer[triangleIndex];
|
||||
uint i1 = indexBuffer[triangleIndex+1];
|
||||
uint i2 = indexBuffer[triangleIndex+2];
|
||||
int2 p0 = int2(triangle->p0 * subPixelFactor);
|
||||
int2 p1 = int2(triangle->p1 * subPixelFactor);
|
||||
int2 p2 = int2(triangle->p2 * subPixelFactor);
|
||||
|
||||
int2 p0 = int2((vertexBuffer[i0].pos * scaling[0]) * subPixelFactor);
|
||||
int2 p1 = int2((vertexBuffer[i1].pos * scaling[0]) * subPixelFactor);
|
||||
int2 p2 = int2((vertexBuffer[i2].pos * scaling[0]) * subPixelFactor);
|
||||
int bias0 = triangle->bias0;
|
||||
int bias1 = triangle->bias1;
|
||||
int bias2 = triangle->bias2;
|
||||
|
||||
int shapeIndex = vertexBuffer[i0].shapeIndex;
|
||||
float4 cubic0 = triangle->cubic0;
|
||||
float4 cubic1 = triangle->cubic1;
|
||||
float4 cubic2 = triangle->cubic2;
|
||||
|
||||
int shapeIndex = triangle->shapeIndex;
|
||||
float4 color = shapeBuffer[shapeIndex].color;
|
||||
color.rgb *= color.a;
|
||||
|
||||
int4 clip = int4(round((shapeBuffer[shapeIndex].clip * scaling[0] + float4(0.5, 0.5, 0.5, 0.5)) * subPixelFactor));
|
||||
int4 clip = int4(boxArray[triangleIndex] * subPixelFactor);
|
||||
|
||||
const device float* uvTransform2x3 = shapeBuffer[shapeIndex].uvTransform;
|
||||
matrix_float3x3 uvTransform = {{uvTransform2x3[0], uvTransform2x3[3], 0},
|
||||
{uvTransform2x3[1], uvTransform2x3[4], 0},
|
||||
{uvTransform2x3[2], uvTransform2x3[5], 1}};
|
||||
|
||||
//NOTE(martin): reorder triangle counter-clockwise and compute bias for each edge
|
||||
int cw = is_clockwise(p0, p1, p2);
|
||||
if(cw < 0)
|
||||
{
|
||||
uint tmpIndex = i1;
|
||||
i1 = i2;
|
||||
i2 = tmpIndex;
|
||||
|
||||
int2 tmpPoint = p1;
|
||||
p1 = p2;
|
||||
p2 = tmpPoint;
|
||||
}
|
||||
|
||||
float4 cubic0 = vertexBuffer[i0].cubic;
|
||||
float4 cubic1 = vertexBuffer[i1].cubic;
|
||||
float4 cubic2 = vertexBuffer[i2].cubic;
|
||||
|
||||
int bias0 = is_top_left(p1, p2) ? 0 : -1;
|
||||
int bias1 = is_top_left(p2, p0) ? 0 : -1;
|
||||
int bias2 = is_top_left(p0, p1) ? 0 : -1;
|
||||
|
||||
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
||||
{
|
||||
int2 samplePoint = samplePoints[sampleIndex];
|
||||
|
|
Loading…
Reference in New Issue