[mtl canvas] remove box array, cache box in triangle struct

This commit is contained in:
Martin Fouilleul 2023-03-13 20:12:06 +01:00
parent 6536f4ac64
commit 1b3583cc6d
3 changed files with 33 additions and 93 deletions

View File

@ -199,10 +199,9 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3]; [tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
[tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4]; [tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
[tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 5]; [tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 5];
[tileEncoder setBuffer: backend->boxArray offset:0 atIndex: 6];
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 7]; [tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 6];
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 8]; [tileEncoder setBytes: &scale length: sizeof(float) atIndex: 7];
MTLSize tileGroupSize = MTLSizeMake(backend->tilingPipeline.maxTotalThreadsPerThreadgroup, 1, 1); MTLSize tileGroupSize = MTLSizeMake(backend->tilingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
MTLSize tileGridSize = MTLSizeMake(indexCount/3, 1, 1); MTLSize tileGridSize = MTLSizeMake(indexCount/3, 1, 1);
@ -243,7 +242,6 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
[drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1]; [drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
[drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2]; [drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2];
[drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3]; [drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3];
[drawEncoder setBuffer: backend->boxArray offset:0 atIndex: 4];
[drawEncoder setTexture: backend->outTexture atIndex: 0]; [drawEncoder setTexture: backend->outTexture atIndex: 0];
int useTexture = 0; int useTexture = 0;
@ -254,9 +252,9 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
useTexture = 1; useTexture = 1;
} }
[drawEncoder setBytes: &clearColorVec4 length: sizeof(vector_float4) atIndex: 5]; [drawEncoder setBytes: &clearColorVec4 length: sizeof(vector_float4) atIndex: 4];
[drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 6]; [drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 5];
[drawEncoder setBytes: &scale length: sizeof(float) atIndex: 7]; [drawEncoder setBytes: &scale length: sizeof(float) atIndex: 6];
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup //TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup); DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup);

View File

@ -35,20 +35,22 @@ typedef struct mg_shape
typedef struct mg_triangle_data typedef struct mg_triangle_data
{ {
vector_int2 p0;
vector_int2 p1;
vector_int2 p2;
vector_float4 cubic0; vector_float4 cubic0;
vector_float4 cubic1; vector_float4 cubic1;
vector_float4 cubic2; vector_float4 cubic2;
uint shapeIndex; vector_int4 box;
vector_float2 p0;
vector_float2 p1;
vector_float2 p2;
int bias0; int bias0;
int bias1; int bias1;
int bias2; int bias2;
int shapeIndex;
} mg_triangle_data; } mg_triangle_data;
#endif //__MTL_RENDERER_H_ #endif //__MTL_RENDERER_H_

View File

@ -31,70 +31,6 @@ bool is_top_left(float2 a, float2 b)
return( (a.y == b.y && b.x < a.x) return( (a.y == b.y && b.x < a.x)
||(b.y < a.y)); ||(b.y < a.y));
} }
/*
kernel void BoundingBoxKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
constant uint* indexBuffer [[buffer(1)]],
constant mg_shape* shapeBuffer [[buffer(2)]],
device mg_triangle_data* triangleArray [[buffer(3)]],
device float4* boxArray [[buffer(4)]],
constant float* contentsScaling [[buffer(5)]],
uint gid [[thread_position_in_grid]])
{
uint triangleIndex = gid;
uint vertexIndex = triangleIndex*3;
uint i0 = indexBuffer[vertexIndex];
uint i1 = indexBuffer[vertexIndex+1];
uint i2 = indexBuffer[vertexIndex+2];
float2 p0 = vertexBuffer[i0].pos * contentsScaling[0];
float2 p1 = vertexBuffer[i1].pos * contentsScaling[0];
float2 p2 = vertexBuffer[i2].pos * contentsScaling[0];
//NOTE(martin): compute triangle bounding box
float2 boxMin = min(min(p0, p1), p2);
float2 boxMax = max(max(p0, p1), p2);
//NOTE(martin): clip bounding box against clip rect
int shapeIndex = vertexBuffer[i0].shapeIndex;
vector_float4 clip = contentsScaling[0]*shapeBuffer[shapeIndex].clip;
//NOTE(martin): intersect with current clip
boxMin = max(boxMin, clip.xy);
boxMax = min(boxMax, clip.zw);
//NOTE(martin): reorder triangle counter-clockwise and compute bias for each edge
float cw = (p1 - p0).x*(p2 - p0).y - (p1 - p0).y*(p2 - p0).x;
if(cw < 0)
{
uint tmpIndex = i1;
i1 = i2;
i2 = tmpIndex;
float2 tmpPoint = p1;
p1 = p2;
p2 = tmpPoint;
}
int bias0 = is_top_left(p1, p2) ? 0 : -1;
int bias1 = is_top_left(p2, p0) ? 0 : -1;
int bias2 = is_top_left(p0, p1) ? 0 : -1;
//NOTE(martin): fill triangle data
boxArray[triangleIndex] = float4(boxMin.x, boxMin.y, boxMax.x, boxMax.y);
triangleArray[triangleIndex].shapeIndex = shapeIndex;
triangleArray[triangleIndex].i0 = i0;
triangleArray[triangleIndex].i1 = i1;
triangleArray[triangleIndex].i2 = i2;
triangleArray[triangleIndex].p0 = p0;
triangleArray[triangleIndex].p1 = p1;
triangleArray[triangleIndex].p2 = p2;
triangleArray[triangleIndex].bias0 = bias0;
triangleArray[triangleIndex].bias1 = bias1;
triangleArray[triangleIndex].bias2 = bias2;
}
*/
kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
constant uint* indexBuffer [[buffer(1)]], constant uint* indexBuffer [[buffer(1)]],
@ -102,9 +38,8 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
device volatile atomic_uint* tileCounters [[buffer(3)]], device volatile atomic_uint* tileCounters [[buffer(3)]],
device uint* tileArrayBuffer [[buffer(4)]], device uint* tileArrayBuffer [[buffer(4)]],
device mg_triangle_data* triangleArray [[buffer(5)]], device mg_triangle_data* triangleArray [[buffer(5)]],
device float4* boxArray [[buffer(6)]], constant uint2* viewport [[buffer(6)]],
constant uint2* viewport [[buffer(7)]], constant float* scaling [[buffer(7)]],
constant float* scaling [[buffer(8)]],
uint gid [[thread_position_in_grid]]) uint gid [[thread_position_in_grid]])
{ {
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1; uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
@ -151,20 +86,26 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
int bias2 = is_top_left(p0, p1) ? 0 : -1; int bias2 = is_top_left(p0, p1) ? 0 : -1;
//NOTE(martin): fill triangle data //NOTE(martin): fill triangle data
boxArray[gid] = float4(boxMin.x, boxMin.y, boxMax.x, boxMax.y); const float subPixelFactor = 16;
float4 fbox = float4(boxMin.x, boxMin.y, boxMax.x, boxMax.y);
triangleArray[gid].box = int4(fbox * subPixelFactor);
triangleArray[gid].shapeIndex = shapeIndex; triangleArray[gid].shapeIndex = shapeIndex;
triangleArray[gid].cubic0 = vertexBuffer[i0].cubic; triangleArray[gid].cubic0 = vertexBuffer[i0].cubic;
triangleArray[gid].cubic1 = vertexBuffer[i1].cubic; triangleArray[gid].cubic1 = vertexBuffer[i1].cubic;
triangleArray[gid].cubic2 = vertexBuffer[i2].cubic; triangleArray[gid].cubic2 = vertexBuffer[i2].cubic;
triangleArray[gid].p0 = p0;
triangleArray[gid].p1 = p1; triangleArray[gid].p0 = int2(p0 * subPixelFactor);
triangleArray[gid].p2 = p2; triangleArray[gid].p1 = int2(p1 * subPixelFactor);
triangleArray[gid].p2 = int2(p2 * subPixelFactor);
triangleArray[gid].bias0 = bias0; triangleArray[gid].bias0 = bias0;
triangleArray[gid].bias1 = bias1; triangleArray[gid].bias1 = bias1;
triangleArray[gid].bias2 = bias2; triangleArray[gid].bias2 = bias2;
int4 box = int4(floor(boxArray[gid]))/RENDERER_TILE_SIZE; int4 box = int4(floor(fbox))/RENDERER_TILE_SIZE;
//NOTE(martin): it's importat to do the computation with signed int, so that we can have negative xMax/yMax //NOTE(martin): it's importat to do the computation with signed int, so that we can have negative xMax/yMax
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0. // otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
@ -187,7 +128,7 @@ kernel void TileKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
} }
} }
kernel void SortKernel(const device mg_triangle_data* triangleArray [[buffer(0)]], kernel void SortKernel(constant mg_triangle_data* triangleArray [[buffer(0)]],
const device uint* tileCounters [[buffer(1)]], const device uint* tileCounters [[buffer(1)]],
device uint* tileArrayBuffer [[buffer(2)]], device uint* tileArrayBuffer [[buffer(2)]],
uint gid [[thread_position_in_grid]]) uint gid [[thread_position_in_grid]])
@ -245,11 +186,10 @@ kernel void RenderKernel(const device mg_shape* shapeBuffer [[buffer(0)]],
device uint* tileCounters [[buffer(1)]], device uint* tileCounters [[buffer(1)]],
const device uint* tileArrayBuffer [[buffer(2)]], const device uint* tileArrayBuffer [[buffer(2)]],
const device mg_triangle_data* triangleArray [[buffer(3)]], const device mg_triangle_data* triangleArray [[buffer(3)]],
const device float4* boxArray [[buffer(4)]],
constant float4* clearColor [[buffer(5)]], constant float4* clearColor [[buffer(4)]],
constant int* useTexture [[buffer(6)]], constant int* useTexture [[buffer(5)]],
constant float* scaling [[buffer(7)]], constant float* scaling [[buffer(6)]],
texture2d<float, access::write> outTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(0)]],
texture2d<float> texAtlas [[texture(1)]], texture2d<float> texAtlas [[texture(1)]],
@ -324,9 +264,9 @@ kernel void RenderKernel(const device mg_shape* shapeBuffer [[buffer(0)]],
int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex]; int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex];
const device mg_triangle_data* triangle = &triangleArray[triangleIndex]; const device mg_triangle_data* triangle = &triangleArray[triangleIndex];
int2 p0 = int2(triangle->p0 * subPixelFactor); int2 p0 = triangle->p0;
int2 p1 = int2(triangle->p1 * subPixelFactor); int2 p1 = triangle->p1;
int2 p2 = int2(triangle->p2 * subPixelFactor); int2 p2 = triangle->p2;
int bias0 = triangle->bias0; int bias0 = triangle->bias0;
int bias1 = triangle->bias1; int bias1 = triangle->bias1;
@ -340,7 +280,7 @@ kernel void RenderKernel(const device mg_shape* shapeBuffer [[buffer(0)]],
float4 color = shapeBuffer[shapeIndex].color; float4 color = shapeBuffer[shapeIndex].color;
color.rgb *= color.a; color.rgb *= color.a;
int4 clip = int4(boxArray[triangleIndex] * subPixelFactor); int4 clip = triangle->box;
const device float* uvTransform2x3 = shapeBuffer[shapeIndex].uvTransform; const device float* uvTransform2x3 = shapeBuffer[shapeIndex].uvTransform;
matrix_float3x3 uvTransform = {{uvTransform2x3[0], uvTransform2x3[3], 0}, matrix_float3x3 uvTransform = {{uvTransform2x3[0], uvTransform2x3[3], 0},