diff --git a/examples/tiger/main.c b/examples/tiger/main.c index 44d1d62..55e8164 100644 --- a/examples/tiger/main.c +++ b/examples/tiger/main.c @@ -65,7 +65,7 @@ int main() //NOTE: create surface mg_surface surface = mg_surface_create_for_window(window, MG_BACKEND_DEFAULT); - mg_surface_swap_interval(surface, 1); + mg_surface_swap_interval(surface, 0); //TODO: create canvas mg_canvas canvas = mg_canvas_create(surface); diff --git a/src/mtl_canvas.m b/src/mtl_canvas.m index 6da7310..0516f2f 100644 --- a/src/mtl_canvas.m +++ b/src/mtl_canvas.m @@ -244,6 +244,7 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image [shapeEncoder setBuffer: backend->tilesBuffer offset:0 atIndex: 2]; [shapeEncoder setBuffer: backend->tilesOffset offset:0 atIndex: 3]; [shapeEncoder setBytes: &scale length: sizeof(float) atIndex: 4]; + [shapeEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 5]; MTLSize shapeGroupSize = MTLSizeMake(backend->shapePipeline.maxTotalThreadsPerThreadgroup, 1, 1); MTLSize shapeGridSize = MTLSizeMake(shapeCount, 1, 1); diff --git a/src/mtl_shader.h b/src/mtl_shader.h index 604488a..7317800 100644 --- a/src/mtl_shader.h +++ b/src/mtl_shader.h @@ -65,12 +65,13 @@ typedef struct mg_triangle_data using namespace metal; #endif -#define MG_TILE_CMD_MASK (1<<31) +#define MG_TILE_CMD_MASK (3<<30) typedef enum mg_tile_cmd_kind { mg_cmd_triangle = 0, - mg_cmd_color = 1<<31, + mg_cmd_color = 1<<30, + mg_cmd_flip = 2<<30 } mg_tile_cmd_kind; typedef int mg_tile_cmd; diff --git a/src/mtl_shader.metal b/src/mtl_shader.metal index 1ee6241..7df34d3 100644 --- a/src/mtl_shader.metal +++ b/src/mtl_shader.metal @@ -61,9 +61,10 @@ kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]], device mg_tile* tilesBuffer [[buffer(2)]], device volatile atomic_uint* tilesOffset [[buffer(3)]], constant float* scaling [[buffer(4)]], + constant int2* viewport [[buffer(5)]], uint gid [[thread_position_in_grid]]) { - + int2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1; float4 box = shapeBuffer[gid].clip * scaling[0]; int2 firstTile = int2(box.xy)/RENDERER_TILE_SIZE; @@ -72,8 +73,11 @@ kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]], // any tile queue, the tileQueues pointer for that shape would alias the tileQueues pointer of another // shape, and we would have to detect that in the tiling and drawing kernels. Instead, just accept some // waste and keep the other kernels more uniforms for now... - int nTilesX = int(box.z)/RENDERER_TILE_SIZE - firstTile.x + 1; - int nTilesY = int(box.w)/RENDERER_TILE_SIZE - firstTile.y + 1; + //TODO limit to screen + int2 lastTile = max(firstTile, min(int2(box.zw)/RENDERER_TILE_SIZE, tilesMatrixDim)); + + int nTilesX = lastTile.x - firstTile.x + 1; + int nTilesY = lastTile.y - firstTile.y + 1; int tileCount = nTilesX * nTilesY; @@ -172,8 +176,8 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], int xMin = max(0, coarseBox.x - shapeQueue->area.x); int yMin = max(0, coarseBox.y - shapeQueue->area.y); - int xMax = min(coarseBox.z - shapeQueue->area.x, shapeQueue->area.z); - int yMax = min(coarseBox.w - shapeQueue->area.y, shapeQueue->area.w); + int xMax = min(coarseBox.z - shapeQueue->area.x, shapeQueue->area.z-1); + int yMax = min(coarseBox.w - shapeQueue->area.y, shapeQueue->area.w-1); //NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax // otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0. @@ -233,6 +237,7 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], device mg_tile_elt* elt = &eltBuffer[eltIndex]; elt->triangleIndex = gid; + elt->next = atomic_exchange_explicit(&tile->firstElt, eltIndex, memory_order_relaxed); atomic_fetch_add_explicit(&tile->eltCount, 1, memory_order_relaxed); @@ -254,6 +259,7 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]], if(allLeftFromEdge0 && allLeftFromEdge1 && allLeftFromEdge2 && triangleFull) { + elt->triangleIndex |= mg_cmd_flip; atomic_fetch_add_explicit(&tile->flipCount, 1, memory_order_relaxed); } else @@ -310,7 +316,7 @@ kernel void GatherKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer( const device mg_tile_elt* elt = &eltBuffer[firstEltIndex]; count = 0; - tileArray[count] = mg_cmd_color | elt->triangleIndex; + tileArray[count] = mg_cmd_color | (elt->triangleIndex & ~MG_TILE_CMD_MASK); count++; continue; } @@ -458,63 +464,75 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]], currentShapeIndex = triangle->shapeIndex; } - if(cmdKind == mg_cmd_color) + switch(cmdKind) { - for(int sampleIndex=0; sampleIndexcolor; - nextColor.rgb *= nextColor.a; - sampleColor[sampleIndex] = nextColor; - flipCount[sampleIndex] = 0; - } - } - else - { - int2 p0 = triangle->p0; - int2 p1 = triangle->p1; - int2 p2 = triangle->p2; - - int cw = triangle->cw; - - int bias0 = triangle->bias0; - int bias1 = triangle->bias1; - int bias2 = triangle->bias2; - - float4 cubic0 = triangle->cubic0; - float4 cubic1 = triangle->cubic1; - float4 cubic2 = triangle->cubic2; - - bool fullTriangle = triangle->full; - - int4 clip = triangle->box; - - for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) - { - int2 samplePoint = samplePoints[sampleIndex]; - - if( samplePoint.x < clip.x - || samplePoint.x > clip.z - || samplePoint.y < clip.y - || samplePoint.y > clip.w) + for(int sampleIndex=0; sampleIndexcolor; + nextColor.rgb *= nextColor.a; + sampleColor[sampleIndex] = nextColor; + flipCount[sampleIndex] = 0; } + } break; - int w0 = cw*orient2d(p1, p2, samplePoint); - int w1 = cw*orient2d(p2, p0, samplePoint); - int w2 = cw*orient2d(p0, p1, samplePoint); - - if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0) + case mg_cmd_flip: + { + for(int sampleIndex=0; sampleIndexp0; + int2 p1 = triangle->p1; + int2 p2 = triangle->p2; + + int cw = triangle->cw; + + int bias0 = triangle->bias0; + int bias1 = triangle->bias1; + int bias2 = triangle->bias2; + + float4 cubic0 = triangle->cubic0; + float4 cubic1 = triangle->cubic1; + float4 cubic2 = triangle->cubic2; + + bool fullTriangle = triangle->full; + + int4 clip = triangle->box; + + for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) + { + int2 samplePoint = samplePoints[sampleIndex]; + + if( samplePoint.x < clip.x + || samplePoint.x > clip.z + || samplePoint.y < clip.y + || samplePoint.y > clip.w) { - flipCount[sampleIndex]++; + continue; + } + + int w0 = cw*orient2d(p1, p2, samplePoint); + int w1 = cw*orient2d(p2, p0, samplePoint); + int w2 = cw*orient2d(p0, p1, samplePoint); + + if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0) + { + float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2); + + if( fullTriangle + ||(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= 0)) + { + flipCount[sampleIndex]++; + } } } - } + } break; } }