diff --git a/build.sh b/build.sh
index c8f12c9..d658aba 100755
--- a/build.sh
+++ b/build.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-DEBUG_FLAGS="-g -DDEBUG -DLOG_COMPILE_DEBUG"
+DEBUG_FLAGS="-g -O2 -DDEBUG -DLOG_COMPILE_DEBUG"
#DEBUG_FLAGS="-O3"
#--------------------------------------------------------------
diff --git a/examples/tiger/Ghostscript_Tiger.svg b/examples/tiger/Ghostscript_Tiger.svg
new file mode 100644
index 0000000..679edec
--- /dev/null
+++ b/examples/tiger/Ghostscript_Tiger.svg
@@ -0,0 +1,725 @@
+
+
diff --git a/examples/tiger/main.c b/examples/tiger/main.c
index 830dee4..2d2df6a 100644
--- a/examples/tiger/main.c
+++ b/examples/tiger/main.c
@@ -158,6 +158,7 @@ int main()
draw_tiger();
mg_matrix_pop();
+/*
// text
mg_set_color_rgba(0, 0, 1, 1);
mg_set_font(font);
@@ -170,7 +171,7 @@ int main()
1./frameTime);
mg_text_outlines(text);
mg_fill();
-
+*/
printf("Milepost vector graphics test program (frame time = %fs, fps = %f)...\n",
frameTime,
1./frameTime);
diff --git a/src/graphics.c b/src/graphics.c
index 66e09f2..f24fbcd 100644
--- a/src/graphics.c
+++ b/src/graphics.c
@@ -216,6 +216,8 @@ typedef struct mg_canvas_data
mg_surface surface;
mg_canvas_backend* backend;
+
+ int splitCount;
} mg_canvas_data;
static mg_data __mgData = {0};
@@ -894,6 +896,9 @@ void mg_render_fill_quadratic(mg_canvas_data* canvas, vec2 p[3])
void mg_split_and_fill_cubic(mg_canvas_data* canvas, vec2 p[4], f32 tSplit)
{
+ //DEBUG
+ __mgCurrentCanvas->splitCount++;
+
int subVertexCount = 0;
int subIndexCount = 0;
@@ -1639,6 +1644,9 @@ vec2 mg_quadratic_get_point(vec2 p[3], f32 t)
void mg_quadratic_split(vec2 p[3], f32 t, vec2 outLeft[3], vec2 outRight[3])
{
+ //DEBUG
+ __mgCurrentCanvas->splitCount++;
+
//NOTE(martin): split bezier curve p at parameter t, using De Casteljau's algorithm
// the q_n are the points along the hull's segments at parameter t
// s is the split point.
@@ -3055,6 +3063,9 @@ void mg_flush_commands(int primitiveCount, mg_primitive* primitives, mg_path_elt
canvas->backend->begin(canvas->backend, canvas->clearColor);
+ //DEBUG
+ canvas->splitCount = 0;
+
for(int i=0; i= primitiveCount)
@@ -3149,6 +3160,9 @@ void mg_flush_commands(int primitiveCount, mg_primitive* primitives, mg_path_elt
}
exit_command_loop: ;
+ printf("path elements: %i, splitCount = %i\n", canvas->path.startIndex + canvas->path.count, canvas->splitCount);
+
+
mg_image_data* imageData = mg_image_data_from_handle(canvas->image);
mg_draw_batch(canvas, imageData);
diff --git a/src/mtl_canvas.m b/src/mtl_canvas.m
index 5911829..154ebd5 100644
--- a/src/mtl_canvas.m
+++ b/src/mtl_canvas.m
@@ -36,6 +36,7 @@ typedef struct mg_mtl_canvas_backend
// permanent metal resources
id shapePipeline;
id trianglePipeline;
+ id gatherPipeline;
id computePipeline;
id renderPipeline;
@@ -54,8 +55,13 @@ typedef struct mg_mtl_canvas_backend
id indexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
id shapeQueueBuffer;
id triangleArray;
- id arenaBuffer;
- id arenaOffset;
+ id tilesBuffer;
+ id tilesOffset;
+ id eltBuffer;
+ id eltOffset;
+
+ id tileArrayBuffer;
+ id tileCounters;
} mg_mtl_canvas_backend;
@@ -211,12 +217,16 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
int triangleCount = indexCount/3;
+ printf("triangle count: %i, shape count: %i\n", triangleCount, shapeCount);
+
//-----------------------------------------------------------
//NOTE(martin): encode the clear arena offset
//-----------------------------------------------------------
id blitEncoder = [surface->commandBuffer blitCommandEncoder];
blitEncoder.label = @"clear arena";
- [blitEncoder fillBuffer: backend->arenaOffset range: NSMakeRange(0, sizeof(int)) value: 0];
+ [blitEncoder fillBuffer: backend->tilesOffset range: NSMakeRange(0, sizeof(int)) value: 0];
+ [blitEncoder fillBuffer: backend->eltOffset range: NSMakeRange(0, sizeof(int)) value: 0];
+ [blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
[blitEncoder endEncoding];
//-----------------------------------------------------------
@@ -227,8 +237,8 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
[shapeEncoder setComputePipelineState: backend->shapePipeline];
[shapeEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 0];
[shapeEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 1];
- [shapeEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 2];
- [shapeEncoder setBuffer: backend->arenaOffset offset:0 atIndex: 3];
+ [shapeEncoder setBuffer: backend->tilesBuffer offset:0 atIndex: 2];
+ [shapeEncoder setBuffer: backend->tilesOffset offset:0 atIndex: 3];
[shapeEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
MTLSize shapeGroupSize = MTLSizeMake(backend->shapePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
@@ -248,10 +258,11 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
[triangleEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 2];
[triangleEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3];
[triangleEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 4];
- [triangleEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 5];
- [triangleEncoder setBuffer: backend->arenaOffset offset:0 atIndex: 6];
+ [triangleEncoder setBuffer: backend->tilesBuffer offset:0 atIndex: 5];
+ [triangleEncoder setBuffer: backend->eltBuffer offset:0 atIndex: 6];
+ [triangleEncoder setBuffer: backend->eltOffset offset:0 atIndex: 7];
- [triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 7];
+ [triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 8];
MTLSize triangleGroupSize = MTLSizeMake(backend->trianglePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
MTLSize triangleGridSize = MTLSizeMake(triangleCount, 1, 1);
@@ -259,15 +270,36 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
[triangleEncoder dispatchThreads: triangleGridSize threadsPerThreadgroup: triangleGroupSize];
[triangleEncoder endEncoding];
+ //-----------------------------------------------------------
+ //NOTE(martin): encode gathering pass
+ //-----------------------------------------------------------
+ id gatherEncoder = [surface->commandBuffer computeCommandEncoder];
+ gatherEncoder.label = @"gather pass";
+ [gatherEncoder setComputePipelineState: backend->gatherPipeline];
+ [gatherEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 0];
+ [gatherEncoder setBuffer: backend->tilesBuffer offset:0 atIndex: 1];
+ [gatherEncoder setBuffer: backend->eltBuffer offset:0 atIndex: 2];
+ [gatherEncoder setBuffer: backend->tileCounters offset:0 atIndex: 3];
+ [gatherEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 4];
+
+ [gatherEncoder setBytes: &shapeCount length: sizeof(int) atIndex: 5];
+ [gatherEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 6];
+
+ MTLSize gatherGroupSize = MTLSizeMake(16, 16, 1);
+ MTLSize gatherGridSize = MTLSizeMake(nTilesX, nTilesY, 1);
+
+ [gatherEncoder dispatchThreads: gatherGridSize threadsPerThreadgroup: gatherGroupSize];
+ [gatherEncoder endEncoding];
+
//-----------------------------------------------------------
//NOTE(martin): encode drawing pass
//-----------------------------------------------------------
id drawEncoder = [surface->commandBuffer computeCommandEncoder];
drawEncoder.label = @"drawing pass";
[drawEncoder setComputePipelineState:backend->computePipeline];
- [drawEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 0];
- [drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 1];
- [drawEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 2];
+ [drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 0];
+ [drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 1];
+ [drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 2];
[drawEncoder setTexture: backend->outTexture atIndex: 0];
int useTexture = 0;
@@ -278,9 +310,8 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
useTexture = 1;
}
- [drawEncoder setBytes: &shapeCount length:sizeof(int) atIndex: 3];
- [drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 4];
- [drawEncoder setBytes: &scale length: sizeof(float) atIndex: 5];
+ [drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 3];
+ [drawEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup);
@@ -374,8 +405,10 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface)
[backend->shapeQueueBuffer release];
[backend->triangleArray release];
- [backend->arenaBuffer release];
- [backend->arenaOffset release];
+ [backend->tilesBuffer release];
+ [backend->tilesOffset release];
+ [backend->eltBuffer release];
+ [backend->eltOffset release];
//////////////////////////////////////////
//TODO release all pipelines
@@ -485,6 +518,8 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
texDesc.height = drawableSize.height;
backend->outTexture = [metalSurface->device newTextureWithDescriptor:texDesc];
+
+ texDesc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
backend->backbuffer = [metalSurface->device newTextureWithDescriptor:texDesc];
//TODO(martin): retain ?
@@ -516,12 +551,24 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
backend->shapeQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_shape_queue)
options: MTLResourceStorageModePrivate];
- backend->arenaBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_queue_elt)
+ backend->tilesBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_tile)
options: MTLResourceStorageModePrivate];
- backend->arenaOffset = [metalSurface->device newBufferWithLength: sizeof(int)
+ backend->tilesOffset = [metalSurface->device newBufferWithLength: sizeof(int)
options: MTLResourceStorageModePrivate];
+ backend->eltBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_tile_elt)
+ options: MTLResourceStorageModePrivate];
+
+ backend->eltOffset = [metalSurface->device newBufferWithLength: sizeof(int)
+ options: MTLResourceStorageModePrivate];
+
+ backend->tileArrayBuffer = [metalSurface->device newBufferWithLength: RENDERER_TILE_BUFFER_SIZE*sizeof(int)*RENDERER_MAX_TILES
+ options: MTLResourceStorageModePrivate];
+
+ backend->tileCounters = [metalSurface->device newBufferWithLength: RENDERER_MAX_TILES*sizeof(uint)
+ options: MTLResourceStorageModePrivate];
+
//-----------------------------------------------------------
//NOTE(martin): load the library
//-----------------------------------------------------------
@@ -539,6 +586,7 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
}
id shapeFunction = [library newFunctionWithName:@"ShapeSetup"];
id triangleFunction = [library newFunctionWithName:@"TriangleKernel"];
+ id gatherFunction = [library newFunctionWithName:@"GatherKernel"];
id computeFunction = [library newFunctionWithName:@"RenderKernel"];
id vertexFunction = [library newFunctionWithName:@"VertexShader"];
id fragmentFunction = [library newFunctionWithName:@"FragmentShader"];
@@ -567,6 +615,14 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
reflection: nil
error: &error];
+ MTLComputePipelineDescriptor* gatherPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
+ gatherPipelineDesc.computeFunction = gatherFunction;
+
+ backend->gatherPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: gatherPipelineDesc
+ options: MTLPipelineOptionNone
+ reflection: nil
+ error: &error];
+
//-----------------------------------------------------------
//NOTE(martin): setup our render pipeline state
//-----------------------------------------------------------
diff --git a/src/mtl_shader.h b/src/mtl_shader.h
index abfa981..f44b0d8 100644
--- a/src/mtl_shader.h
+++ b/src/mtl_shader.h
@@ -43,7 +43,6 @@ typedef struct mg_triangle_data
vector_float4 cubic2;
vector_int4 box;
- vector_int4 tileBox;
vector_int2 p0;
vector_int2 p1;
@@ -69,17 +68,18 @@ typedef struct mg_tile_elt
{
int triangleIndex;
int next;
-} mg_queue_elt;
+} mg_tile_elt;
-typedef struct mg_tile_queue
+typedef struct mg_tile
{
- atomic_int first;
-} mg_tile_queue;
+ atomic_int eltCount;
+ atomic_int firstElt;
+} mg_tile;
typedef struct mg_shape_queue
{
vector_int4 area;
- device mg_tile_queue* tileQueues;
+ int tiles;
} mg_shape_queue;
#ifndef __METAL_VERSION__
diff --git a/src/mtl_shader.metal b/src/mtl_shader.metal
index 81399d3..7f2c724 100644
--- a/src/mtl_shader.metal
+++ b/src/mtl_shader.metal
@@ -58,8 +58,8 @@ device uchar* arena_allocate(device uchar* arenaBuffer,
kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]],
device mg_shape_queue* shapeQueueBuffer [[buffer(1)]],
- device uchar* arenaBuffer [[buffer(2)]],
- device volatile atomic_uint* arenaOffset [[buffer(3)]],
+ device mg_tile* tilesBuffer [[buffer(2)]],
+ device volatile atomic_uint* tilesOffset [[buffer(3)]],
constant float* scaling [[buffer(4)]],
uint gid [[thread_position_in_grid]])
{
@@ -76,14 +76,18 @@ kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]],
int nTilesY = int(box.w)/RENDERER_TILE_SIZE - firstTile.y + 1;
int tileCount = nTilesX * nTilesY;
- int tileArraySize = tileCount * sizeof(mg_tile_queue);
+
+ int tilesIndex = atomic_fetch_add_explicit(tilesOffset, tileCount, memory_order_relaxed);
shapeQueueBuffer[gid].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY);
- shapeQueueBuffer[gid].tileQueues = (device mg_tile_queue*)arena_allocate(arenaBuffer, arenaOffset, tileArraySize);
+ shapeQueueBuffer[gid].tiles = tilesIndex;
+
+ device mg_tile* tiles = &tilesBuffer[tilesIndex];
for(int i=0; itiles];
int xMin = max(0, tileBox.x - shapeQueue->area.x);
int yMin = max(0, tileBox.y - shapeQueue->area.y);
@@ -170,24 +174,72 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
{
int tileIndex = y*shapeQueue->area.z + x;
- device mg_tile_queue* tileQueue = &shapeQueue->tileQueues[tileIndex];
- device mg_queue_elt* elt = (device mg_queue_elt*)arena_allocate(arenaBuffer, arenaOffset, sizeof(mg_queue_elt));
- int eltIndex = (device uchar*)elt - arenaBuffer;
-
- elt->next = atomic_exchange_explicit(&tileQueue->first, eltIndex, memory_order_relaxed);
+ int eltIndex = atomic_fetch_add_explicit(eltOffset, 1, memory_order_relaxed);
+ device mg_tile_elt* elt = &eltBuffer[eltIndex];
elt->triangleIndex = gid;
+ elt->next = atomic_exchange_explicit(&tiles[tileIndex].firstElt, eltIndex, memory_order_relaxed);
+ atomic_fetch_add_explicit(&tiles[tileIndex].eltCount, 1, memory_order_relaxed);
}
}
}
-kernel void RenderKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(0)]],
- const device mg_triangle_data* triangleArray [[buffer(1)]],
- const device uchar* arenaBuffer [[buffer(2)]],
+kernel void GatherKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(0)]],
+ const device mg_tile* tilesBuffer [[buffer(1)]],
+ const device mg_tile_elt* eltBuffer [[buffer(2)]],
+ device int* tileCounters [[buffer(3)]],
+ device int* tileArrayBuffer [[buffer(4)]],
+ constant int* shapeCount [[buffer(5)]],
+ constant uint2* viewport [[buffer(6)]],
+ uint2 gid [[thread_position_in_grid]])
+{
+ uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
+ int nTilesX = tilesMatrixDim.x;
- constant int* shapeCount [[buffer(3)]],
- constant int* useTexture [[buffer(4)]],
- constant float* scaling [[buffer(5)]],
+ int2 tileCoord = int2(gid);
+ int tileIndex = tileCoord.y * nTilesX + tileCoord.x;
+
+ device int* tileArray = &tileArrayBuffer[tileIndex * RENDERER_TILE_BUFFER_SIZE];
+
+ int count = 0;
+ for(int shapeIndex = 0; shapeIndex < shapeCount[0]; shapeIndex++)
+ {
+ const device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex];
+ const device mg_tile* tiles = &tilesBuffer[shapeQueue->tiles];
+
+ // get the tile queue that corresponds to our tile in the shape area
+ int2 tileQueueCoord = tileCoord - shapeQueue->area.xy;
+
+ if( tileQueueCoord.x >= 0
+ && tileQueueCoord.y >= 0
+ && tileQueueCoord.x < shapeQueue->area.z
+ && tileQueueCoord.y < shapeQueue->area.w)
+ {
+ int localIndex = tileQueueCoord.y * shapeQueue->area.z + tileQueueCoord.x;
+ const device mg_tile* tile = &tiles[localIndex];
+
+ int firstEltIndex = *(device int*)&tile->firstElt;
+ const device mg_tile_elt* elt = 0;
+
+ for(int eltIndex = firstEltIndex; eltIndex >= 0; eltIndex = elt->next)
+ {
+ elt = &eltBuffer[eltIndex];
+ eltIndex = elt->next;
+
+ tileArray[count] = elt->triangleIndex;
+ count++;
+ }
+ }
+ }
+ tileCounters[tileIndex] = count;
+}
+
+kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
+ const device uint* tileArrayBuffer [[buffer(1)]],
+ const device mg_triangle_data* triangleArray [[buffer(2)]],
+
+ constant int* useTexture [[buffer(3)]],
+ constant float* scaling [[buffer(4)]],
texture2d outTexture [[texture(0)]],
texture2d texAtlas [[texture(1)]],
@@ -199,7 +251,37 @@ kernel void RenderKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(
{
//TODO: guard against thread group size not equal to tile size?
const int2 pixelCoord = int2(gid);
- const int2 tileCoord = pixelCoord/ RENDERER_TILE_SIZE;
+ const uint2 tileCoord = uint2(pixelCoord)/ RENDERER_TILE_SIZE;
+ const uint2 tilesMatrixDim = (gridSize - 1) / RENDERER_TILE_SIZE + 1;
+ const uint tileIndex = tileCoord.y * tilesMatrixDim.x + tileCoord.x;
+ const uint tileCounter = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE);
+
+#ifdef RENDERER_DEBUG_TILES
+ //NOTE(martin): color code debug values and show the tile grid
+ {
+ float4 fragColor = float4(0);
+
+ if( pixelCoord.x % 16 == 0
+ ||pixelCoord.y % 16 == 0)
+ {
+ fragColor = float4(0, 0, 0, 1);
+ }
+ else if(tileCounters[tileIndex] == 0xffffu)
+ {
+ fragColor = float4(1, 0, 1, 1);
+ }
+ else if(tileCounter != 0u)
+ {
+ fragColor = float4(0, 1, 0, 1);
+ }
+ else
+ {
+ fragColor = float4(1, 0, 0, 1);
+ }
+ outTexture.write(fragColor, gid);
+ return;
+ }
+#endif
const int subPixelFactor = 16;
const int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor);
@@ -227,103 +309,230 @@ kernel void RenderKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(
currentColor[i] = float4(0, 0, 0, 0);
}
- for(int shapeIndex = 0; shapeIndex < shapeCount[0]; shapeIndex++)
+ for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++)
{
- const device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex];
+ int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex];
+ const device mg_triangle_data* triangle = &triangleArray[triangleIndex];
- // get the tile queue that corresponds to our tile in the shape area
- int2 tileQueueCoord = tileCoord - shapeQueue->area.xy;
- if( tileQueueCoord.x >= 0
- && tileQueueCoord.y >= 0
- && tileQueueCoord.x < shapeQueue->area.z
- && tileQueueCoord.y < shapeQueue->area.w)
+ int2 p0 = triangle->p0;
+ int2 p1 = triangle->p1;
+ int2 p2 = triangle->p2;
+
+ int cw = triangle->cw;
+
+ int bias0 = triangle->bias0;
+ int bias1 = triangle->bias1;
+ int bias2 = triangle->bias2;
+
+ float4 cubic0 = triangle->cubic0;
+ float4 cubic1 = triangle->cubic1;
+ float4 cubic2 = triangle->cubic2;
+
+ int shapeIndex = triangle->shapeIndex;
+ float4 color = triangle->color;
+ color.rgb *= color.a;
+
+ int4 clip = triangle->box;
+
+ matrix_float3x3 uvTransform = triangle->uvTransform;
+
+ for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
{
- int tileQueueIndex = tileQueueCoord.y * shapeQueue->area.z + tileQueueCoord.x;
- device mg_tile_queue* tileQueue = &shapeQueue->tileQueues[tileQueueIndex];
+ int2 samplePoint = samplePoints[sampleIndex];
- int firstEltIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed);
- device mg_queue_elt* elt = 0;
-
- for(int eltIndex = firstEltIndex; eltIndex >= 0; eltIndex = elt->next)
+ if( samplePoint.x < clip.x
+ || samplePoint.x > clip.z
+ || samplePoint.y < clip.y
+ || samplePoint.y > clip.w)
{
- elt = (device mg_queue_elt*)(arenaBuffer + eltIndex);
- const device mg_triangle_data* triangle = &triangleArray[elt->triangleIndex];
+ continue;
+ }
- int2 p0 = triangle->p0;
- int2 p1 = triangle->p1;
- int2 p2 = triangle->p2;
+ int w0 = cw*orient2d(p1, p2, samplePoint);
+ int w1 = cw*orient2d(p2, p0, samplePoint);
+ int w2 = cw*orient2d(p0, p1, samplePoint);
- int cw = triangle->cw;
+ if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0)
+ {
+ float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
- int bias0 = triangle->bias0;
- int bias1 = triangle->bias1;
- int bias2 = triangle->bias2;
-
- float4 cubic0 = triangle->cubic0;
- float4 cubic1 = triangle->cubic1;
- float4 cubic2 = triangle->cubic2;
-
- int shapeIndex = triangle->shapeIndex;
- float4 color = triangle->color;
- color.rgb *= color.a;
-
- int4 clip = triangle->box;
-
- matrix_float3x3 uvTransform = triangle->uvTransform;
-
- for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
+ float eps = 0.0001;
+ if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
{
- int2 samplePoint = samplePoints[sampleIndex];
-
- if( samplePoint.x < clip.x
- || samplePoint.x > clip.z
- || samplePoint.y < clip.y
- || samplePoint.y > clip.w)
+ if(shapeIndex == currentShapeIndex[sampleIndex])
{
- continue;
+ flipCount[sampleIndex]++;
}
-
- int w0 = cw*orient2d(p1, p2, samplePoint);
- int w1 = cw*orient2d(p2, p0, samplePoint);
- int w2 = cw*orient2d(p0, p1, samplePoint);
-
- if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0)
+ else
{
- float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
-
- float eps = 0.0001;
- if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
+ if(flipCount[sampleIndex] & 0x01)
{
- if(shapeIndex == currentShapeIndex[sampleIndex])
- {
- flipCount[sampleIndex]++;
- }
- else
- {
- if(flipCount[sampleIndex] & 0x01)
- {
- sampleColor[sampleIndex] = currentColor[sampleIndex];
- }
-
- float4 nextColor = color;
-
- if(useTexture[0])
- {
- float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1);
- float2 uv = (uvTransform * sampleFP).xy;
-
- constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
- float4 texColor = texAtlas.sample(smp, uv);
-
- texColor.rgb *= texColor.a;
- nextColor *= texColor;
- }
-
- currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor;
- currentShapeIndex[sampleIndex] = shapeIndex;
- flipCount[sampleIndex] = 1;
- }
+ sampleColor[sampleIndex] = currentColor[sampleIndex];
}
+
+ float4 nextColor = color;
+
+ if(useTexture[0])
+ {
+ float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1);
+ float2 uv = (uvTransform * sampleFP).xy;
+
+ constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
+ float4 texColor = texAtlas.sample(smp, uv);
+
+ texColor.rgb *= texColor.a;
+ nextColor *= texColor;
+ }
+
+ currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor;
+ currentShapeIndex[sampleIndex] = shapeIndex;
+ flipCount[sampleIndex] = 1;
+ }
+ }
+ }
+ }
+ }
+
+ float4 pixelColor = float4(0);
+ for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
+ {
+ if(flipCount[sampleIndex] & 0x01)
+ {
+ sampleColor[sampleIndex] = currentColor[sampleIndex];
+ }
+ pixelColor += sampleColor[sampleIndex];
+ }
+
+ outTexture.write(pixelColor/float(sampleCount), gid);
+}
+
+
+/*
+kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
+ const device uint* tileArrayBuffer [[buffer(1)]],
+ const device mg_triangle_data* triangleArray [[buffer(2)]],
+
+ constant int* useTexture [[buffer(3)]],
+ constant float* scaling [[buffer(4)]],
+
+ texture2d outTexture [[texture(0)]],
+ texture2d texAtlas [[texture(1)]],
+
+ uint2 gid [[thread_position_in_grid]],
+ uint2 tgid [[threadgroup_position_in_grid]],
+ uint2 threadsPerThreadgroup [[threads_per_threadgroup]],
+ uint2 gridSize [[threads_per_grid]])
+{
+ const int2 pixelCoord = int2(gid);
+ const uint2 tileCoord = uint2(pixelCoord)/ RENDERER_TILE_SIZE;
+ const uint2 tilesMatrixDim = (gridSize - 1) / RENDERER_TILE_SIZE + 1;
+ const uint tileIndex = tileCoord.y * tilesMatrixDim.x + tileCoord.x;
+ const uint tileCounter = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE);
+
+ const int subPixelFactor = 16;
+ const int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor);
+
+ const int sampleCount = 8;
+ int2 samplePoints[sampleCount] = {centerPoint + int2(1, 3),
+ centerPoint + int2(-1, -3),
+ centerPoint + int2(5, -1),
+ centerPoint + int2(-3, 5),
+ centerPoint + int2(-5, -5),
+ centerPoint + int2(-7, 1),
+ centerPoint + int2(3, -7),
+ centerPoint + int2(7, 7)};
+
+ float4 sampleColor[sampleCount];
+ float4 currentColor[sampleCount];
+ int currentShapeIndex[sampleCount];
+ int flipCount[sampleCount];
+
+ for(int i=0; ip0;
+ int2 p1 = triangle->p1;
+ int2 p2 = triangle->p2;
+
+ int cw = triangle->cw;
+
+ int bias0 = triangle->bias0;
+ int bias1 = triangle->bias1;
+ int bias2 = triangle->bias2;
+
+ float4 cubic0 = triangle->cubic0;
+ float4 cubic1 = triangle->cubic1;
+ float4 cubic2 = triangle->cubic2;
+
+ int shapeIndex = triangle->shapeIndex;
+ float4 color = triangle->color;
+ color.rgb *= color.a;
+
+ int4 clip = triangle->box;
+
+ matrix_float3x3 uvTransform = triangle->uvTransform;
+
+ for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
+ {
+ int2 samplePoint = samplePoints[sampleIndex];
+
+ if( samplePoint.x < clip.x
+ || samplePoint.x > clip.z
+ || samplePoint.y < clip.y
+ || samplePoint.y > clip.w)
+ {
+ continue;
+ }
+
+ int w0 = cw*orient2d(p1, p2, samplePoint);
+ int w1 = cw*orient2d(p2, p0, samplePoint);
+ int w2 = cw*orient2d(p0, p1, samplePoint);
+
+ if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0)
+ {
+ float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
+
+ float eps = 0.0001;
+ if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
+ {
+ if(shapeIndex == currentShapeIndex[sampleIndex])
+ {
+ flipCount[sampleIndex]++;
+ }
+ else
+ {
+ if(flipCount[sampleIndex] & 0x01)
+ {
+ sampleColor[sampleIndex] = currentColor[sampleIndex];
+ }
+
+ float4 nextColor = color;
+
+ if(useTexture[0])
+ {
+ float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1);
+ float2 uv = (uvTransform * sampleFP).xy;
+
+ constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
+ float4 texColor = texAtlas.sample(smp, uv);
+
+ texColor.rgb *= texColor.a;
+ nextColor *= texColor;
+ }
+
+ currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor;
+ currentShapeIndex[sampleIndex] = shapeIndex;
+ flipCount[sampleIndex] = 1;
}
}
}
@@ -343,3 +552,4 @@ kernel void RenderKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(
outTexture.write(pixelColor/float(sampleCount), gid);
}
+*/