[mtl canvas] Tiling per rectangles into per-shape tile queues (linked lists of triangle indices) to avoid sorting
This commit is contained in:
parent
11113f597c
commit
a4ef58f2d8
|
@ -162,6 +162,7 @@ int main()
|
||||||
|
|
||||||
// head
|
// head
|
||||||
mg_set_color_rgba(1, 1, 0, 1);
|
mg_set_color_rgba(1, 1, 0, 1);
|
||||||
|
|
||||||
mg_circle_fill(x, y, 200);
|
mg_circle_fill(x, y, 200);
|
||||||
|
|
||||||
// smile
|
// smile
|
||||||
|
|
|
@ -82,6 +82,11 @@ int main()
|
||||||
mp_window_bring_to_front(window);
|
mp_window_bring_to_front(window);
|
||||||
mp_window_focus(window);
|
mp_window_focus(window);
|
||||||
|
|
||||||
|
bool tracked = false;
|
||||||
|
vec2 trackPoint = {0};
|
||||||
|
f32 zoom = 1;
|
||||||
|
f32 startX = 0, startY = 0;
|
||||||
|
|
||||||
f64 frameTime = 0;
|
f64 frameTime = 0;
|
||||||
|
|
||||||
while(!mp_should_quit())
|
while(!mp_should_quit())
|
||||||
|
@ -99,8 +104,35 @@ int main()
|
||||||
mp_request_quit();
|
mp_request_quit();
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case MP_EVENT_KEYBOARD_KEY:
|
case MP_EVENT_MOUSE_BUTTON:
|
||||||
{
|
{
|
||||||
|
if(event.key.code == MP_MOUSE_LEFT)
|
||||||
|
{
|
||||||
|
if(event.key.action == MP_KEY_PRESS)
|
||||||
|
{
|
||||||
|
tracked = true;
|
||||||
|
vec2 mousePos = mp_mouse_position();
|
||||||
|
trackPoint.x = mousePos.x/zoom - startX;
|
||||||
|
trackPoint.y = mousePos.y/zoom - startY;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
tracked = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
|
||||||
|
case MP_EVENT_MOUSE_WHEEL:
|
||||||
|
{
|
||||||
|
vec2 mousePos = mp_mouse_position();
|
||||||
|
f32 trackX = mousePos.x/zoom - startX;
|
||||||
|
f32 trackY = mousePos.y/zoom - startY;
|
||||||
|
|
||||||
|
zoom *= 1 + event.move.deltaY * 0.01;
|
||||||
|
zoom = Clamp(zoom, 0.2, 10);
|
||||||
|
|
||||||
|
startX = mousePos.x/zoom - trackX;
|
||||||
|
startY = mousePos.y/zoom - trackY;
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -108,13 +140,20 @@ int main()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(tracked)
|
||||||
|
{
|
||||||
|
vec2 mousePos = mp_mouse_position();
|
||||||
|
startX = mousePos.x/zoom - trackPoint.x;
|
||||||
|
startY = mousePos.y/zoom - trackPoint.y;
|
||||||
|
}
|
||||||
|
|
||||||
mg_surface_prepare(surface);
|
mg_surface_prepare(surface);
|
||||||
|
|
||||||
mg_set_color_rgba(1, 0, 1, 1);
|
mg_set_color_rgba(1, 0, 1, 1);
|
||||||
mg_clear();
|
mg_clear();
|
||||||
|
|
||||||
mg_matrix_push((mg_mat2x3){1, 0, 300,
|
mg_matrix_push((mg_mat2x3){zoom, 0, 300+startX*zoom,
|
||||||
0, 1, 200});
|
0, zoom, 200+startY*zoom});
|
||||||
|
|
||||||
draw_tiger();
|
draw_tiger();
|
||||||
|
|
||||||
|
|
|
@ -208,6 +208,7 @@ typedef struct mg_canvas_data
|
||||||
mp_rect srcRegion;
|
mp_rect srcRegion;
|
||||||
|
|
||||||
vec4 shapeExtents;
|
vec4 shapeExtents;
|
||||||
|
vec4 shapeScreenExtents;
|
||||||
u32 nextShapeIndex;
|
u32 nextShapeIndex;
|
||||||
u32 vertexCount;
|
u32 vertexCount;
|
||||||
u32 indexCount;
|
u32 indexCount;
|
||||||
|
@ -789,6 +790,13 @@ void mg_finalize_shape(mg_canvas_data* canvas)
|
||||||
mg_vertex_layout* layout = &canvas->backend->vertexLayout;
|
mg_vertex_layout* layout = &canvas->backend->vertexLayout;
|
||||||
*(mg_mat2x3*)(layout->uvTransformBuffer + index*layout->uvTransformStride) = uvTransform;
|
*(mg_mat2x3*)(layout->uvTransformBuffer + index*layout->uvTransformStride) = uvTransform;
|
||||||
|
|
||||||
|
//TODO: transform extents before clipping
|
||||||
|
mp_rect clip = {maximum(canvas->clip.x, canvas->shapeScreenExtents.x),
|
||||||
|
maximum(canvas->clip.y, canvas->shapeScreenExtents.y),
|
||||||
|
minimum(canvas->clip.x + canvas->clip.w, canvas->shapeScreenExtents.z),
|
||||||
|
minimum(canvas->clip.y + canvas->clip.h, canvas->shapeScreenExtents.w)};
|
||||||
|
|
||||||
|
*(mp_rect*)(((char*)layout->clipBuffer) + index*layout->clipStride) = clip;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -799,17 +807,12 @@ u32 mg_next_shape(mg_canvas_data* canvas, mg_attributes* attributes)
|
||||||
canvas->transform = attributes->transform;
|
canvas->transform = attributes->transform;
|
||||||
canvas->srcRegion = attributes->srcRegion;
|
canvas->srcRegion = attributes->srcRegion;
|
||||||
canvas->shapeExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX};
|
canvas->shapeExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||||
|
canvas->shapeScreenExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX};
|
||||||
|
|
||||||
mg_vertex_layout* layout = &canvas->backend->vertexLayout;
|
mg_vertex_layout* layout = &canvas->backend->vertexLayout;
|
||||||
int index = canvas->nextShapeIndex;
|
int index = canvas->nextShapeIndex;
|
||||||
canvas->nextShapeIndex++;
|
canvas->nextShapeIndex++;
|
||||||
|
|
||||||
mp_rect clip = {canvas->clip.x,
|
|
||||||
canvas->clip.y,
|
|
||||||
canvas->clip.x + canvas->clip.w,
|
|
||||||
canvas->clip.y + canvas->clip.h};
|
|
||||||
|
|
||||||
*(mp_rect*)(((char*)layout->clipBuffer) + index*layout->clipStride) = clip;
|
|
||||||
*(mg_color*)(((char*)layout->colorBuffer) + index*layout->colorStride) = attributes->color;
|
*(mg_color*)(((char*)layout->colorBuffer) + index*layout->colorStride) = attributes->color;
|
||||||
|
|
||||||
return(index);
|
return(index);
|
||||||
|
@ -842,6 +845,11 @@ void mg_push_vertex_cubic(mg_canvas_data* canvas, vec2 pos, vec4 cubic)
|
||||||
|
|
||||||
vec2 screenPos = mg_mat2x3_mul(canvas->transform, pos);
|
vec2 screenPos = mg_mat2x3_mul(canvas->transform, pos);
|
||||||
|
|
||||||
|
canvas->shapeScreenExtents.x = minimum(canvas->shapeScreenExtents.x, screenPos.x);
|
||||||
|
canvas->shapeScreenExtents.y = minimum(canvas->shapeScreenExtents.y, screenPos.y);
|
||||||
|
canvas->shapeScreenExtents.z = maximum(canvas->shapeScreenExtents.z, screenPos.x);
|
||||||
|
canvas->shapeScreenExtents.w = maximum(canvas->shapeScreenExtents.w, screenPos.y);
|
||||||
|
|
||||||
mg_vertex_layout* layout = &canvas->backend->vertexLayout;
|
mg_vertex_layout* layout = &canvas->backend->vertexLayout;
|
||||||
ASSERT(canvas->vertexCount < layout->maxVertexCount);
|
ASSERT(canvas->vertexCount < layout->maxVertexCount);
|
||||||
ASSERT(canvas->nextShapeIndex > 0);
|
ASSERT(canvas->nextShapeIndex > 0);
|
||||||
|
|
143
src/mtl_canvas.m
143
src/mtl_canvas.m
|
@ -34,9 +34,8 @@ typedef struct mg_mtl_canvas_backend
|
||||||
mg_color clearColor;
|
mg_color clearColor;
|
||||||
|
|
||||||
// permanent metal resources
|
// permanent metal resources
|
||||||
|
id<MTLComputePipelineState> shapePipeline;
|
||||||
id<MTLComputePipelineState> trianglePipeline;
|
id<MTLComputePipelineState> trianglePipeline;
|
||||||
id<MTLComputePipelineState> tilingPipeline;
|
|
||||||
id<MTLComputePipelineState> sortingPipeline;
|
|
||||||
id<MTLComputePipelineState> computePipeline;
|
id<MTLComputePipelineState> computePipeline;
|
||||||
id<MTLRenderPipelineState> renderPipeline;
|
id<MTLRenderPipelineState> renderPipeline;
|
||||||
|
|
||||||
|
@ -53,9 +52,10 @@ typedef struct mg_mtl_canvas_backend
|
||||||
id<MTLBuffer> shapeBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
|
id<MTLBuffer> shapeBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
|
||||||
id<MTLBuffer> vertexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
|
id<MTLBuffer> vertexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
|
||||||
id<MTLBuffer> indexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
|
id<MTLBuffer> indexBuffer[MG_MTL_MAX_BUFFER_AVAILABLE];
|
||||||
id<MTLBuffer> tileCounters;
|
id<MTLBuffer> shapeQueueBuffer;
|
||||||
id<MTLBuffer> tileArrayBuffer;
|
|
||||||
id<MTLBuffer> triangleArray;
|
id<MTLBuffer> triangleArray;
|
||||||
|
id<MTLBuffer> arenaBuffer;
|
||||||
|
id<MTLBuffer> arenaOffset;
|
||||||
|
|
||||||
} mg_mtl_canvas_backend;
|
} mg_mtl_canvas_backend;
|
||||||
|
|
||||||
|
@ -212,15 +212,33 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
int triangleCount = indexCount/3;
|
int triangleCount = indexCount/3;
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the clear counter
|
//NOTE(martin): encode the clear arena offset
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
|
id<MTLBlitCommandEncoder> blitEncoder = [surface->commandBuffer blitCommandEncoder];
|
||||||
blitEncoder.label = @"clear counters";
|
blitEncoder.label = @"clear arena";
|
||||||
[blitEncoder fillBuffer: backend->tileCounters range: NSMakeRange(0, RENDERER_MAX_TILES*sizeof(uint)) value: 0];
|
[blitEncoder fillBuffer: backend->arenaOffset range: NSMakeRange(0, sizeof(int)) value: 0];
|
||||||
[blitEncoder endEncoding];
|
[blitEncoder endEncoding];
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode the triangle prepass
|
//NOTE(martin): encode the shape setup pass
|
||||||
|
//-----------------------------------------------------------
|
||||||
|
id<MTLComputeCommandEncoder> shapeEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
|
shapeEncoder.label = @"shape pass";
|
||||||
|
[shapeEncoder setComputePipelineState: backend->shapePipeline];
|
||||||
|
[shapeEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 0];
|
||||||
|
[shapeEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 1];
|
||||||
|
[shapeEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 2];
|
||||||
|
[shapeEncoder setBuffer: backend->arenaOffset offset:0 atIndex: 3];
|
||||||
|
[shapeEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
|
||||||
|
|
||||||
|
MTLSize shapeGroupSize = MTLSizeMake(backend->shapePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
||||||
|
MTLSize shapeGridSize = MTLSizeMake(shapeCount, 1, 1);
|
||||||
|
|
||||||
|
[shapeEncoder dispatchThreads: shapeGridSize threadsPerThreadgroup: shapeGroupSize];
|
||||||
|
[shapeEncoder endEncoding];
|
||||||
|
|
||||||
|
//-----------------------------------------------------------
|
||||||
|
//NOTE(martin): encode the triangle setup and binning
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
id<MTLComputeCommandEncoder> triangleEncoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> triangleEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
triangleEncoder.label = @"triangle pass";
|
triangleEncoder.label = @"triangle pass";
|
||||||
|
@ -229,8 +247,11 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
[triangleEncoder setBuffer: backend->indexBuffer[backend->bufferIndex] offset:backend->indexBufferOffset atIndex: 1];
|
[triangleEncoder setBuffer: backend->indexBuffer[backend->bufferIndex] offset:backend->indexBufferOffset atIndex: 1];
|
||||||
[triangleEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 2];
|
[triangleEncoder setBuffer: backend->shapeBuffer[backend->bufferIndex] offset:backend->shapeBufferOffset atIndex: 2];
|
||||||
[triangleEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3];
|
[triangleEncoder setBuffer: backend->triangleArray offset:0 atIndex: 3];
|
||||||
|
[triangleEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 4];
|
||||||
|
[triangleEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 5];
|
||||||
|
[triangleEncoder setBuffer: backend->arenaOffset offset:0 atIndex: 6];
|
||||||
|
|
||||||
[triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
|
[triangleEncoder setBytes: &scale length: sizeof(float) atIndex: 7];
|
||||||
|
|
||||||
MTLSize triangleGroupSize = MTLSizeMake(backend->trianglePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
MTLSize triangleGroupSize = MTLSizeMake(backend->trianglePipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
||||||
MTLSize triangleGridSize = MTLSizeMake(triangleCount, 1, 1);
|
MTLSize triangleGridSize = MTLSizeMake(triangleCount, 1, 1);
|
||||||
|
@ -238,56 +259,15 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
[triangleEncoder dispatchThreads: triangleGridSize threadsPerThreadgroup: triangleGroupSize];
|
[triangleEncoder dispatchThreads: triangleGridSize threadsPerThreadgroup: triangleGroupSize];
|
||||||
[triangleEncoder endEncoding];
|
[triangleEncoder endEncoding];
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
|
||||||
//NOTE(martin): encode the tiling pass
|
|
||||||
//-----------------------------------------------------------
|
|
||||||
|
|
||||||
id<MTLComputeCommandEncoder> tileEncoder = [surface->commandBuffer computeCommandEncoder];
|
|
||||||
tileEncoder.label = @"tiling pass";
|
|
||||||
[tileEncoder setComputePipelineState: backend->tilingPipeline];
|
|
||||||
[tileEncoder setBuffer: backend->triangleArray offset:0 atIndex: 0];
|
|
||||||
[tileEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
|
|
||||||
[tileEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2];
|
|
||||||
|
|
||||||
[tileEncoder setBytes: &triangleCount length:sizeof(int) atIndex: 3];
|
|
||||||
[tileEncoder setBytes: &viewportSize length: sizeof(vector_uint2) atIndex: 4];
|
|
||||||
[tileEncoder setBytes: &scale length: sizeof(float) atIndex: 5];
|
|
||||||
|
|
||||||
MTLSize tileGroupSize = MTLSizeMake(1, 1, 16);
|
|
||||||
MTLSize tileGridSize = MTLSizeMake(nTilesX, nTilesY, 16);
|
|
||||||
|
|
||||||
[tileEncoder dispatchThreads: tileGridSize threadsPerThreadgroup: tileGroupSize];
|
|
||||||
[tileEncoder endEncoding];
|
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
|
||||||
//NOTE(martin): encode the sorting pass
|
|
||||||
//-----------------------------------------------------------
|
|
||||||
/*
|
|
||||||
id<MTLComputeCommandEncoder> sortEncoder = [surface->commandBuffer computeCommandEncoder];
|
|
||||||
sortEncoder.label = @"sorting pass";
|
|
||||||
[sortEncoder setComputePipelineState: backend->sortingPipeline];
|
|
||||||
[sortEncoder setBuffer: backend->triangleArray offset:0 atIndex: 0];
|
|
||||||
[sortEncoder setBuffer: backend->tileCounters offset:0 atIndex: 1];
|
|
||||||
[sortEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 2];
|
|
||||||
|
|
||||||
u32 nTilesX = (viewportSize.x + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
|
||||||
u32 nTilesY = (viewportSize.y + RENDERER_TILE_SIZE - 1)/RENDERER_TILE_SIZE;
|
|
||||||
|
|
||||||
MTLSize sortGroupSize = MTLSizeMake(backend->sortingPipeline.maxTotalThreadsPerThreadgroup, 1, 1);
|
|
||||||
MTLSize sortGridSize = MTLSizeMake(nTilesX*nTilesY, 1, 1);
|
|
||||||
|
|
||||||
[sortEncoder dispatchThreads: sortGridSize threadsPerThreadgroup: sortGroupSize];
|
|
||||||
[sortEncoder endEncoding];
|
|
||||||
*/
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): encode drawing pass
|
//NOTE(martin): encode drawing pass
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
id<MTLComputeCommandEncoder> drawEncoder = [surface->commandBuffer computeCommandEncoder];
|
id<MTLComputeCommandEncoder> drawEncoder = [surface->commandBuffer computeCommandEncoder];
|
||||||
drawEncoder.label = @"drawing pass";
|
drawEncoder.label = @"drawing pass";
|
||||||
[drawEncoder setComputePipelineState:backend->computePipeline];
|
[drawEncoder setComputePipelineState:backend->computePipeline];
|
||||||
[drawEncoder setBuffer: backend->tileCounters offset:0 atIndex: 0];
|
[drawEncoder setBuffer: backend->shapeQueueBuffer offset:0 atIndex: 0];
|
||||||
[drawEncoder setBuffer: backend->tileArrayBuffer offset:0 atIndex: 1];
|
[drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 1];
|
||||||
[drawEncoder setBuffer: backend->triangleArray offset:0 atIndex: 2];
|
[drawEncoder setBuffer: backend->arenaBuffer offset:0 atIndex: 2];
|
||||||
|
|
||||||
[drawEncoder setTexture: backend->outTexture atIndex: 0];
|
[drawEncoder setTexture: backend->outTexture atIndex: 0];
|
||||||
int useTexture = 0;
|
int useTexture = 0;
|
||||||
|
@ -298,8 +278,9 @@ void mg_mtl_canvas_draw_batch(mg_canvas_backend* interface, mg_image_data* image
|
||||||
useTexture = 1;
|
useTexture = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
[drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 3];
|
[drawEncoder setBytes: &shapeCount length:sizeof(int) atIndex: 3];
|
||||||
[drawEncoder setBytes: &scale length: sizeof(float) atIndex: 4];
|
[drawEncoder setBytes: &useTexture length:sizeof(int) atIndex: 4];
|
||||||
|
[drawEncoder setBytes: &scale length: sizeof(float) atIndex: 5];
|
||||||
|
|
||||||
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
|
//TODO: check that we don't exceed maxTotalThreadsPerThreadgroup
|
||||||
DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup);
|
DEBUG_ASSERT(RENDERER_TILE_SIZE*RENDERER_TILE_SIZE <= backend->computePipeline.maxTotalThreadsPerThreadgroup);
|
||||||
|
@ -391,8 +372,13 @@ void mg_mtl_canvas_destroy(mg_canvas_backend* interface)
|
||||||
}
|
}
|
||||||
//NOTE: semaphore does not have a destructor?
|
//NOTE: semaphore does not have a destructor?
|
||||||
|
|
||||||
[backend->tileArrayBuffer release];
|
[backend->shapeQueueBuffer release];
|
||||||
[backend->triangleArray release];
|
[backend->triangleArray release];
|
||||||
|
[backend->arenaBuffer release];
|
||||||
|
[backend->arenaOffset release];
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
//TODO release all pipelines
|
||||||
[backend->computePipeline release];
|
[backend->computePipeline release];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -524,18 +510,18 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
options: bufferOptions];
|
options: bufferOptions];
|
||||||
}
|
}
|
||||||
|
|
||||||
backend->tileArrayBuffer = [metalSurface->device newBufferWithLength: RENDERER_TILE_BUFFER_SIZE*sizeof(int)*RENDERER_MAX_TILES
|
|
||||||
options: MTLResourceStorageModePrivate];
|
|
||||||
|
|
||||||
backend->triangleArray = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_triangle_data)
|
backend->triangleArray = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_triangle_data)
|
||||||
options: MTLResourceStorageModePrivate];
|
options: MTLResourceStorageModePrivate];
|
||||||
|
|
||||||
//TODO(martin): retain ?
|
backend->shapeQueueBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_shape_queue)
|
||||||
//-----------------------------------------------------------
|
options: MTLResourceStorageModePrivate];
|
||||||
//NOTE(martin): create and initialize tile counters
|
|
||||||
//-----------------------------------------------------------
|
backend->arenaBuffer = [metalSurface->device newBufferWithLength: MG_MTL_CANVAS_DEFAULT_BUFFER_LENGTH*sizeof(mg_queue_elt)
|
||||||
backend->tileCounters = [metalSurface->device newBufferWithLength: RENDERER_MAX_TILES*sizeof(uint)
|
options: MTLResourceStorageModePrivate];
|
||||||
options: MTLResourceStorageModePrivate];
|
|
||||||
|
backend->arenaOffset = [metalSurface->device newBufferWithLength: sizeof(int)
|
||||||
|
options: MTLResourceStorageModePrivate];
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): load the library
|
//NOTE(martin): load the library
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
@ -551,9 +537,8 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
LOG_ERROR("error : %s\n", errStr);
|
LOG_ERROR("error : %s\n", errStr);
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
id<MTLFunction> shapeFunction = [library newFunctionWithName:@"ShapeSetup"];
|
||||||
id<MTLFunction> triangleFunction = [library newFunctionWithName:@"TriangleKernel"];
|
id<MTLFunction> triangleFunction = [library newFunctionWithName:@"TriangleKernel"];
|
||||||
id<MTLFunction> tilingFunction = [library newFunctionWithName:@"TileKernel"];
|
|
||||||
id<MTLFunction> sortingFunction = [library newFunctionWithName:@"SortKernel"];
|
|
||||||
id<MTLFunction> computeFunction = [library newFunctionWithName:@"RenderKernel"];
|
id<MTLFunction> computeFunction = [library newFunctionWithName:@"RenderKernel"];
|
||||||
id<MTLFunction> vertexFunction = [library newFunctionWithName:@"VertexShader"];
|
id<MTLFunction> vertexFunction = [library newFunctionWithName:@"VertexShader"];
|
||||||
id<MTLFunction> fragmentFunction = [library newFunctionWithName:@"FragmentShader"];
|
id<MTLFunction> fragmentFunction = [library newFunctionWithName:@"FragmentShader"];
|
||||||
|
@ -566,6 +551,14 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
error:&error];
|
error:&error];
|
||||||
ASSERT(backend->computePipeline);
|
ASSERT(backend->computePipeline);
|
||||||
|
|
||||||
|
MTLComputePipelineDescriptor* shapePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||||
|
shapePipelineDesc.computeFunction = shapeFunction;
|
||||||
|
|
||||||
|
backend->shapePipeline = [metalSurface->device newComputePipelineStateWithDescriptor: shapePipelineDesc
|
||||||
|
options: MTLPipelineOptionNone
|
||||||
|
reflection: nil
|
||||||
|
error: &error];
|
||||||
|
|
||||||
MTLComputePipelineDescriptor* trianglePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
MTLComputePipelineDescriptor* trianglePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
||||||
trianglePipelineDesc.computeFunction = triangleFunction;
|
trianglePipelineDesc.computeFunction = triangleFunction;
|
||||||
|
|
||||||
|
@ -574,22 +567,6 @@ mg_canvas_backend* mg_mtl_canvas_create(mg_surface surface)
|
||||||
reflection: nil
|
reflection: nil
|
||||||
error: &error];
|
error: &error];
|
||||||
|
|
||||||
MTLComputePipelineDescriptor* tilingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
|
||||||
tilingPipelineDesc.computeFunction = tilingFunction;
|
|
||||||
|
|
||||||
backend->tilingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: tilingPipelineDesc
|
|
||||||
options: MTLPipelineOptionNone
|
|
||||||
reflection: nil
|
|
||||||
error: &error];
|
|
||||||
|
|
||||||
MTLComputePipelineDescriptor* sortingPipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
|
|
||||||
sortingPipelineDesc.computeFunction = sortingFunction;
|
|
||||||
|
|
||||||
backend->sortingPipeline = [metalSurface->device newComputePipelineStateWithDescriptor: sortingPipelineDesc
|
|
||||||
options: MTLPipelineOptionNone
|
|
||||||
reflection: nil
|
|
||||||
error: &error];
|
|
||||||
|
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
//NOTE(martin): setup our render pipeline state
|
//NOTE(martin): setup our render pipeline state
|
||||||
//-----------------------------------------------------------
|
//-----------------------------------------------------------
|
||||||
|
|
|
@ -59,4 +59,32 @@ typedef struct mg_triangle_data
|
||||||
|
|
||||||
} mg_triangle_data;
|
} mg_triangle_data;
|
||||||
|
|
||||||
|
#ifndef __METAL_VERSION__
|
||||||
|
#define device
|
||||||
|
#else
|
||||||
|
using namespace metal;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct mg_tile_elt
|
||||||
|
{
|
||||||
|
int triangleIndex;
|
||||||
|
int next;
|
||||||
|
} mg_queue_elt;
|
||||||
|
|
||||||
|
typedef struct mg_tile_queue
|
||||||
|
{
|
||||||
|
atomic_int first;
|
||||||
|
} mg_tile_queue;
|
||||||
|
|
||||||
|
typedef struct mg_shape_queue
|
||||||
|
{
|
||||||
|
vector_int4 area;
|
||||||
|
device mg_tile_queue* tileQueues;
|
||||||
|
} mg_shape_queue;
|
||||||
|
|
||||||
|
#ifndef __METAL_VERSION__
|
||||||
|
#undef device
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#endif //__MTL_RENDERER_H_
|
#endif //__MTL_RENDERER_H_
|
||||||
|
|
|
@ -46,13 +46,60 @@ int orient2d(int2 a, int2 b, int2 c)
|
||||||
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));
|
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
device uchar* arena_allocate(device uchar* arenaBuffer,
|
||||||
|
device volatile atomic_uint* arenaOffset,
|
||||||
|
uint size)
|
||||||
|
{
|
||||||
|
uint index = atomic_fetch_add_explicit(arenaOffset, size, memory_order_relaxed);
|
||||||
|
return(&arenaBuffer[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
//NOTE: shape setup allocates tile queues for each shape
|
||||||
|
|
||||||
|
kernel void ShapeSetup(constant mg_shape* shapeBuffer [[buffer(0)]],
|
||||||
|
device mg_shape_queue* shapeQueueBuffer [[buffer(1)]],
|
||||||
|
device uchar* arenaBuffer [[buffer(2)]],
|
||||||
|
device volatile atomic_uint* arenaOffset [[buffer(3)]],
|
||||||
|
constant float* scaling [[buffer(4)]],
|
||||||
|
uint gid [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
|
||||||
|
float4 box = shapeBuffer[gid].clip * scaling[0];
|
||||||
|
|
||||||
|
int2 firstTile = int2(box.xy)/RENDERER_TILE_SIZE;
|
||||||
|
|
||||||
|
//WARN: the following can result in a 1x1 tile allocated even for empty boxes. But if we didn't allocate
|
||||||
|
// any tile queue, the tileQueues pointer for that shape would alias the tileQueues pointer of another
|
||||||
|
// shape, and we would have to detect that in the tiling and drawing kernels. Instead, just accept some
|
||||||
|
// waste and keep the other kernels more uniforms for now...
|
||||||
|
int nTilesX = int(box.z)/RENDERER_TILE_SIZE - firstTile.x + 1;
|
||||||
|
int nTilesY = int(box.w)/RENDERER_TILE_SIZE - firstTile.y + 1;
|
||||||
|
|
||||||
|
int tileCount = nTilesX * nTilesY;
|
||||||
|
int tileArraySize = tileCount * sizeof(mg_tile_queue);
|
||||||
|
|
||||||
|
shapeQueueBuffer[gid].area = int4(firstTile.x, firstTile.y, nTilesX, nTilesY);
|
||||||
|
shapeQueueBuffer[gid].tileQueues = (device mg_tile_queue*)arena_allocate(arenaBuffer, arenaOffset, tileArraySize);
|
||||||
|
|
||||||
|
for(int i=0; i<tileCount; i++)
|
||||||
|
{
|
||||||
|
atomic_store_explicit(&shapeQueueBuffer[gid].tileQueues[i].first, -1, memory_order_relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//NOTE: setup triangle data and bucket triangle into tile queues
|
||||||
|
|
||||||
kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
constant uint* indexBuffer [[buffer(1)]],
|
constant uint* indexBuffer [[buffer(1)]],
|
||||||
constant mg_shape* shapeBuffer [[buffer(2)]],
|
constant mg_shape* shapeBuffer [[buffer(2)]],
|
||||||
device mg_triangle_data* triangleArray [[buffer(3)]],
|
device mg_triangle_data* triangleArray [[buffer(3)]],
|
||||||
constant float* scaling [[buffer(4)]],
|
device mg_shape_queue* shapeQueueBuffer [[buffer(4)]],
|
||||||
|
device uchar* arenaBuffer [[buffer(5)]],
|
||||||
|
device volatile atomic_uint* arenaOffset [[buffer(6)]],
|
||||||
|
constant float* scaling [[buffer(7)]],
|
||||||
uint gid [[thread_position_in_grid]])
|
uint gid [[thread_position_in_grid]])
|
||||||
{
|
{
|
||||||
|
//NOTE: triangle setup
|
||||||
uint triangleIndex = gid * 3;
|
uint triangleIndex = gid * 3;
|
||||||
|
|
||||||
uint i0 = indexBuffer[triangleIndex];
|
uint i0 = indexBuffer[triangleIndex];
|
||||||
|
@ -95,7 +142,6 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
triangleArray[gid].p1 = ip1;
|
triangleArray[gid].p1 = ip1;
|
||||||
triangleArray[gid].p2 = ip2;
|
triangleArray[gid].p2 = ip2;
|
||||||
|
|
||||||
//NOTE(martin): compute triangle orientation and bias for each edge
|
|
||||||
int cw = orient2d(ip0, ip1, ip2) > 0 ? 1 : -1;
|
int cw = orient2d(ip0, ip1, ip2) > 0 ? 1 : -1;
|
||||||
|
|
||||||
triangleArray[gid].cw = cw;
|
triangleArray[gid].cw = cw;
|
||||||
|
@ -103,92 +149,45 @@ kernel void TriangleKernel(constant mg_vertex* vertexBuffer [[buffer(0)]],
|
||||||
triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2;
|
triangleArray[gid].bias1 = is_top_left(p2, p0) ? -(1-cw)/2 : -(1+cw)/2;
|
||||||
triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2;
|
triangleArray[gid].bias2 = is_top_left(p0, p1) ? -(1-cw)/2 : -(1+cw)/2;
|
||||||
|
|
||||||
triangleArray[gid].tileBox = int4(fbox)/RENDERER_TILE_SIZE;
|
int4 tileBox = int4(fbox)/RENDERER_TILE_SIZE;
|
||||||
}
|
triangleArray[gid].tileBox = tileBox;
|
||||||
|
|
||||||
kernel void TileKernel(const device mg_triangle_data* triangleArray [[buffer(0)]],
|
|
||||||
device uint* tileCounters [[buffer(1)]],
|
|
||||||
device uint* tileArrayBuffer [[buffer(2)]],
|
|
||||||
constant int* triangleCount [[buffer(3)]],
|
|
||||||
constant uint2* viewport [[buffer(4)]],
|
|
||||||
constant float* scaling [[buffer(5)]],
|
|
||||||
uint3 gid [[thread_position_in_grid]])
|
|
||||||
{
|
|
||||||
uint2 tilesMatrixDim = (*viewport - 1) / RENDERER_TILE_SIZE + 1;
|
|
||||||
int nTilesX = tilesMatrixDim.x;
|
|
||||||
|
|
||||||
int tileX = gid.x;
|
//NOTE: bucket triangle into tiles
|
||||||
int tileY = gid.y;
|
device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex];
|
||||||
int tileIndex = tileY * nTilesX + tileX;
|
|
||||||
int groupIndex = gid.z;
|
|
||||||
|
|
||||||
const int groupSize = 16;
|
int xMin = max(0, tileBox.x - shapeQueue->area.x);
|
||||||
int count = 0;
|
int yMin = max(0, tileBox.y - shapeQueue->area.y);
|
||||||
int mask = 0xffff>>(16-groupIndex);
|
int xMax = min(tileBox.z - shapeQueue->area.x, shapeQueue->area.z-1);
|
||||||
|
int yMax = min(tileBox.w - shapeQueue->area.y, shapeQueue->area.w-1);
|
||||||
|
|
||||||
for(int triangleBatchIndex=0; triangleBatchIndex<triangleCount[0]; triangleBatchIndex += groupSize)
|
//NOTE(martin): it's important to do the computation with signed int, so that we can have negative xMax/yMax
|
||||||
|
// otherwise all triangles on the left or below the x/y axis are attributed to tiles on row/column 0.
|
||||||
|
|
||||||
|
for(int y = yMin; y <= yMax; y++)
|
||||||
{
|
{
|
||||||
int triangleIndex = triangleBatchIndex + groupIndex;
|
for(int x = xMin ; x <= xMax; x++)
|
||||||
bool active = false;
|
|
||||||
// if(triangleIndex + groupIndex < triangleCount[0])
|
|
||||||
{
|
{
|
||||||
int4 box = triangleArray[triangleIndex].tileBox;
|
int tileIndex = y*shapeQueue->area.z + x;
|
||||||
/*
|
|
||||||
if( tileX >= box.x && tileX <= box.z
|
|
||||||
&& tileY >= box.y && tileY <= box.w)
|
|
||||||
{
|
|
||||||
active = true;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
int vote = uint64_t(simd_ballot(active));
|
device mg_tile_queue* tileQueue = &shapeQueue->tileQueues[tileIndex];
|
||||||
if(active)
|
device mg_queue_elt* elt = (device mg_queue_elt*)arena_allocate(arenaBuffer, arenaOffset, sizeof(mg_queue_elt));
|
||||||
{
|
int eltIndex = (device uchar*)elt - arenaBuffer;
|
||||||
int batchOffset = popcount(vote & mask);
|
|
||||||
tileArrayBuffer[tileIndex*RENDERER_TILE_BUFFER_SIZE + count + batchOffset] = triangleIndex;
|
|
||||||
}
|
|
||||||
count += popcount(vote);
|
|
||||||
}
|
|
||||||
if(groupIndex == 0)
|
|
||||||
{
|
|
||||||
tileCounters[tileIndex] = count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel void SortKernel(constant mg_triangle_data* triangleArray [[buffer(0)]],
|
elt->next = atomic_exchange_explicit(&tileQueue->first, eltIndex, memory_order_relaxed);
|
||||||
const device uint* tileCounters [[buffer(1)]],
|
|
||||||
device uint* tileArrayBuffer [[buffer(2)]],
|
|
||||||
uint gid [[thread_position_in_grid]])
|
|
||||||
{
|
|
||||||
uint tileIndex = gid;
|
|
||||||
uint tileArrayOffset = tileIndex * RENDERER_TILE_BUFFER_SIZE;
|
|
||||||
uint tileArrayCount = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE);
|
|
||||||
|
|
||||||
for(uint tileArrayIndex=1; tileArrayIndex < tileArrayCount; tileArrayIndex++)
|
elt->triangleIndex = gid;
|
||||||
{
|
|
||||||
for(uint sortIndex = tileArrayIndex; sortIndex > 0; sortIndex--)
|
|
||||||
{
|
|
||||||
int shapeIndex = triangleArray[tileArrayBuffer[tileArrayOffset + sortIndex]].shapeIndex;
|
|
||||||
int prevShapeIndex = triangleArray[tileArrayBuffer[tileArrayOffset + sortIndex - 1]].shapeIndex;
|
|
||||||
|
|
||||||
if(shapeIndex >= prevShapeIndex)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
uint tmp = tileArrayBuffer[tileArrayOffset + sortIndex];
|
|
||||||
tileArrayBuffer[tileArrayOffset + sortIndex] = tileArrayBuffer[tileArrayOffset + sortIndex - 1];
|
|
||||||
tileArrayBuffer[tileArrayOffset + sortIndex - 1] = tmp;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
|
kernel void RenderKernel(const device mg_shape_queue* shapeQueueBuffer [[buffer(0)]],
|
||||||
const device uint* tileArrayBuffer [[buffer(1)]],
|
const device mg_triangle_data* triangleArray [[buffer(1)]],
|
||||||
const device mg_triangle_data* triangleArray [[buffer(2)]],
|
const device uchar* arenaBuffer [[buffer(2)]],
|
||||||
|
|
||||||
constant int* useTexture [[buffer(3)]],
|
constant int* shapeCount [[buffer(3)]],
|
||||||
constant float* scaling [[buffer(4)]],
|
constant int* useTexture [[buffer(4)]],
|
||||||
|
constant float* scaling [[buffer(5)]],
|
||||||
|
|
||||||
texture2d<float, access::write> outTexture [[texture(0)]],
|
texture2d<float, access::write> outTexture [[texture(0)]],
|
||||||
texture2d<float> texAtlas [[texture(1)]],
|
texture2d<float> texAtlas [[texture(1)]],
|
||||||
|
@ -200,37 +199,7 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
|
||||||
{
|
{
|
||||||
//TODO: guard against thread group size not equal to tile size?
|
//TODO: guard against thread group size not equal to tile size?
|
||||||
const int2 pixelCoord = int2(gid);
|
const int2 pixelCoord = int2(gid);
|
||||||
const uint2 tileCoord = uint2(pixelCoord)/ RENDERER_TILE_SIZE;
|
const int2 tileCoord = pixelCoord/ RENDERER_TILE_SIZE;
|
||||||
const uint2 tilesMatrixDim = (gridSize - 1) / RENDERER_TILE_SIZE + 1;
|
|
||||||
const uint tileIndex = tileCoord.y * tilesMatrixDim.x + tileCoord.x;
|
|
||||||
const uint tileCounter = min(tileCounters[tileIndex], (uint)RENDERER_TILE_BUFFER_SIZE);
|
|
||||||
|
|
||||||
#ifdef RENDERER_DEBUG_TILES
|
|
||||||
//NOTE(martin): color code debug values and show the tile grid
|
|
||||||
{
|
|
||||||
float4 fragColor = float4(0);
|
|
||||||
|
|
||||||
if( pixelCoord.x % 16 == 0
|
|
||||||
||pixelCoord.y % 16 == 0)
|
|
||||||
{
|
|
||||||
fragColor = float4(0, 0, 0, 1);
|
|
||||||
}
|
|
||||||
else if(tileCounters[tileIndex] == 0xffffu)
|
|
||||||
{
|
|
||||||
fragColor = float4(1, 0, 1, 1);
|
|
||||||
}
|
|
||||||
else if(tileCounter != 0u)
|
|
||||||
{
|
|
||||||
fragColor = float4(0, 1, 0, 1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
fragColor = float4(1, 0, 0, 1);
|
|
||||||
}
|
|
||||||
outTexture.write(fragColor, gid);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const int subPixelFactor = 16;
|
const int subPixelFactor = 16;
|
||||||
const int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor);
|
const int2 centerPoint = int2((float2(pixelCoord) + float2(0.5, 0.5)) * subPixelFactor);
|
||||||
|
@ -258,89 +227,108 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
|
||||||
currentColor[i] = float4(0, 0, 0, 0);
|
currentColor[i] = float4(0, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(uint tileArrayIndex=0; tileArrayIndex < tileCounter; tileArrayIndex++)
|
for(int shapeIndex = 0; shapeIndex < shapeCount[0]; shapeIndex++)
|
||||||
{
|
{
|
||||||
int triangleIndex = tileArrayBuffer[RENDERER_TILE_BUFFER_SIZE * tileIndex + tileArrayIndex];
|
const device mg_shape_queue* shapeQueue = &shapeQueueBuffer[shapeIndex];
|
||||||
const device mg_triangle_data* triangle = &triangleArray[triangleIndex];
|
|
||||||
|
|
||||||
int2 p0 = triangle->p0;
|
// get the tile queue that corresponds to our tile in the shape area
|
||||||
int2 p1 = triangle->p1;
|
int2 tileQueueCoord = tileCoord - shapeQueue->area.xy;
|
||||||
int2 p2 = triangle->p2;
|
if( tileQueueCoord.x >= 0
|
||||||
|
&& tileQueueCoord.y >= 0
|
||||||
int cw = triangle->cw;
|
&& tileQueueCoord.x < shapeQueue->area.z
|
||||||
|
&& tileQueueCoord.y < shapeQueue->area.w)
|
||||||
int bias0 = triangle->bias0;
|
|
||||||
int bias1 = triangle->bias1;
|
|
||||||
int bias2 = triangle->bias2;
|
|
||||||
|
|
||||||
float4 cubic0 = triangle->cubic0;
|
|
||||||
float4 cubic1 = triangle->cubic1;
|
|
||||||
float4 cubic2 = triangle->cubic2;
|
|
||||||
|
|
||||||
int shapeIndex = triangle->shapeIndex;
|
|
||||||
float4 color = triangle->color;
|
|
||||||
color.rgb *= color.a;
|
|
||||||
|
|
||||||
int4 clip = triangle->box;
|
|
||||||
|
|
||||||
matrix_float3x3 uvTransform = triangle->uvTransform;
|
|
||||||
|
|
||||||
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
|
||||||
{
|
{
|
||||||
int2 samplePoint = samplePoints[sampleIndex];
|
int tileQueueIndex = tileQueueCoord.y * shapeQueue->area.z + tileQueueCoord.x;
|
||||||
|
device mg_tile_queue* tileQueue = &shapeQueue->tileQueues[tileQueueIndex];
|
||||||
|
|
||||||
if( samplePoint.x < clip.x
|
int firstEltIndex = atomic_load_explicit(&tileQueue->first, memory_order_relaxed);
|
||||||
|| samplePoint.x > clip.z
|
device mg_queue_elt* elt = 0;
|
||||||
|| samplePoint.y < clip.y
|
|
||||||
|| samplePoint.y > clip.w)
|
for(int eltIndex = firstEltIndex; eltIndex >= 0; eltIndex = elt->next)
|
||||||
{
|
{
|
||||||
continue;
|
elt = (device mg_queue_elt*)(arenaBuffer + eltIndex);
|
||||||
}
|
const device mg_triangle_data* triangle = &triangleArray[elt->triangleIndex];
|
||||||
|
|
||||||
int w0 = cw*orient2d(p1, p2, samplePoint);
|
int2 p0 = triangle->p0;
|
||||||
int w1 = cw*orient2d(p2, p0, samplePoint);
|
int2 p1 = triangle->p1;
|
||||||
int w2 = cw*orient2d(p0, p1, samplePoint);
|
int2 p2 = triangle->p2;
|
||||||
|
|
||||||
if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0)
|
int cw = triangle->cw;
|
||||||
{
|
|
||||||
float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
|
|
||||||
|
|
||||||
float eps = 0.0001;
|
int bias0 = triangle->bias0;
|
||||||
if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
|
int bias1 = triangle->bias1;
|
||||||
|
int bias2 = triangle->bias2;
|
||||||
|
|
||||||
|
float4 cubic0 = triangle->cubic0;
|
||||||
|
float4 cubic1 = triangle->cubic1;
|
||||||
|
float4 cubic2 = triangle->cubic2;
|
||||||
|
|
||||||
|
int shapeIndex = triangle->shapeIndex;
|
||||||
|
float4 color = triangle->color;
|
||||||
|
color.rgb *= color.a;
|
||||||
|
|
||||||
|
int4 clip = triangle->box;
|
||||||
|
|
||||||
|
matrix_float3x3 uvTransform = triangle->uvTransform;
|
||||||
|
|
||||||
|
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
||||||
{
|
{
|
||||||
if(shapeIndex == currentShapeIndex[sampleIndex])
|
int2 samplePoint = samplePoints[sampleIndex];
|
||||||
|
|
||||||
|
if( samplePoint.x < clip.x
|
||||||
|
|| samplePoint.x > clip.z
|
||||||
|
|| samplePoint.y < clip.y
|
||||||
|
|| samplePoint.y > clip.w)
|
||||||
{
|
{
|
||||||
flipCount[sampleIndex]++;
|
continue;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
int w0 = cw*orient2d(p1, p2, samplePoint);
|
||||||
|
int w1 = cw*orient2d(p2, p0, samplePoint);
|
||||||
|
int w2 = cw*orient2d(p0, p1, samplePoint);
|
||||||
|
|
||||||
|
if((w0+bias0) >= 0 && (w1+bias1) >= 0 && (w2+bias2) >= 0)
|
||||||
{
|
{
|
||||||
if(flipCount[sampleIndex] & 0x01)
|
float4 cubic = (cubic0*w0 + cubic1*w1 + cubic2*w2)/(w0+w1+w2);
|
||||||
|
|
||||||
|
float eps = 0.0001;
|
||||||
|
if(cubic.w*(cubic.x*cubic.x*cubic.x - cubic.y*cubic.z) <= eps)
|
||||||
{
|
{
|
||||||
sampleColor[sampleIndex] = currentColor[sampleIndex];
|
if(shapeIndex == currentShapeIndex[sampleIndex])
|
||||||
|
{
|
||||||
|
flipCount[sampleIndex]++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(flipCount[sampleIndex] & 0x01)
|
||||||
|
{
|
||||||
|
sampleColor[sampleIndex] = currentColor[sampleIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 nextColor = color;
|
||||||
|
|
||||||
|
if(useTexture[0])
|
||||||
|
{
|
||||||
|
float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1);
|
||||||
|
float2 uv = (uvTransform * sampleFP).xy;
|
||||||
|
|
||||||
|
constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
|
||||||
|
float4 texColor = texAtlas.sample(smp, uv);
|
||||||
|
|
||||||
|
texColor.rgb *= texColor.a;
|
||||||
|
nextColor *= texColor;
|
||||||
|
}
|
||||||
|
|
||||||
|
currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor;
|
||||||
|
currentShapeIndex[sampleIndex] = shapeIndex;
|
||||||
|
flipCount[sampleIndex] = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float4 nextColor = color;
|
|
||||||
|
|
||||||
if(useTexture[0])
|
|
||||||
{
|
|
||||||
float3 sampleFP = float3(float2(samplePoint).xy/(subPixelFactor*2.), 1);
|
|
||||||
float2 uv = (uvTransform * sampleFP).xy;
|
|
||||||
|
|
||||||
constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
|
|
||||||
float4 texColor = texAtlas.sample(smp, uv);
|
|
||||||
|
|
||||||
texColor.rgb *= texColor.a;
|
|
||||||
nextColor *= texColor;
|
|
||||||
}
|
|
||||||
|
|
||||||
currentColor[sampleIndex] = sampleColor[sampleIndex]*(1.-nextColor.a) + nextColor;
|
|
||||||
currentShapeIndex[sampleIndex] = shapeIndex;
|
|
||||||
flipCount[sampleIndex] = 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float4 pixelColor = float4(0);
|
float4 pixelColor = float4(0);
|
||||||
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
for(int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
|
||||||
|
@ -353,4 +341,5 @@ kernel void RenderKernel(const device uint* tileCounters [[buffer(0)]],
|
||||||
}
|
}
|
||||||
|
|
||||||
outTexture.write(pixelColor/float(sampleCount), gid);
|
outTexture.write(pixelColor/float(sampleCount), gid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue