From 7628138cee3f55b0b6c5d426a9e7f567b118ed93 Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Fri, 30 Jun 2023 16:53:36 +0200 Subject: [PATCH] [wip, win32, canvas] tiling in per-path tile queues, then merge (no backprop for now) --- src/gl_canvas.c | 41 +++++------ src/glsl_shaders/common.glsl | 2 +- src/glsl_shaders/merge.glsl | 109 ++++++++++++++++++++++++++-- src/glsl_shaders/path_setup.glsl | 53 ++++++++++++-- src/glsl_shaders/raster.glsl | 75 ++++++++++++------- src/glsl_shaders/segment_setup.glsl | 32 ++++---- 6 files changed, 233 insertions(+), 79 deletions(-) diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 17f42d3..bb0c597 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -127,9 +127,7 @@ typedef struct mg_gl_canvas_backend GLuint pathSetup; GLuint segmentSetup; GLuint backprop; - /* GLuint merge; - */ GLuint raster; GLuint blit; @@ -245,12 +243,15 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, //NOTE: path setup pass glUseProgram(backend->pathSetup); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->tileQueueBuffer); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathBuffer, backend->pathBufferOffset, pathCount*sizeof(mg_gl_path)); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueCountBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileQueueBuffer); -// glUniform1i(0, tileSize); -// glUniform1f(1, scale); + glUniform1i(0, tileSize); + glUniform1f(1, scale); - glDispatchCompute(nTilesX, nTilesY, 1); + glDispatchCompute(pathCount, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); @@ -260,19 +261,19 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->elementBuffer, backend->elementBufferOffset, eltCount*sizeof(mg_gl_path_elt)); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentCountBuffer); 
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->segmentBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->tileQueueBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->pathQueueBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileQueueBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->tileOpCountBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->tileOpBuffer); glUniform1f(0, scale); glUniform1ui(1, tileSize); - glUniform2i(2, nTilesX, nTilesY); glDispatchCompute(eltCount, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); //NOTE: backprop pass - +/* glUseProgram(backend->backprop); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->tileQueueBuffer); @@ -281,23 +282,24 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glDispatchCompute(nTilesY, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); +*/ - /* //NOTE: merge pass glUseProgram(backend->merge); glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathBuffer, backend->pathBufferOffset, pathCount*sizeof(mg_gl_path)); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpCountBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer); glUniform1i(0, tileSize); glUniform1f(1, scale); + glUniform1i(2, pathCount); glDispatchCompute(nTilesX, nTilesY, 1); - */ + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); //NOTE: raster pass glUseProgram(backend->raster); @@ -314,11 
+316,10 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathBuffer, backend->pathBufferOffset, pathCount*sizeof(mg_gl_path)); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentCountBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->segmentBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileQueueBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->screenTilesBuffer); -// glUniform1ui(0, tileSize); -// glUniform1f(1, scale); + glUniform1f(0, scale); int err = glGetError(); if(err) @@ -751,9 +752,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) err |= mg_gl_canvas_compile_compute_program(glsl_path_setup, &backend->pathSetup); err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup); err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop); - /* err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge); - */ err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster); err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit); diff --git a/src/glsl_shaders/common.glsl b/src/glsl_shaders/common.glsl index 1b1cb53..93d35ab 100644 --- a/src/glsl_shaders/common.glsl +++ b/src/glsl_shaders/common.glsl @@ -53,7 +53,7 @@ struct mg_gl_segment struct mg_gl_path_queue { - vec4 area; + ivec4 area; int tileQueues; }; diff --git a/src/glsl_shaders/merge.glsl b/src/glsl_shaders/merge.glsl index 8d759ab..67de3e1 100644 --- a/src/glsl_shaders/merge.glsl +++ b/src/glsl_shaders/merge.glsl @@ -19,23 +19,116 @@ layout(binding = 2) restrict readonly buffer tileQueueBufferSSBO mg_gl_tile_queue elements[]; } tileQueueBuffer; -layout(binding = 3) restrict readonly buffer 
tileOpBufferSSBO -{ - mg_gl_tile_op elements[]; -} tileOpBuffer; - -layout(binding = 4) restrict readonly buffer tileOpCountBufferSSBO +layout(binding = 3) coherent restrict buffer tileOpCountBufferSSBO { int elements[]; } tileOpCountBuffer; -layout(binding = 5) restrict readonly buffer screenTilesBufferSSBO +layout(binding = 4) restrict buffer tileOpBufferSSBO { - mg_gl_tile_queue elements[]; + mg_gl_tile_op elements[]; +} tileOpBuffer; + +layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO +{ + int elements[]; } screenTilesBuffer; +layout(location = 0) uniform int tileSize; +layout(location = 1) uniform float scale; +layout(location = 2) uniform int pathCount; void main() { + ivec2 nTiles = ivec2(gl_NumWorkGroups.xy); + ivec2 tileCoord = ivec2(gl_WorkGroupID.xy); + int tileIndex = tileCoord.y * nTiles.x + tileCoord.x; + screenTilesBuffer.elements[tileIndex] = -1; + + int lastOpIndex = -1; + + for(int pathIndex = 0; pathIndex < pathCount; pathIndex++) + { + mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; + ivec2 pathTileCoord = tileCoord - pathQueue.area.xy; + + vec4 pathBox = pathBuffer.elements[pathIndex].box; + vec4 pathClip = pathBuffer.elements[pathIndex].clip; + + float xMax = min(pathBox.z, pathClip.z); + int tileMax = int(xMax * scale) / tileSize; + int pathTileMax = tileMax - pathQueue.area.x; + + if( pathTileCoord.x >= 0 + && pathTileCoord.x <= pathTileMax + && pathTileCoord.y >= 0 + && pathTileCoord.y < pathQueue.area.w) + { + int pathTileIndex = pathQueue.tileQueues + pathTileCoord.y * pathQueue.area.z + pathTileCoord.x; + mg_gl_tile_queue tileQueue = tileQueueBuffer.elements[pathTileIndex]; + + int windingOffset = tileQueue.windingOffset; + int firstOpIndex = tileQueue.first; + + if(firstOpIndex == -1) + { + if((windingOffset & 1) != 0) + { + //NOTE: tile is fully covered. Add path start op (with winding offset). + // Additionally if color is opaque and tile is fully inside clip, trim tile list. 
+ int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + + tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START; + tileOpBuffer.elements[pathOpIndex].next = -1; + tileOpBuffer.elements[pathOpIndex].index = pathIndex; + tileOpBuffer.elements[pathOpIndex].windingOffset = windingOffset; + + vec4 clip = pathBuffer.elements[pathIndex].clip * scale; + vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1); + tileBox *= tileSize; + + if( lastOpIndex < 0 + ||(pathBuffer.elements[pathIndex].color.a == 1 + && tileBox.x >= clip.x + && tileBox.z < clip.z + && tileBox.y >= clip.y + && tileBox.w < clip.w)) + { + screenTilesBuffer.elements[tileIndex] = pathOpIndex; + } + else + { + tileOpBuffer.elements[lastOpIndex].next = pathOpIndex; + } + lastOpIndex = pathOpIndex; + } + // else, tile is fully uncovered, skip path + } + else + { + //NOTE: add path start op (with winding offset) + int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + + tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START; + tileOpBuffer.elements[pathOpIndex].next = -1; + tileOpBuffer.elements[pathOpIndex].index = pathIndex; + tileOpBuffer.elements[pathOpIndex].windingOffset = windingOffset; + + if(lastOpIndex < 0) + { + screenTilesBuffer.elements[tileIndex] = pathOpIndex; + } + else + { + tileOpBuffer.elements[lastOpIndex].next = pathOpIndex; + } + lastOpIndex = pathOpIndex; + + //NOTE: chain remaining path ops to end of tile list + tileOpBuffer.elements[lastOpIndex].next = firstOpIndex; + lastOpIndex = tileQueue.last; + } + } + } } diff --git a/src/glsl_shaders/path_setup.glsl b/src/glsl_shaders/path_setup.glsl index 7bd3d01..6196abc 100644 --- a/src/glsl_shaders/path_setup.glsl +++ b/src/glsl_shaders/path_setup.glsl @@ -4,18 +4,57 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; precision mediump float; layout(std430) buffer; -layout(binding = 0) restrict writeonly buffer tileQueueBufferSSBO +layout(binding = 0) restrict readonly buffer 
pathBufferSSBO +{ + mg_gl_path elements[]; +} pathBuffer; + +layout(binding = 1) restrict writeonly buffer pathQueueBufferSSBO +{ + mg_gl_path_queue elements[]; +} pathQueueBuffer; + +layout(binding = 2) coherent restrict buffer tileQueueCountBufferSSBO +{ + int elements[]; +} tileQueueCountBuffer; + +layout(binding = 3) restrict writeonly buffer tileQueueBufferSSBO { mg_gl_tile_queue elements[]; } tileQueueBuffer; +layout(location = 0) uniform int tileSize; +layout(location = 1) uniform float scale; + void main() { - uvec2 nTiles = gl_NumWorkGroups.xy; - uvec2 tileCoord = gl_WorkGroupID.xy; - uint tileIndex = tileCoord.y * nTiles.x + tileCoord.x; + uint pathIndex = gl_WorkGroupID.x; + const mg_gl_path path = pathBuffer.elements[pathIndex]; - tileQueueBuffer.elements[tileIndex].windingOffset = 0; - tileQueueBuffer.elements[tileIndex].first = -1; - tileQueueBuffer.elements[tileIndex].last = -1; + //NOTE: we don't clip on the right, since we need those tiles to accurately compute + // the prefix sum of winding increments in the backprop pass. 
+ vec4 clippedBox = vec4(max(path.box.x, path.clip.x), + max(path.box.y, path.clip.y), + path.box.z, + min(path.box.w, path.clip.w)); + + ivec2 firstTile = ivec2(clippedBox.xy*scale)/tileSize; + ivec2 lastTile = ivec2(clippedBox.zw*scale)/tileSize; + + int nTilesX = max(0, lastTile.x - firstTile.x + 1); + int nTilesY = max(0, lastTile.y - firstTile.y + 1); + int tileCount = nTilesX * nTilesY; + + int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount); + + pathQueueBuffer.elements[pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); + pathQueueBuffer.elements[pathIndex].tileQueues = tileQueuesIndex; + + for(int i=0; i= 0) { //imageStore(outTexture, ivec2(sampleCoord), vec4(0, 1, 0, 1)); @@ -161,7 +156,30 @@ void main() mg_gl_tile_op op = tileOpBuffer.elements[opIndex]; opIndex = op.next; - if(op.kind == MG_GL_OP_SEGMENT) + if(op.kind == MG_GL_OP_START) + { + vec4 pathColor = pathBuffer.elements[pathIndex].color; + pathColor.rgb *= pathColor.a; + + vec4 clip = pathBuffer.elements[pathIndex].clip * scale; + + if( sampleCoord.x >= clip.x + && sampleCoord.x < clip.z + && sampleCoord.y >= clip.y + && sampleCoord.y < clip.w) + { + bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding & 1) != 0)) + ||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding != 0)); + if(filled) + { + vec4 nextColor = pathColor; + color = color*(1-nextColor.a) + nextColor; + } + winding = op.windingOffset; + } + pathIndex = op.index; + } + else if(op.kind == MG_GL_OP_SEGMENT) { int segIndex = op.index; mg_gl_segment seg = segmentBuffer.elements[segIndex]; @@ -188,24 +206,25 @@ void main() } } } - int pathIndex = 0; -// vec4 clip = pathBuffer.elements[pathIndex].clip * scale; + vec4 pathColor = pathBuffer.elements[pathIndex].color; + pathColor.rgb *= pathColor.a; -/* if( sampleCoord.x >= clip.x + vec4 clip = pathBuffer.elements[pathIndex].clip * scale; + + if( sampleCoord.x >= clip.x && sampleCoord.x < clip.z && sampleCoord.y 
>= clip.y && sampleCoord.y < clip.w) -*/ { - /* - bool filled = (pathBuffer[pathIndex].cmd == MG_GL_FILL && (winding[sampleIndex] & 1)) - ||(pathBuffer[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0)); - */ - bool filled = (winding & 1) != 0; + { + bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding & 1) != 0)) + ||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding != 0)); if(filled) { - // write to texture - imageStore(outTexture, ivec2(sampleCoord), vec4(1, 0, 0, 1)); + vec4 nextColor = pathColor; + color = color*(1-nextColor.a) + nextColor; } } + // write to texture + imageStore(outTexture, ivec2(sampleCoord), color); } diff --git a/src/glsl_shaders/segment_setup.glsl b/src/glsl_shaders/segment_setup.glsl index b2e45e5..6bc9007 100644 --- a/src/glsl_shaders/segment_setup.glsl +++ b/src/glsl_shaders/segment_setup.glsl @@ -19,24 +19,28 @@ layout(binding = 2) restrict buffer segmentBufferSSBO mg_gl_segment elements[]; } segmentBuffer; -layout(binding = 3) coherent restrict buffer tileOpCountBufferSSBO +layout(binding = 3) restrict buffer pathQueueBufferSSBO +{ + mg_gl_path_queue elements[]; +} pathQueueBuffer; + +layout(binding = 4) coherent restrict buffer tileQueueBufferSSBO +{ + mg_gl_tile_queue elements[]; +} tileQueueBuffer; + +layout(binding = 5) coherent restrict buffer tileOpCountBufferSSBO { int elements[]; } tileOpCountBuffer; -layout(binding = 4) restrict buffer tileOpBufferSSBO +layout(binding = 6) restrict buffer tileOpBufferSSBO { mg_gl_tile_op elements[]; } tileOpBuffer; -layout(binding = 5) coherent restrict buffer tileQueuesBufferSSBO -{ - mg_gl_tile_queue elements[]; -} tileQueuesBuffer; - layout(location = 0) uniform float scale; layout(location = 1) uniform uint tileSize; -layout(location = 2) uniform ivec2 nTiles; int push_segment(in vec2 p[4], int kind) { @@ -197,9 +201,9 @@ void bin_to_tiles(int segIndex) { //NOTE: add segment index to the queues of tiles it overlaps with const mg_gl_segment 
seg = segmentBuffer.elements[segIndex]; + const mg_gl_path_queue pathQueue = pathQueueBuffer.elements[seg.pathIndex]; - ivec4 pathArea = ivec4(0, 0, nTiles.x, nTiles.y); - + ivec4 pathArea = pathQueue.area; ivec4 coveredTiles = ivec4(seg.box)/int(tileSize); int xMin = max(0, coveredTiles.x - pathArea.x); int yMin = max(0, coveredTiles.y - pathArea.y); @@ -260,18 +264,18 @@ void bin_to_tiles(int segIndex) tileOpBuffer.elements[tileOpIndex].crossRight = false; tileOpBuffer.elements[tileOpIndex].next = -1; - int tileIndex = y*pathArea.z + x; + int tileQueueIndex = pathQueue.tileQueues + y*pathArea.z + x; - tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueuesBuffer.elements[tileIndex].first, tileOpIndex); + tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueueBuffer.elements[tileQueueIndex].first, tileOpIndex); if(tileOpBuffer.elements[tileOpIndex].next == -1) { - tileQueuesBuffer.elements[tileIndex].last = tileOpIndex; + tileQueueBuffer.elements[tileQueueIndex].last = tileOpIndex; } //NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset if(crossB) { - atomicAdd(tileQueuesBuffer.elements[tileIndex].windingOffset, seg.windingIncrement); + atomicAdd(tileQueueBuffer.elements[tileQueueIndex].windingOffset, seg.windingIncrement); } //NOTE: if the segment crosses the right boundary, mark it.