From 782b7f54ae8ff47b65a9a8bce7de29a67842f329 Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Fri, 14 Jul 2023 18:58:18 +0200 Subject: [PATCH] [win32, canvas] only dispatch raster shader for tiles touched by paths --- examples/canvas/main.c | 5 ++ src/gl_canvas.c | 33 ++++++++- src/glsl_shaders/common.glsl | 20 +++++- src/glsl_shaders/merge.glsl | 100 +++++++++++++++++++------- src/glsl_shaders/raster.glsl | 135 ++++++++++++++--------------------- 5 files changed, 182 insertions(+), 111 deletions(-) diff --git a/examples/canvas/main.c b/examples/canvas/main.c index a0cbf81..eb5c0c9 100644 --- a/examples/canvas/main.c +++ b/examples/canvas/main.c @@ -63,6 +63,11 @@ int main() //NOTE: create surface mg_surface surface = mg_surface_create_for_window(window, MG_CANVAS); + if(mg_surface_is_nil(surface)) + { + printf("Error: couldn't create surface\n"); + return(-1); + } mg_surface_swap_interval(surface, 0); mg_canvas canvas = mg_canvas_create(); diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 93c02b0..6dd08d3 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -53,6 +53,14 @@ enum { LAYOUT_PATH_ELT_SIZE = sizeof(mg_gl_path_elt), }; + +typedef struct mg_gl_dispatch_indirect_command +{ + u32 num_groups_x; + u32 num_groups_y; + u32 num_groups_z; + +} mg_gl_dispatch_indirect_command; //////////////////////////////////////////////////////////// //NOTE: these are just here for the sizes... 
@@ -96,6 +104,11 @@ typedef struct mg_gl_tile_queue } mg_gl_tile_queue; +typedef struct mg_gl_screen_tile +{ + u32 tileCoord[2]; + i32 first; u8 padding[4]; /* pad to 16 bytes: std430 array stride of {uvec2, int} in the shaders */ +} mg_gl_screen_tile; //////////////////////////////////////////////////////////// enum { @@ -140,6 +153,7 @@ typedef struct mg_gl_canvas_backend GLuint tileOpBuffer; GLuint tileOpCountBuffer; GLuint screenTilesBuffer; + GLuint rasterDispatchBuffer; GLuint dummyVertexBuffer; @@ -958,6 +972,10 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBuffer(GL_SHADER_STORAGE_BUFFER, elementBuffer); glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, elementBufferOffset, eltCount*sizeof(mg_gl_path_elt)); + //NOTE: clear out texture + u8 clearColor[4] = {0}; + glClearTexImage(backend->outTexture, 0, GL_RGBA, GL_UNSIGNED_BYTE, clearColor); + //NOTE: clear counters int zero = 0; glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->segmentCountBuffer); @@ -969,6 +987,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->tileOpCountBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &zero, GL_DYNAMIC_COPY); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &(mg_gl_dispatch_indirect_command){0}, GL_DYNAMIC_COPY); /* zero source must span the whole command, not a 4-byte int */ + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); //NOTE: path setup pass @@ -1048,6 +1069,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->rasterDispatchBuffer); glUniform1i(0, tileSize); glUniform1f(1, scale); @@ -1090,7 +1112,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, { glUniform1ui(2, 0); } - glDispatchCompute(nTilesX, nTilesY, 1); + + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 
backend->rasterDispatchBuffer); + glDispatchComputeIndirect(0); //NOTE: blit pass glUseProgram(backend->blit); @@ -1526,7 +1550,12 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) glGenBuffers(1, &backend->screenTilesBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(int), 0, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); + + glGenBuffers(1, &backend->rasterDispatchBuffer); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY); + if(err) { diff --git a/src/glsl_shaders/common.glsl b/src/glsl_shaders/common.glsl index 677718f..3dd72c0 100644 --- a/src/glsl_shaders/common.glsl +++ b/src/glsl_shaders/common.glsl @@ -17,8 +17,11 @@ layout(std430) buffer; #define MG_GL_TR 4 /* curve on top right */ // Operations -#define MG_GL_OP_START 0 -#define MG_GL_OP_SEGMENT 1 +#define MG_GL_OP_FILL 0 +#define MG_GL_OP_CLIP_FILL 1 +#define MG_GL_OP_START 2 +#define MG_GL_OP_END 3 +#define MG_GL_OP_SEGMENT 4 // MSAA #define MG_GL_MAX_SAMPLE_COUNT 8 @@ -73,6 +76,19 @@ struct mg_gl_tile_queue int last; }; +struct mg_gl_screen_tile +{ + uvec2 tileCoord; + int first; +}; + +struct mg_gl_dispatch_indirect_command +{ + uint num_groups_x; + uint num_groups_y; + uint num_groups_z; +}; + float ccw(vec2 a, vec2 b, vec2 c) { return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x)); diff --git a/src/glsl_shaders/merge.glsl b/src/glsl_shaders/merge.glsl index 68302f4..61dd50c 100644 --- a/src/glsl_shaders/merge.glsl +++ b/src/glsl_shaders/merge.glsl @@ -31,9 +31,15 @@ layout(binding = 4) restrict buffer tileOpBufferSSBO layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO { - int elements[]; + mg_gl_screen_tile elements[]; } screenTilesBuffer; +layout(binding = 6) coherent restrict 
buffer dispatchBufferSSBO +{ + mg_gl_dispatch_indirect_command elements[]; +} dispatchBuffer; + + layout(location = 0) uniform int tileSize; layout(location = 1) uniform float scale; layout(location = 2) uniform int pathCount; @@ -41,13 +47,14 @@ layout(location = 3) uniform int cullSolidTiles; void main() { - ivec2 nTiles = ivec2(gl_NumWorkGroups.xy); ivec2 tileCoord = ivec2(gl_WorkGroupID.xy); - int tileIndex = tileCoord.y * nTiles.x + tileCoord.x; + int tileIndex = -1; - screenTilesBuffer.elements[tileIndex] = -1; int lastOpIndex = -1; + dispatchBuffer.elements[0].num_groups_y = 1; + dispatchBuffer.elements[0].num_groups_z = 1; + for(int pathIndex = 0; pathIndex < pathCount; pathIndex++) { mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; @@ -65,13 +72,32 @@ void main() && pathTileCoord.y >= 0 && pathTileCoord.y < pathQueue.area.w) { + if(tileIndex < 0) + { + tileIndex = int(atomicAdd(dispatchBuffer.elements[0].num_groups_x, 1)); + screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord); + screenTilesBuffer.elements[tileIndex].first = -1; + } + int pathTileIndex = pathQueue.tileQueues + pathTileCoord.y * pathQueue.area.z + pathTileCoord.x; mg_gl_tile_queue tileQueue = tileQueueBuffer.elements[pathTileIndex]; int windingOffset = tileQueue.windingOffset; int firstOpIndex = tileQueue.first; - if(firstOpIndex == -1) + vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1); + tileBox *= tileSize; + vec4 clip = pathBuffer.elements[pathIndex].clip * scale; + + if( tileBox.x >= clip.z + || tileBox.z < clip.x + || tileBox.y >= clip.w + || tileBox.w < clip.y) + { + //NOTE: tile is fully outside clip, cull it + //TODO: move that test up + } + else if(firstOpIndex == -1) { if((windingOffset & 1) != 0) { @@ -79,29 +105,33 @@ void main() // Additionally if color is opaque and tile is fully inside clip, trim tile list. 
int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); - tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START; + tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_CLIP_FILL; tileOpBuffer.elements[pathOpIndex].next = -1; tileOpBuffer.elements[pathOpIndex].index = pathIndex; tileOpBuffer.elements[pathOpIndex].windingOffsetOrCrossRight = windingOffset; - vec4 clip = pathBuffer.elements[pathIndex].clip * scale; - vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1); - tileBox *= tileSize; - - if( lastOpIndex < 0 - ||(pathBuffer.elements[pathIndex].color.a == 1 - && cullSolidTiles != 0 - && tileBox.x >= clip.x - && tileBox.z < clip.z - && tileBox.y >= clip.y - && tileBox.w < clip.w)) + if(lastOpIndex < 0) { - screenTilesBuffer.elements[tileIndex] = pathOpIndex; + screenTilesBuffer.elements[tileIndex].first = pathOpIndex; } else { tileOpBuffer.elements[lastOpIndex].next = pathOpIndex; } + + if( tileBox.x >= clip.x + && tileBox.z < clip.z + && tileBox.y >= clip.y + && tileBox.w < clip.w) + { + tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL; + + if( pathBuffer.elements[pathIndex].color.a == 1 + && cullSolidTiles != 0) + { + screenTilesBuffer.elements[tileIndex].first = pathOpIndex; + } + } lastOpIndex = pathOpIndex; } // else, tile is fully uncovered, skip path @@ -109,26 +139,44 @@ void main() else { //NOTE: add path start op (with winding offset) - int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + int startOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); - tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START; - tileOpBuffer.elements[pathOpIndex].next = -1; - tileOpBuffer.elements[pathOpIndex].index = pathIndex; - tileOpBuffer.elements[pathOpIndex].windingOffsetOrCrossRight = windingOffset; + tileOpBuffer.elements[startOpIndex].kind = MG_GL_OP_START; + tileOpBuffer.elements[startOpIndex].next = -1; + tileOpBuffer.elements[startOpIndex].index = pathIndex; + 
tileOpBuffer.elements[startOpIndex].windingOffsetOrCrossRight = windingOffset; if(lastOpIndex < 0) { - screenTilesBuffer.elements[tileIndex] = pathOpIndex; + screenTilesBuffer.elements[tileIndex].first = startOpIndex; } else { - tileOpBuffer.elements[lastOpIndex].next = pathOpIndex; + tileOpBuffer.elements[lastOpIndex].next = startOpIndex; } - lastOpIndex = pathOpIndex; + lastOpIndex = startOpIndex; //NOTE: chain remaining path ops to end of tile list tileOpBuffer.elements[lastOpIndex].next = firstOpIndex; lastOpIndex = tileQueue.last; + + //NOTE: add path end op + int endOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + + tileOpBuffer.elements[endOpIndex].kind = MG_GL_OP_END; + tileOpBuffer.elements[endOpIndex].next = -1; + tileOpBuffer.elements[endOpIndex].index = pathIndex; + tileOpBuffer.elements[endOpIndex].windingOffsetOrCrossRight = windingOffset; + + if(lastOpIndex < 0) + { + screenTilesBuffer.elements[tileIndex].first = endOpIndex; + } + else + { + tileOpBuffer.elements[lastOpIndex].next = endOpIndex; + } + lastOpIndex = endOpIndex; } } } diff --git a/src/glsl_shaders/raster.glsl b/src/glsl_shaders/raster.glsl index 2406159..8acaa15 100644 --- a/src/glsl_shaders/raster.glsl +++ b/src/glsl_shaders/raster.glsl @@ -21,7 +21,7 @@ layout(binding = 2) restrict readonly buffer tileOpBufferSSBO layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO { - int elements[]; + mg_gl_screen_tile elements[]; } screenTilesBuffer; layout(location = 0) uniform float scale; @@ -33,11 +33,10 @@ layout(binding = 1) uniform sampler2D srcTexture; void main() { - uvec2 nTiles = gl_NumWorkGroups.xy; - uvec2 tileCoord = gl_WorkGroupID.xy; - uint tileIndex = tileCoord.y * nTiles.x + tileCoord.x; + uint tileIndex = gl_WorkGroupID.x; + uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord; + ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy); - ivec2 pixelCoord = ivec2(gl_WorkGroupID.xy*uvec2(16, 16) + 
gl_LocalInvocationID.xy); vec2 centerCoord = vec2(pixelCoord) + vec2(0.5, 0.5); /* @@ -47,7 +46,6 @@ void main() return; } */ - vec2 sampleCoords[MG_GL_MAX_SAMPLE_COUNT] = { centerCoord + vec2(1, 3)/16, centerCoord + vec2(-1, -3)/16, @@ -83,57 +81,18 @@ void main() } int pathIndex = 0; - int opIndex = screenTilesBuffer.elements[tileIndex]; + int opIndex = screenTilesBuffer.elements[tileIndex].first; while(opIndex >= 0) { mg_gl_tile_op op = tileOpBuffer.elements[opIndex]; - opIndex = op.next; if(op.kind == MG_GL_OP_START) { - vec4 clip = pathBuffer.elements[pathIndex].clip * scale; - vec4 nextColor = pathBuffer.elements[pathIndex].color; - nextColor.rgb *= nextColor.a; - - if(useTexture != 0) - { - vec4 texColor = vec4(0); - for(int sampleIndex = 0; sampleIndex= clip.x - && sampleCoord.x < clip.z - && sampleCoord.y >= clip.y - && sampleCoord.y < clip.w) - { - bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0)) - ||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0)); - if(filled) - { - coverage++; - } - } winding[sampleIndex] = op.windingOffsetOrCrossRight; } - coverage /= sampleCount; - color = coverage*(color*(1-nextColor.a) + nextColor) + (1.-coverage)*color; - pathIndex = op.index; } else if(op.kind == MG_GL_OP_SEGMENT) { @@ -166,48 +125,62 @@ void main() } } } - } - - vec4 clip = pathBuffer.elements[pathIndex].clip * scale; - - vec4 nextColor = pathBuffer.elements[pathIndex].color; - nextColor.rgb *= nextColor.a; - - if(useTexture != 0) - { - vec4 texColor = vec4(0); - for(int sampleIndex = 0; sampleIndex= clip.x - && sampleCoord.x < clip.z - && sampleCoord.y >= clip.y - && sampleCoord.y < clip.w) - { - bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0)) - ||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0)); - if(filled) + if(useTexture != 0) { - coverage++; + vec4 texColor = vec4(0); + for(int 
sampleIndex = 0; sampleIndex= clip.x + && sampleCoord.x < clip.z + && sampleCoord.y >= clip.y + && sampleCoord.y < clip.w) + { + bool filled = op.kind == MG_GL_OP_CLIP_FILL + ||(pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0)) + ||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0)); + if(filled) + { + coverage++; + } + } + winding[sampleIndex] = op.windingOffsetOrCrossRight; + } + coverage /= sampleCount; + color = coverage*(color*(1-nextColor.a) + nextColor) + (1.-coverage)*color; } } + opIndex = op.next; } - coverage /= sampleCount; - color = coverage*(color*(1-nextColor.a) + nextColor) + (1.-coverage)*color; imageStore(outTexture, pixelCoord, color); }