[win32, canvas] only dispatch raster shader for tiles touched by paths

This commit is contained in:
martinfouilleul 2023-07-14 18:58:18 +02:00
parent 1c244c2a00
commit 782b7f54ae
5 changed files with 182 additions and 111 deletions

View File

@ -63,6 +63,11 @@ int main()
//NOTE: create surface
mg_surface surface = mg_surface_create_for_window(window, MG_CANVAS);
if(mg_surface_is_nil(surface))
{
printf("Error: couldn't create surface\n");
return(-1);
}
mg_surface_swap_interval(surface, 0);
mg_canvas canvas = mg_canvas_create();

View File

@ -53,6 +53,14 @@ enum {
LAYOUT_PATH_ELT_SIZE = sizeof(mg_gl_path_elt),
};
//NOTE: CPU-side layout of the indirect compute dispatch arguments stored in rasterDispatchBuffer.
//      That buffer is bound as GL_DISPATCH_INDIRECT_BUFFER and consumed by glDispatchComputeIndirect()
//      for the raster pass. The GLSL struct of the same name mirrors this layout; keep both in sync.
typedef struct mg_gl_dispatch_indirect_command
{
u32 num_groups_x; // number of work groups along x
u32 num_groups_y; // number of work groups along y
u32 num_groups_z; // number of work groups along z
} mg_gl_dispatch_indirect_command;
////////////////////////////////////////////////////////////
//NOTE: these are just here for the sizes...
@ -96,6 +104,11 @@ typedef struct mg_gl_tile_queue
} mg_gl_tile_queue;
typedef struct mg_gl_screen_tile
{
u32 tileCoord[2];
i32 first;
} mg_gl_screen_tile;
////////////////////////////////////////////////////////////
enum {
@ -140,6 +153,7 @@ typedef struct mg_gl_canvas_backend
GLuint tileOpBuffer;
GLuint tileOpCountBuffer;
GLuint screenTilesBuffer;
GLuint rasterDispatchBuffer;
GLuint dummyVertexBuffer;
@ -958,6 +972,10 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBuffer(GL_SHADER_STORAGE_BUFFER, elementBuffer);
glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, elementBufferOffset, eltCount*sizeof(mg_gl_path_elt));
//NOTE: clear out texture
u8 clearColor[4] = {0};
glClearTexImage(backend->outTexture, 0, GL_RGBA, GL_BYTE, clearColor);
//NOTE: clear counters
int zero = 0;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->segmentCountBuffer);
@ -969,6 +987,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->tileOpCountBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &zero, GL_DYNAMIC_COPY);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
//NOTE: path setup pass
@ -1048,6 +1069,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->rasterDispatchBuffer);
glUniform1i(0, tileSize);
glUniform1f(1, scale);
@ -1090,7 +1112,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
{
glUniform1ui(2, 0);
}
glDispatchCompute(nTilesX, nTilesY, 1);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
glDispatchComputeIndirect(0);
//NOTE: blit pass
glUseProgram(backend->blit);
@ -1526,7 +1550,12 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
glGenBuffers(1, &backend->screenTilesBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(int), 0, GL_DYNAMIC_COPY);
glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY);
glGenBuffers(1, &backend->rasterDispatchBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY);
if(err)
{

View File

@ -17,8 +17,11 @@ layout(std430) buffer;
#define MG_GL_TR 4 /* curve on top right */
// Operations
#define MG_GL_OP_START 0
#define MG_GL_OP_SEGMENT 1
#define MG_GL_OP_FILL 0
#define MG_GL_OP_CLIP_FILL 1
#define MG_GL_OP_START 2
#define MG_GL_OP_END 3
#define MG_GL_OP_SEGMENT 4
// MSAA
#define MG_GL_MAX_SAMPLE_COUNT 8
@ -73,6 +76,19 @@ struct mg_gl_tile_queue
int last;
};
// Entry of the compacted list of screen tiles to rasterize.
// The merge pass appends one entry per tile it finds covered by a path's tile area;
// the raster pass reads the entry selected by its work group index.
// NOTE: in a std430 buffer the uvec2 member gives this struct an 8-byte alignment, so its
// array stride is 16 bytes; any CPU-side size computation must use that stride.
struct mg_gl_screen_tile
{
uvec2 tileCoord; // coordinates of the tile, in tiles
int first;       // index of the first op of this tile's list in the tile op buffer, -1 if empty
};
// Arguments of an indirect compute dispatch (work group counts along each axis).
// The merge pass fills element 0 of the dispatch buffer: it atomically increments
// num_groups_x once per tile it appends to the screen tiles list and sets
// num_groups_y and num_groups_z to 1.
struct mg_gl_dispatch_indirect_command
{
uint num_groups_x;
uint num_groups_y;
uint num_groups_z;
};
float ccw(vec2 a, vec2 b, vec2 c)
{
return((b.x-a.x)*(c.y-a.y) - (b.y-a.y)*(c.x-a.x));

View File

@ -31,9 +31,15 @@ layout(binding = 4) restrict buffer tileOpBufferSSBO
layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO
{
int elements[];
mg_gl_screen_tile elements[];
} screenTilesBuffer;
layout(binding = 6) coherent restrict buffer dispatchBufferSSBO
{
mg_gl_dispatch_indirect_command elements[];
} dispatchBuffer;
layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathCount;
@ -41,13 +47,14 @@ layout(location = 3) uniform int cullSolidTiles;
void main()
{
ivec2 nTiles = ivec2(gl_NumWorkGroups.xy);
ivec2 tileCoord = ivec2(gl_WorkGroupID.xy);
int tileIndex = tileCoord.y * nTiles.x + tileCoord.x;
int tileIndex = -1;
screenTilesBuffer.elements[tileIndex] = -1;
int lastOpIndex = -1;
dispatchBuffer.elements[0].num_groups_y = 1;
dispatchBuffer.elements[0].num_groups_z = 1;
for(int pathIndex = 0; pathIndex < pathCount; pathIndex++)
{
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
@ -65,13 +72,32 @@ void main()
&& pathTileCoord.y >= 0
&& pathTileCoord.y < pathQueue.area.w)
{
if(tileIndex < 0)
{
tileIndex = int(atomicAdd(dispatchBuffer.elements[0].num_groups_x, 1));
screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord);
screenTilesBuffer.elements[tileIndex].first = -1;
}
int pathTileIndex = pathQueue.tileQueues + pathTileCoord.y * pathQueue.area.z + pathTileCoord.x;
mg_gl_tile_queue tileQueue = tileQueueBuffer.elements[pathTileIndex];
int windingOffset = tileQueue.windingOffset;
int firstOpIndex = tileQueue.first;
if(firstOpIndex == -1)
vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1);
tileBox *= tileSize;
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
if( tileBox.x >= clip.z
|| tileBox.z < clip.x
|| tileBox.y >= clip.w
|| tileBox.w < clip.y)
{
//NOTE: tile is fully outside clip, cull it
//TODO: move that test up
}
else if(firstOpIndex == -1)
{
if((windingOffset & 1) != 0)
{
@ -79,29 +105,33 @@ void main()
// Additionally if color is opaque and tile is fully inside clip, trim tile list.
int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START;
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_CLIP_FILL;
tileOpBuffer.elements[pathOpIndex].next = -1;
tileOpBuffer.elements[pathOpIndex].index = pathIndex;
tileOpBuffer.elements[pathOpIndex].windingOffsetOrCrossRight = windingOffset;
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1);
tileBox *= tileSize;
if( lastOpIndex < 0
||(pathBuffer.elements[pathIndex].color.a == 1
&& cullSolidTiles != 0
&& tileBox.x >= clip.x
&& tileBox.z < clip.z
&& tileBox.y >= clip.y
&& tileBox.w < clip.w))
if(lastOpIndex < 0)
{
screenTilesBuffer.elements[tileIndex] = pathOpIndex;
screenTilesBuffer.elements[tileIndex].first = pathOpIndex;
}
else
{
tileOpBuffer.elements[lastOpIndex].next = pathOpIndex;
}
if( tileBox.x >= clip.x
&& tileBox.z < clip.z
&& tileBox.y >= clip.y
&& tileBox.w < clip.w)
{
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL;
if( pathBuffer.elements[pathIndex].color.a == 1
&& cullSolidTiles != 0)
{
screenTilesBuffer.elements[tileIndex].first = pathOpIndex;
}
}
lastOpIndex = pathOpIndex;
}
// else, tile is fully uncovered, skip path
@ -109,26 +139,44 @@ void main()
else
{
//NOTE: add path start op (with winding offset)
int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
int startOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START;
tileOpBuffer.elements[pathOpIndex].next = -1;
tileOpBuffer.elements[pathOpIndex].index = pathIndex;
tileOpBuffer.elements[pathOpIndex].windingOffsetOrCrossRight = windingOffset;
tileOpBuffer.elements[startOpIndex].kind = MG_GL_OP_START;
tileOpBuffer.elements[startOpIndex].next = -1;
tileOpBuffer.elements[startOpIndex].index = pathIndex;
tileOpBuffer.elements[startOpIndex].windingOffsetOrCrossRight = windingOffset;
if(lastOpIndex < 0)
{
screenTilesBuffer.elements[tileIndex] = pathOpIndex;
screenTilesBuffer.elements[tileIndex].first = startOpIndex;
}
else
{
tileOpBuffer.elements[lastOpIndex].next = pathOpIndex;
tileOpBuffer.elements[lastOpIndex].next = startOpIndex;
}
lastOpIndex = pathOpIndex;
lastOpIndex = startOpIndex;
//NOTE: chain remaining path ops to end of tile list
tileOpBuffer.elements[lastOpIndex].next = firstOpIndex;
lastOpIndex = tileQueue.last;
//NOTE: add path end op
int endOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
tileOpBuffer.elements[endOpIndex].kind = MG_GL_OP_END;
tileOpBuffer.elements[endOpIndex].next = -1;
tileOpBuffer.elements[endOpIndex].index = pathIndex;
tileOpBuffer.elements[endOpIndex].windingOffsetOrCrossRight = windingOffset;
if(lastOpIndex < 0)
{
screenTilesBuffer.elements[tileIndex].first = endOpIndex;
}
else
{
tileOpBuffer.elements[lastOpIndex].next = endOpIndex;
}
lastOpIndex = endOpIndex;
}
}
}

View File

@ -21,7 +21,7 @@ layout(binding = 2) restrict readonly buffer tileOpBufferSSBO
layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO
{
int elements[];
mg_gl_screen_tile elements[];
} screenTilesBuffer;
layout(location = 0) uniform float scale;
@ -33,11 +33,10 @@ layout(binding = 1) uniform sampler2D srcTexture;
void main()
{
uvec2 nTiles = gl_NumWorkGroups.xy;
uvec2 tileCoord = gl_WorkGroupID.xy;
uint tileIndex = tileCoord.y * nTiles.x + tileCoord.x;
uint tileIndex = gl_WorkGroupID.x;
uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord;
ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy);
ivec2 pixelCoord = ivec2(gl_WorkGroupID.xy*uvec2(16, 16) + gl_LocalInvocationID.xy);
vec2 centerCoord = vec2(pixelCoord) + vec2(0.5, 0.5);
/*
@ -47,7 +46,6 @@ void main()
return;
}
*/
vec2 sampleCoords[MG_GL_MAX_SAMPLE_COUNT] = {
centerCoord + vec2(1, 3)/16,
centerCoord + vec2(-1, -3)/16,
@ -83,57 +81,18 @@ void main()
}
int pathIndex = 0;
int opIndex = screenTilesBuffer.elements[tileIndex];
int opIndex = screenTilesBuffer.elements[tileIndex].first;
while(opIndex >= 0)
{
mg_gl_tile_op op = tileOpBuffer.elements[opIndex];
opIndex = op.next;
if(op.kind == MG_GL_OP_START)
{
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 nextColor = pathBuffer.elements[pathIndex].color;
nextColor.rgb *= nextColor.a;
if(useTexture != 0)
{
vec4 texColor = vec4(0);
for(int sampleIndex = 0; sampleIndex<srcSampleCount; sampleIndex++)
{
vec2 sampleCoord = imgSampleCoords[sampleIndex];
vec3 ph = vec3(sampleCoord.xy, 1);
vec2 uv = (pathBuffer.elements[pathIndex].uvTransform * ph).xy;
texColor += texture(srcTexture, uv);
}
texColor /= srcSampleCount;
texColor.rgb *= texColor.a;
nextColor *= texColor;
}
float coverage = 0;
for(int sampleIndex = 0; sampleIndex<sampleCount; sampleIndex++)
{
vec2 sampleCoord = sampleCoords[sampleIndex];
if( sampleCoord.x >= clip.x
&& sampleCoord.x < clip.z
&& sampleCoord.y >= clip.y
&& sampleCoord.y < clip.w)
{
bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0))
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0));
if(filled)
{
coverage++;
}
}
winding[sampleIndex] = op.windingOffsetOrCrossRight;
}
coverage /= sampleCount;
color = coverage*(color*(1-nextColor.a) + nextColor) + (1.-coverage)*color;
pathIndex = op.index;
}
else if(op.kind == MG_GL_OP_SEGMENT)
{
@ -166,48 +125,62 @@ void main()
}
}
}
}
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 nextColor = pathBuffer.elements[pathIndex].color;
nextColor.rgb *= nextColor.a;
if(useTexture != 0)
{
vec4 texColor = vec4(0);
for(int sampleIndex = 0; sampleIndex<srcSampleCount; sampleIndex++)
else
{
vec2 sampleCoord = imgSampleCoords[sampleIndex];
vec3 ph = vec3(sampleCoord.xy, 1);
vec2 uv = (pathBuffer.elements[pathIndex].uvTransform * ph).xy;
texColor += texture(srcTexture, uv);
}
texColor /= srcSampleCount;
texColor.rgb *= texColor.a;
nextColor *= texColor;
}
int pathIndex = op.index;
float coverage = 0;
for(int sampleIndex=0; sampleIndex<sampleCount; sampleIndex++)
{
vec2 sampleCoord = sampleCoords[sampleIndex];
vec4 nextColor = pathBuffer.elements[pathIndex].color;
nextColor.rgb *= nextColor.a;
if( sampleCoord.x >= clip.x
&& sampleCoord.x < clip.z
&& sampleCoord.y >= clip.y
&& sampleCoord.y < clip.w)
{
bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0))
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0));
if(filled)
if(useTexture != 0)
{
coverage++;
vec4 texColor = vec4(0);
for(int sampleIndex = 0; sampleIndex<srcSampleCount; sampleIndex++)
{
vec2 sampleCoord = imgSampleCoords[sampleIndex];
vec3 ph = vec3(sampleCoord.xy, 1);
vec2 uv = (pathBuffer.elements[pathIndex].uvTransform * ph).xy;
texColor += texture(srcTexture, uv);
}
texColor /= srcSampleCount;
texColor.rgb *= texColor.a;
nextColor *= texColor;
}
if(op.kind == MG_GL_OP_FILL)
{
color = color*(1-nextColor.a) + nextColor;
}
else
{
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
float coverage = 0;
for(int sampleIndex = 0; sampleIndex<sampleCount; sampleIndex++)
{
vec2 sampleCoord = sampleCoords[sampleIndex];
if( sampleCoord.x >= clip.x
&& sampleCoord.x < clip.z
&& sampleCoord.y >= clip.y
&& sampleCoord.y < clip.w)
{
bool filled = op.kind == MG_GL_OP_CLIP_FILL
||(pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0))
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0));
if(filled)
{
coverage++;
}
}
winding[sampleIndex] = op.windingOffsetOrCrossRight;
}
coverage /= sampleCount;
color = coverage*(color*(1-nextColor.a) + nextColor) + (1.-coverage)*color;
}
}
opIndex = op.next;
}
coverage /= sampleCount;
color = coverage*(color*(1-nextColor.a) + nextColor) + (1.-coverage)*color;
imageStore(outTexture, pixelCoord, color);
}