[wip, win32, canvas] tiling in per-path tile queues, then merge (no backprop for now)

This commit is contained in:
martinfouilleul 2023-06-30 16:53:36 +02:00
parent f0b7cf32a9
commit 7628138cee
6 changed files with 233 additions and 79 deletions

View File

@ -127,9 +127,7 @@ typedef struct mg_gl_canvas_backend
GLuint pathSetup;
GLuint segmentSetup;
GLuint backprop;
/*
GLuint merge;
*/
GLuint raster;
GLuint blit;
@ -245,12 +243,15 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
//NOTE: path setup pass
glUseProgram(backend->pathSetup);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->tileQueueBuffer);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathBuffer, backend->pathBufferOffset, pathCount*sizeof(mg_gl_path));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileQueueBuffer);
// glUniform1i(0, tileSize);
// glUniform1f(1, scale);
glUniform1i(0, tileSize);
glUniform1f(1, scale);
glDispatchCompute(nTilesX, nTilesY, 1);
glDispatchCompute(pathCount, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
@ -260,19 +261,19 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->elementBuffer, backend->elementBufferOffset, eltCount*sizeof(mg_gl_path_elt));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->segmentBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->tileQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->pathQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->tileOpCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->tileOpBuffer);
glUniform1f(0, scale);
glUniform1ui(1, tileSize);
glUniform2i(2, nTilesX, nTilesY);
glDispatchCompute(eltCount, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
//NOTE: backprop pass
/*
glUseProgram(backend->backprop);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->tileQueueBuffer);
@ -281,23 +282,24 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glDispatchCompute(nTilesY, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
*/
/*
//NOTE: merge pass
glUseProgram(backend->merge);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathBuffer, backend->pathBufferOffset, pathCount*sizeof(mg_gl_path));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer);
glUniform1i(0, tileSize);
glUniform1f(1, scale);
glUniform1i(2, pathCount);
glDispatchCompute(nTilesX, nTilesY, 1);
*/
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
//NOTE: raster pass
glUseProgram(backend->raster);
@ -314,11 +316,10 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathBuffer, backend->pathBufferOffset, pathCount*sizeof(mg_gl_path));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->segmentBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->screenTilesBuffer);
// glUniform1ui(0, tileSize);
// glUniform1f(1, scale);
glUniform1f(0, scale);
int err = glGetError();
if(err)
@ -751,9 +752,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
err |= mg_gl_canvas_compile_compute_program(glsl_path_setup, &backend->pathSetup);
err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup);
err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop);
/*
err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge);
*/
err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster);
err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit);

View File

@ -53,7 +53,7 @@ struct mg_gl_segment
//NOTE: per-path tiling record. The path setup pass fills one of these for each path; the
//      segment setup and merge passes read it to locate the path's tile queues.
struct mg_gl_path_queue
{
//NOTE(review): 'area' is declared twice here, which looks like a diff view showing its type
//              changing from vec4 to ivec4 -- confirm that only one declaration exists in the actual file.
vec4 area;
//NOTE: tile area of the path, as written by the path setup pass:
//      xy = coordinates of the first tile, z = tile count in x, w = tile count in y.
ivec4 area;
//NOTE: index of the path's first tile queue in the tile queue buffer (queues are indexed
//      from there as y*area.z + x by the other passes).
int tileQueues;
};

View File

@ -19,23 +19,116 @@ layout(binding = 2) restrict readonly buffer tileQueueBufferSSBO
mg_gl_tile_queue elements[];
} tileQueueBuffer;
layout(binding = 3) restrict readonly buffer tileOpBufferSSBO
{
mg_gl_tile_op elements[];
} tileOpBuffer;
layout(binding = 4) restrict readonly buffer tileOpCountBufferSSBO
layout(binding = 3) coherent restrict buffer tileOpCountBufferSSBO
{
int elements[];
} tileOpCountBuffer;
layout(binding = 5) restrict readonly buffer screenTilesBufferSSBO
layout(binding = 4) restrict buffer tileOpBufferSSBO
{
mg_gl_tile_queue elements[];
mg_gl_tile_op elements[];
} tileOpBuffer;
layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO
{
int elements[];
} screenTilesBuffer;
layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathCount;
//NOTE: merge pass entry point. For the screen tile selected by gl_WorkGroupID.xy, walks all paths
//      in index order and chains the tile ops of every path that contributes to this tile into a
//      single linked list. The head of that list is stored in screenTilesBuffer (-1 if empty).
void main()
{
//NOTE: the dispatch grid size is used as the screen size in tiles, and the tile index is row-major.
ivec2 nTiles = ivec2(gl_NumWorkGroups.xy);
ivec2 tileCoord = ivec2(gl_WorkGroupID.xy);
int tileIndex = tileCoord.y * nTiles.x + tileCoord.x;
//NOTE: start with an empty list for this tile
screenTilesBuffer.elements[tileIndex] = -1;
//NOTE: index of the op currently at the tail of this tile's merged list, or -1 while the list is empty
int lastOpIndex = -1;
for(int pathIndex = 0; pathIndex < pathCount; pathIndex++)
{
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
//NOTE: coordinates of this screen tile relative to the origin of the path's tile area
ivec2 pathTileCoord = tileCoord - pathQueue.area.xy;
//NOTE: the right bound is recomputed here from the path box and clip (the path setup pass does
//      not clip the path's tile area on the right), so tiles past min(box.z, clip.z) are rejected.
vec4 pathBox = pathBuffer.elements[pathIndex].box;
vec4 pathClip = pathBuffer.elements[pathIndex].clip;
float xMax = min(pathBox.z, pathClip.z);
int tileMax = int(xMax * scale) / tileSize;
int pathTileMax = tileMax - pathQueue.area.x;
//NOTE: only consider the path if this tile lies inside the path's tile area
//      (area.w is the number of tile rows of the path).
if( pathTileCoord.x >= 0
&& pathTileCoord.x <= pathTileMax
&& pathTileCoord.y >= 0
&& pathTileCoord.y < pathQueue.area.w)
{
//NOTE: row-major index of this tile inside the path's block of tile queues (row stride is area.z)
int pathTileIndex = pathQueue.tileQueues + pathTileCoord.y * pathQueue.area.z + pathTileCoord.x;
mg_gl_tile_queue tileQueue = tileQueueBuffer.elements[pathTileIndex];
int windingOffset = tileQueue.windingOffset;
int firstOpIndex = tileQueue.first;
//NOTE: first == -1 means the path queued no op for this tile
if(firstOpIndex == -1)
{
//NOTE(review): coverage is decided on winding parity only, whereas the raster pass uses a
//              non-zero test for MG_GL_STROKE paths -- confirm this is intended.
if((windingOffset & 1) != 0)
{
//NOTE: tile is fully covered. Add path start op (with winding offset).
// Additionally if color is opaque and tile is fully inside clip, trim tile list.
//NOTE: reserve a slot in the op buffer by bumping the shared op counter
int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START;
tileOpBuffer.elements[pathOpIndex].next = -1;
tileOpBuffer.elements[pathOpIndex].index = pathIndex;
tileOpBuffer.elements[pathOpIndex].windingOffset = windingOffset;
//NOTE: clip rectangle multiplied by scale, and this tile's box multiplied by tileSize, so both can be compared
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1);
tileBox *= tileSize;
//NOTE: the new op becomes the list head either because the list is empty, or because the path
//      is opaque (alpha exactly 1) and the tile is inside the clip: in the latter case the ops
//      chained so far are no longer reachable from the head. Otherwise append to the tail.
if( lastOpIndex < 0
||(pathBuffer.elements[pathIndex].color.a == 1
&& tileBox.x >= clip.x
&& tileBox.z < clip.z
&& tileBox.y >= clip.y
&& tileBox.w < clip.w))
{
screenTilesBuffer.elements[tileIndex] = pathOpIndex;
}
else
{
tileOpBuffer.elements[lastOpIndex].next = pathOpIndex;
}
lastOpIndex = pathOpIndex;
}
// else, tile is fully uncovered, skip path
}
else
{
//NOTE: add path start op (with winding offset)
int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_START;
tileOpBuffer.elements[pathOpIndex].next = -1;
tileOpBuffer.elements[pathOpIndex].index = pathIndex;
tileOpBuffer.elements[pathOpIndex].windingOffset = windingOffset;
//NOTE: the start op becomes the head if the list is empty, otherwise it is appended to the tail
if(lastOpIndex < 0)
{
screenTilesBuffer.elements[tileIndex] = pathOpIndex;
}
else
{
tileOpBuffer.elements[lastOpIndex].next = pathOpIndex;
}
lastOpIndex = pathOpIndex;
//NOTE: chain remaining path ops to end of tile list
//      (the path's own op list follows its start op, and the new tail is the path queue's last op)
tileOpBuffer.elements[lastOpIndex].next = firstOpIndex;
lastOpIndex = tileQueue.last;
}
}
}
}

View File

@ -4,18 +4,57 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
precision mediump float;
layout(std430) buffer;
layout(binding = 0) restrict writeonly buffer tileQueueBufferSSBO
layout(binding = 0) restrict readonly buffer pathBufferSSBO
{
mg_gl_path elements[];
} pathBuffer;
layout(binding = 1) restrict writeonly buffer pathQueueBufferSSBO
{
mg_gl_path_queue elements[];
} pathQueueBuffer;
layout(binding = 2) coherent restrict buffer tileQueueCountBufferSSBO
{
int elements[];
} tileQueueCountBuffer;
layout(binding = 3) restrict writeonly buffer tileQueueBufferSSBO
{
mg_gl_tile_queue elements[];
} tileQueueBuffer;
layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale;
//NOTE: path setup pass entry point. For the path selected by gl_WorkGroupID.x, computes the tile
//      area covered by the path's clipped bounding box, reserves one tile queue per tile of that
//      area, records the area and the first queue index in pathQueueBuffer, and resets the queues.
void main()
{
//NOTE(review): the three tile-indexed declarations below, and the tileIndex-indexed resets further
//              down, look like the previous per-screen-tile initialization shown interleaved with
//              the new per-path code (diff view) -- confirm which lines exist in the actual shader.
uvec2 nTiles = gl_NumWorkGroups.xy;
uvec2 tileCoord = gl_WorkGroupID.xy;
uint tileIndex = tileCoord.y * nTiles.x + tileCoord.x;
//NOTE: the work group's x index selects the path to set up
uint pathIndex = gl_WorkGroupID.x;
const mg_gl_path path = pathBuffer.elements[pathIndex];
tileQueueBuffer.elements[tileIndex].windingOffset = 0;
tileQueueBuffer.elements[tileIndex].first = -1;
tileQueueBuffer.elements[tileIndex].last = -1;
//NOTE: we don't clip on the right, since we need those tiles to accurately compute
// the prefix sum of winding increments in the backprop pass.
vec4 clippedBox = vec4(max(path.box.x, path.clip.x),
max(path.box.y, path.clip.y),
path.box.z,
min(path.box.w, path.clip.w));
//NOTE: box corners are multiplied by scale, then divided by tileSize to get tile coordinates
ivec2 firstTile = ivec2(clippedBox.xy*scale)/tileSize;
ivec2 lastTile = ivec2(clippedBox.zw*scale)/tileSize;
//NOTE: tile counts are clamped to 0, so a box whose last tile precedes its first tile reserves no queue
int nTilesX = max(0, lastTile.x - firstTile.x + 1);
int nTilesY = max(0, lastTile.y - firstTile.y + 1);
int tileCount = nTilesX * nTilesY;
//NOTE: reserve a contiguous range of tileCount queues; atomicAdd returns the counter value before
//      the addition, which is the index of the first reserved queue.
int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount);
pathQueueBuffer.elements[pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY);
pathQueueBuffer.elements[pathIndex].tileQueues = tileQueuesIndex;
//NOTE: mark every reserved queue as empty (no first/last op, zero winding offset)
for(int i=0; i<tileCount; i++)
{
tileQueueBuffer.elements[tileQueuesIndex + i].first = -1;
tileQueueBuffer.elements[tileQueuesIndex + i].last = -1;
tileQueueBuffer.elements[tileQueuesIndex + i].windingOffset = 0;
}
}

View File

@ -9,7 +9,7 @@ layout(binding = 0) restrict readonly buffer pathBufferSSBO
mg_gl_path elements[];
} pathBuffer;
layout(binding = 1) restrict readonly buffer segmentBufferCountSSBO
layout(binding = 1) restrict readonly buffer segmentCountBufferSSBO
{
int elements[];
} segmentCountBuffer;
@ -19,19 +19,17 @@ layout(binding = 2) restrict readonly buffer segmentBufferSSBO
mg_gl_segment elements[];
} segmentBuffer;
layout(binding = 3) restrict readonly buffer tileQueuesBufferSSBO
{
mg_gl_tile_queue elements[];
} tileQueuesBuffer;
layout(binding = 4) restrict readonly buffer tileOpBufferSSBO
layout(binding = 3) restrict readonly buffer tileOpBufferSSBO
{
mg_gl_tile_op elements[];
} tileOpBuffer;
layout(binding = 4) restrict readonly buffer screenTilesBufferSSBO
{
int elements[];
} screenTilesBuffer;
//layout(location = 0) uniform uint tileSize; // this has to stay commented out until it's actually used!!
//layout(location = 0) uniform float scale;
layout(location = 0) uniform float scale;
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
@ -139,12 +137,10 @@ void main()
ivec2 pixelCoord = ivec2(gl_WorkGroupID.xy*uvec2(16, 16) + gl_LocalInvocationID.xy);
vec2 sampleCoord = vec2(pixelCoord);
imageStore(outTexture, ivec2(sampleCoord), vec4(1, 1, 1, 1));
mg_gl_tile_queue tileQueue = tileQueuesBuffer.elements[tileIndex];
int opIndex = tileQueue.first;
int winding = tileQueue.windingOffset;
int pathIndex = 0;
int opIndex = screenTilesBuffer.elements[tileIndex];
int winding = 0;
vec4 color = vec4(0);
if((pixelCoord.x % 16) == 0 || (pixelCoord.y % 16) == 0)
{
@ -152,7 +148,6 @@ void main()
return;
}
int opCount = 0;
while(opIndex >= 0)
{
//imageStore(outTexture, ivec2(sampleCoord), vec4(0, 1, 0, 1));
@ -161,7 +156,30 @@ void main()
mg_gl_tile_op op = tileOpBuffer.elements[opIndex];
opIndex = op.next;
if(op.kind == MG_GL_OP_SEGMENT)
if(op.kind == MG_GL_OP_START)
{
vec4 pathColor = pathBuffer.elements[pathIndex].color;
pathColor.rgb *= pathColor.a;
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
if( sampleCoord.x >= clip.x
&& sampleCoord.x < clip.z
&& sampleCoord.y >= clip.y
&& sampleCoord.y < clip.w)
{
bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding & 1) != 0))
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding != 0));
if(filled)
{
vec4 nextColor = pathColor;
color = color*(1-nextColor.a) + nextColor;
}
winding = op.windingOffset;
}
pathIndex = op.index;
}
else if(op.kind == MG_GL_OP_SEGMENT)
{
int segIndex = op.index;
mg_gl_segment seg = segmentBuffer.elements[segIndex];
@ -188,24 +206,25 @@ void main()
}
}
}
int pathIndex = 0;
// vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 pathColor = pathBuffer.elements[pathIndex].color;
pathColor.rgb *= pathColor.a;
/* if( sampleCoord.x >= clip.x
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
if( sampleCoord.x >= clip.x
&& sampleCoord.x < clip.z
&& sampleCoord.y >= clip.y
&& sampleCoord.y < clip.w)
*/ {
/*
bool filled = (pathBuffer[pathIndex].cmd == MG_GL_FILL && (winding[sampleIndex] & 1))
||(pathBuffer[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0));
*/
bool filled = (winding & 1) != 0;
{
bool filled = (pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding & 1) != 0))
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding != 0));
if(filled)
{
vec4 nextColor = pathColor;
color = color*(1-nextColor.a) + nextColor;
}
}
// write to texture
imageStore(outTexture, ivec2(sampleCoord), vec4(1, 0, 0, 1));
}
}
imageStore(outTexture, ivec2(sampleCoord), color);
}

View File

@ -19,24 +19,28 @@ layout(binding = 2) restrict buffer segmentBufferSSBO
mg_gl_segment elements[];
} segmentBuffer;
layout(binding = 3) coherent restrict buffer tileOpCountBufferSSBO
layout(binding = 3) restrict buffer pathQueueBufferSSBO
{
mg_gl_path_queue elements[];
} pathQueueBuffer;
layout(binding = 4) coherent restrict buffer tileQueueBufferSSBO
{
mg_gl_tile_queue elements[];
} tileQueueBuffer;
layout(binding = 5) coherent restrict buffer tileOpCountBufferSSBO
{
int elements[];
} tileOpCountBuffer;
layout(binding = 4) restrict buffer tileOpBufferSSBO
layout(binding = 6) restrict buffer tileOpBufferSSBO
{
mg_gl_tile_op elements[];
} tileOpBuffer;
layout(binding = 5) coherent restrict buffer tileQueuesBufferSSBO
{
mg_gl_tile_queue elements[];
} tileQueuesBuffer;
layout(location = 0) uniform float scale;
layout(location = 1) uniform uint tileSize;
layout(location = 2) uniform ivec2 nTiles;
int push_segment(in vec2 p[4], int kind)
{
@ -197,9 +201,9 @@ void bin_to_tiles(int segIndex)
{
//NOTE: add segment index to the queues of tiles it overlaps with
const mg_gl_segment seg = segmentBuffer.elements[segIndex];
const mg_gl_path_queue pathQueue = pathQueueBuffer.elements[seg.pathIndex];
ivec4 pathArea = ivec4(0, 0, nTiles.x, nTiles.y);
ivec4 pathArea = pathQueue.area;
ivec4 coveredTiles = ivec4(seg.box)/int(tileSize);
int xMin = max(0, coveredTiles.x - pathArea.x);
int yMin = max(0, coveredTiles.y - pathArea.y);
@ -260,18 +264,18 @@ void bin_to_tiles(int segIndex)
tileOpBuffer.elements[tileOpIndex].crossRight = false;
tileOpBuffer.elements[tileOpIndex].next = -1;
int tileIndex = y*pathArea.z + x;
int tileQueueIndex = pathQueue.tileQueues + y*pathArea.z + x;
tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueuesBuffer.elements[tileIndex].first, tileOpIndex);
tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueueBuffer.elements[tileQueueIndex].first, tileOpIndex);
if(tileOpBuffer.elements[tileOpIndex].next == -1)
{
tileQueuesBuffer.elements[tileIndex].last = tileOpIndex;
tileQueueBuffer.elements[tileQueueIndex].last = tileOpIndex;
}
//NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset
if(crossB)
{
atomicAdd(tileQueuesBuffer.elements[tileIndex].windingOffset, seg.windingIncrement);
atomicAdd(tileQueueBuffer.elements[tileQueueIndex].windingOffset, seg.windingIncrement);
}
//NOTE: if the segment crosses the right boundary, mark it.