[win32, canvas] use glBindBufferBase() with explicit start offsets passed as uniforms, to avoid the implementation-specific offset alignment requirements of glBindBufferRange()

This commit is contained in:
martinfouilleul 2023-07-15 15:29:45 +02:00
parent 6a3cb83932
commit bff9a3618f
6 changed files with 100 additions and 26 deletions

View File

@ -966,6 +966,11 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
int pathCount = backend->pathCount - backend->pathBatchStart;
int eltCount = backend->eltCount - backend->eltBatchStart;
if(!pathCount || !eltCount)
{
return;
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer);
glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path));
@ -992,6 +997,12 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
int err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
//NOTE: path setup pass
int maxWorkGroupCount = 0;
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount);
@ -1009,17 +1020,27 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
for(int i=0; i<pathCount; i += maxWorkGroupCount)
{
int pathOffset = pathBufferOffset + i*sizeof(mg_gl_path);
int pathQueueOffset = i*sizeof(mg_gl_path_queue);
int count = minimum(maxWorkGroupCount, pathCount-i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathOffset, count*sizeof(mg_gl_path));
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer, pathQueueOffset, count*sizeof(mg_gl_path_queue));
glUniform1i(2, backend->pathBatchStart + i);
glUniform1i(3, i);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer);
glDispatchCompute(count, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: segment setup pass
glUseProgram(backend->segmentSetup);
@ -1038,12 +1059,23 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
int offset = elementBufferOffset + i*sizeof(mg_gl_path_elt);
int count = minimum(maxWorkGroupCount, eltCount-i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, elementBuffer, offset, count*sizeof(mg_gl_path_elt));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, elementBuffer);
glUniform1i(2, (backend->eltBatchStart + i));
glDispatchCompute(count, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: backprop pass
glUseProgram(backend->backprop);
@ -1051,19 +1083,28 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
for(int i=0; i<pathCount; i += maxWorkGroupCount)
{
int offset = i*sizeof(mg_gl_path_queue);
int count = minimum(maxWorkGroupCount, pathCount-i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathQueueBuffer, offset, count*sizeof(mg_gl_path_queue));
glUniform1i(0, i);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->pathQueueBuffer);
glDispatchCompute(count, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: merge pass
glUseProgram(backend->merge);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathBufferOffset, pathCount*sizeof(mg_gl_path));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
@ -1085,13 +1126,23 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glUniform1i(3, 1);
}
glUniform1i(4, backend->pathBatchStart);
glDispatchCompute(nTilesX, nTilesY, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: raster pass
glUseProgram(backend->raster);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathBufferOffset, pathCount*sizeof(mg_gl_path));
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer);
@ -1113,9 +1164,20 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glUniform1ui(2, 0);
}
glUniform1i(3, backend->pathBatchStart);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
glDispatchComputeIndirect(0);
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: blit pass
glUseProgram(backend->blit);
glBindBuffer(GL_ARRAY_BUFFER, backend->dummyVertexBuffer);
@ -1125,11 +1187,14 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glDrawArrays(GL_TRIANGLES, 0, 6);
int err = glGetError();
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
backend->pathBatchStart = backend->pathCount;
backend->eltBatchStart = backend->eltCount;

View File

@ -14,6 +14,8 @@ layout(binding = 1) restrict buffer tileQueueBufferSSBO
mg_gl_tile_queue elements[];
} tileQueueBuffer;
layout(location = 0) uniform int pathQueueBufferStart;
shared int nextRowIndex;
void main()
@ -28,7 +30,7 @@ void main()
barrier();
int rowIndex = 0;
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathQueueBufferStart + pathIndex];
int tileQueueBase = pathQueue.tileQueues;
int rowSize = pathQueue.area.z;
int rowCount = pathQueue.area.w;

View File

@ -44,6 +44,7 @@ layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathCount;
layout(location = 3) uniform int cullSolidTiles;
layout(location = 4) uniform int pathBufferStart;
void main()
{
@ -60,8 +61,8 @@ void main()
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
ivec2 pathTileCoord = tileCoord - pathQueue.area.xy;
vec4 pathBox = pathBuffer.elements[pathIndex].box;
vec4 pathClip = pathBuffer.elements[pathIndex].clip;
vec4 pathBox = pathBuffer.elements[pathBufferStart + pathIndex].box;
vec4 pathClip = pathBuffer.elements[pathBufferStart + pathIndex].clip;
float xMax = min(pathBox.z, pathClip.z);
int tileMax = int(xMax * scale) / tileSize;
@ -87,7 +88,7 @@ void main()
vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1);
tileBox *= tileSize;
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 clip = pathBuffer.elements[pathBufferStart + pathIndex].clip * scale;
if( tileBox.x >= clip.z
|| tileBox.z < clip.x
@ -126,7 +127,7 @@ void main()
{
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL;
if( pathBuffer.elements[pathIndex].color.a == 1
if( pathBuffer.elements[pathBufferStart + pathIndex].color.a == 1
&& cullSolidTiles != 0)
{
screenTilesBuffer.elements[tileIndex].first = pathOpIndex;

View File

@ -26,11 +26,13 @@ layout(binding = 3) restrict writeonly buffer tileQueueBufferSSBO
layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathBufferStart;
layout(location = 3) uniform int pathQueueBufferStart;
void main()
{
uint pathIndex = gl_WorkGroupID.x;
const mg_gl_path path = pathBuffer.elements[pathIndex];
const mg_gl_path path = pathBuffer.elements[pathIndex + pathBufferStart];
//NOTE: we don't clip on the right, since we need those tiles to accurately compute
// the prefix sum of winding increments in the backprop pass.
@ -48,8 +50,8 @@ void main()
int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount);
pathQueueBuffer.elements[pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY);
pathQueueBuffer.elements[pathIndex].tileQueues = tileQueuesIndex;
pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY);
pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex;
for(int i=0; i<tileCount; i++)
{

View File

@ -27,6 +27,7 @@ layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO
layout(location = 0) uniform float scale;
layout(location = 1) uniform int msaaSampleCount;
layout(location = 2) uniform uint useTexture;
layout(location = 3) uniform int pathBufferStart;
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
layout(binding = 1) uniform sampler2D srcTexture;
@ -129,7 +130,7 @@ void main()
{
int pathIndex = op.index;
vec4 nextColor = pathBuffer.elements[pathIndex].color;
vec4 nextColor = pathBuffer.elements[pathBufferStart + pathIndex].color;
nextColor.rgb *= nextColor.a;
if(useTexture != 0)
@ -139,7 +140,7 @@ void main()
{
vec2 sampleCoord = imgSampleCoords[sampleIndex];
vec3 ph = vec3(sampleCoord.xy, 1);
vec2 uv = (pathBuffer.elements[pathIndex].uvTransform * ph).xy;
vec2 uv = (pathBuffer.elements[pathBufferStart + pathIndex].uvTransform * ph).xy;
texColor += texture(srcTexture, uv);
}
texColor /= srcSampleCount;
@ -153,7 +154,7 @@ void main()
}
else
{
vec4 clip = pathBuffer.elements[pathIndex].clip * scale;
vec4 clip = pathBuffer.elements[pathBufferStart + pathIndex].clip * scale;
float coverage = 0;
for(int sampleIndex = 0; sampleIndex<sampleCount; sampleIndex++)
@ -166,8 +167,10 @@ void main()
&& sampleCoord.y < clip.w)
{
bool filled = op.kind == MG_GL_OP_CLIP_FILL
||(pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0))
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0));
||(pathBuffer.elements[pathBufferStart + pathIndex].cmd == MG_GL_FILL
&& ((winding[sampleIndex] & 1) != 0))
||(pathBuffer.elements[pathBufferStart + pathIndex].cmd == MG_GL_STROKE
&& (winding[sampleIndex] != 0));
if(filled)
{
coverage++;

View File

@ -41,6 +41,7 @@ layout(binding = 6) restrict buffer tileOpBufferSSBO
layout(location = 0) uniform float scale;
layout(location = 1) uniform uint tileSize;
layout(location = 2) uniform int elementBufferStart;
void bin_to_tiles(int segIndex)
{
@ -824,7 +825,7 @@ void main()
{
int eltIndex = int(gl_WorkGroupID.x);
mg_gl_path_elt elt = elementBuffer.elements[eltIndex];
mg_gl_path_elt elt = elementBuffer.elements[elementBufferStart + eltIndex];
switch(elt.kind)
{