[win32, canvas] use glBindBufferBase() with explicit start-index uniforms instead of glBindBufferRange(), whose offsets must respect the implementation-specific GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT

This commit is contained in:
martinfouilleul 2023-07-15 15:29:45 +02:00
parent 6a3cb83932
commit bff9a3618f
6 changed files with 100 additions and 26 deletions

View File

@ -966,6 +966,11 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
int pathCount = backend->pathCount - backend->pathBatchStart; int pathCount = backend->pathCount - backend->pathBatchStart;
int eltCount = backend->eltCount - backend->eltBatchStart; int eltCount = backend->eltCount - backend->eltBatchStart;
if(!pathCount || !eltCount)
{
return;
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer);
glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path)); glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path));
@ -992,6 +997,12 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
int err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
//NOTE: path setup pass //NOTE: path setup pass
int maxWorkGroupCount = 0; int maxWorkGroupCount = 0;
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount); glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount);
@ -1009,17 +1020,27 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
for(int i=0; i<pathCount; i += maxWorkGroupCount) for(int i=0; i<pathCount; i += maxWorkGroupCount)
{ {
int pathOffset = pathBufferOffset + i*sizeof(mg_gl_path);
int pathQueueOffset = i*sizeof(mg_gl_path_queue);
int count = minimum(maxWorkGroupCount, pathCount-i); int count = minimum(maxWorkGroupCount, pathCount-i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathOffset, count*sizeof(mg_gl_path)); glUniform1i(2, backend->pathBatchStart + i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer, pathQueueOffset, count*sizeof(mg_gl_path_queue)); glUniform1i(3, i);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer);
glDispatchCompute(count, 1, 1); glDispatchCompute(count, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
} }
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: segment setup pass //NOTE: segment setup pass
glUseProgram(backend->segmentSetup); glUseProgram(backend->segmentSetup);
@ -1038,12 +1059,23 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
int offset = elementBufferOffset + i*sizeof(mg_gl_path_elt); int offset = elementBufferOffset + i*sizeof(mg_gl_path_elt);
int count = minimum(maxWorkGroupCount, eltCount-i); int count = minimum(maxWorkGroupCount, eltCount-i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, elementBuffer, offset, count*sizeof(mg_gl_path_elt)); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, elementBuffer);
glUniform1i(2, (backend->eltBatchStart + i));
glDispatchCompute(count, 1, 1); glDispatchCompute(count, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
} }
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: backprop pass //NOTE: backprop pass
glUseProgram(backend->backprop); glUseProgram(backend->backprop);
@ -1051,19 +1083,28 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
for(int i=0; i<pathCount; i += maxWorkGroupCount) for(int i=0; i<pathCount; i += maxWorkGroupCount)
{ {
int offset = i*sizeof(mg_gl_path_queue);
int count = minimum(maxWorkGroupCount, pathCount-i); int count = minimum(maxWorkGroupCount, pathCount-i);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, backend->pathQueueBuffer, offset, count*sizeof(mg_gl_path_queue)); glUniform1i(0, i);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->pathQueueBuffer);
glDispatchCompute(count, 1, 1); glDispatchCompute(count, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
} }
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: merge pass //NOTE: merge pass
glUseProgram(backend->merge); glUseProgram(backend->merge);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathBufferOffset, pathCount*sizeof(mg_gl_path)); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
@ -1085,13 +1126,23 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glUniform1i(3, 1); glUniform1i(3, 1);
} }
glUniform1i(4, backend->pathBatchStart);
glDispatchCompute(nTilesX, nTilesY, 1); glDispatchCompute(nTilesX, nTilesY, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: raster pass //NOTE: raster pass
glUseProgram(backend->raster); glUseProgram(backend->raster);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathBufferOffset, pathCount*sizeof(mg_gl_path)); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer);
@ -1113,9 +1164,20 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glUniform1ui(2, 0); glUniform1ui(2, 0);
} }
glUniform1i(3, backend->pathBatchStart);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
glDispatchComputeIndirect(0); glDispatchComputeIndirect(0);
if(!err)
{
err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
}
//NOTE: blit pass //NOTE: blit pass
glUseProgram(backend->blit); glUseProgram(backend->blit);
glBindBuffer(GL_ARRAY_BUFFER, backend->dummyVertexBuffer); glBindBuffer(GL_ARRAY_BUFFER, backend->dummyVertexBuffer);
@ -1125,10 +1187,13 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glDrawArrays(GL_TRIANGLES, 0, 6); glDrawArrays(GL_TRIANGLES, 0, 6);
int err = glGetError(); if(!err)
if(err)
{ {
log_error("gl error %i\n", err); err = glGetError();
if(err)
{
log_error("gl error %i\n", err);
}
} }
backend->pathBatchStart = backend->pathCount; backend->pathBatchStart = backend->pathCount;

View File

@ -14,6 +14,8 @@ layout(binding = 1) restrict buffer tileQueueBufferSSBO
mg_gl_tile_queue elements[]; mg_gl_tile_queue elements[];
} tileQueueBuffer; } tileQueueBuffer;
layout(location = 0) uniform int pathQueueBufferStart;
shared int nextRowIndex; shared int nextRowIndex;
void main() void main()
@ -28,7 +30,7 @@ void main()
barrier(); barrier();
int rowIndex = 0; int rowIndex = 0;
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathQueueBufferStart + pathIndex];
int tileQueueBase = pathQueue.tileQueues; int tileQueueBase = pathQueue.tileQueues;
int rowSize = pathQueue.area.z; int rowSize = pathQueue.area.z;
int rowCount = pathQueue.area.w; int rowCount = pathQueue.area.w;

View File

@ -44,6 +44,7 @@ layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale; layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathCount; layout(location = 2) uniform int pathCount;
layout(location = 3) uniform int cullSolidTiles; layout(location = 3) uniform int cullSolidTiles;
layout(location = 4) uniform int pathBufferStart;
void main() void main()
{ {
@ -60,8 +61,8 @@ void main()
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
ivec2 pathTileCoord = tileCoord - pathQueue.area.xy; ivec2 pathTileCoord = tileCoord - pathQueue.area.xy;
vec4 pathBox = pathBuffer.elements[pathIndex].box; vec4 pathBox = pathBuffer.elements[pathBufferStart + pathIndex].box;
vec4 pathClip = pathBuffer.elements[pathIndex].clip; vec4 pathClip = pathBuffer.elements[pathBufferStart + pathIndex].clip;
float xMax = min(pathBox.z, pathClip.z); float xMax = min(pathBox.z, pathClip.z);
int tileMax = int(xMax * scale) / tileSize; int tileMax = int(xMax * scale) / tileSize;
@ -87,7 +88,7 @@ void main()
vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1); vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1);
tileBox *= tileSize; tileBox *= tileSize;
vec4 clip = pathBuffer.elements[pathIndex].clip * scale; vec4 clip = pathBuffer.elements[pathBufferStart + pathIndex].clip * scale;
if( tileBox.x >= clip.z if( tileBox.x >= clip.z
|| tileBox.z < clip.x || tileBox.z < clip.x
@ -126,7 +127,7 @@ void main()
{ {
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL; tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL;
if( pathBuffer.elements[pathIndex].color.a == 1 if( pathBuffer.elements[pathBufferStart + pathIndex].color.a == 1
&& cullSolidTiles != 0) && cullSolidTiles != 0)
{ {
screenTilesBuffer.elements[tileIndex].first = pathOpIndex; screenTilesBuffer.elements[tileIndex].first = pathOpIndex;

View File

@ -26,11 +26,13 @@ layout(binding = 3) restrict writeonly buffer tileQueueBufferSSBO
layout(location = 0) uniform int tileSize; layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale; layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathBufferStart;
layout(location = 3) uniform int pathQueueBufferStart;
void main() void main()
{ {
uint pathIndex = gl_WorkGroupID.x; uint pathIndex = gl_WorkGroupID.x;
const mg_gl_path path = pathBuffer.elements[pathIndex]; const mg_gl_path path = pathBuffer.elements[pathIndex + pathBufferStart];
//NOTE: we don't clip on the right, since we need those tiles to accurately compute //NOTE: we don't clip on the right, since we need those tiles to accurately compute
// the prefix sum of winding increments in the backprop pass. // the prefix sum of winding increments in the backprop pass.
@ -48,8 +50,8 @@ void main()
int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount); int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount);
pathQueueBuffer.elements[pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY);
pathQueueBuffer.elements[pathIndex].tileQueues = tileQueuesIndex; pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex;
for(int i=0; i<tileCount; i++) for(int i=0; i<tileCount; i++)
{ {

View File

@ -27,6 +27,7 @@ layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO
layout(location = 0) uniform float scale; layout(location = 0) uniform float scale;
layout(location = 1) uniform int msaaSampleCount; layout(location = 1) uniform int msaaSampleCount;
layout(location = 2) uniform uint useTexture; layout(location = 2) uniform uint useTexture;
layout(location = 3) uniform int pathBufferStart;
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture; layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
layout(binding = 1) uniform sampler2D srcTexture; layout(binding = 1) uniform sampler2D srcTexture;
@ -129,7 +130,7 @@ void main()
{ {
int pathIndex = op.index; int pathIndex = op.index;
vec4 nextColor = pathBuffer.elements[pathIndex].color; vec4 nextColor = pathBuffer.elements[pathBufferStart + pathIndex].color;
nextColor.rgb *= nextColor.a; nextColor.rgb *= nextColor.a;
if(useTexture != 0) if(useTexture != 0)
@ -139,7 +140,7 @@ void main()
{ {
vec2 sampleCoord = imgSampleCoords[sampleIndex]; vec2 sampleCoord = imgSampleCoords[sampleIndex];
vec3 ph = vec3(sampleCoord.xy, 1); vec3 ph = vec3(sampleCoord.xy, 1);
vec2 uv = (pathBuffer.elements[pathIndex].uvTransform * ph).xy; vec2 uv = (pathBuffer.elements[pathBufferStart + pathIndex].uvTransform * ph).xy;
texColor += texture(srcTexture, uv); texColor += texture(srcTexture, uv);
} }
texColor /= srcSampleCount; texColor /= srcSampleCount;
@ -153,7 +154,7 @@ void main()
} }
else else
{ {
vec4 clip = pathBuffer.elements[pathIndex].clip * scale; vec4 clip = pathBuffer.elements[pathBufferStart + pathIndex].clip * scale;
float coverage = 0; float coverage = 0;
for(int sampleIndex = 0; sampleIndex<sampleCount; sampleIndex++) for(int sampleIndex = 0; sampleIndex<sampleCount; sampleIndex++)
@ -166,8 +167,10 @@ void main()
&& sampleCoord.y < clip.w) && sampleCoord.y < clip.w)
{ {
bool filled = op.kind == MG_GL_OP_CLIP_FILL bool filled = op.kind == MG_GL_OP_CLIP_FILL
||(pathBuffer.elements[pathIndex].cmd == MG_GL_FILL && ((winding[sampleIndex] & 1) != 0)) ||(pathBuffer.elements[pathBufferStart + pathIndex].cmd == MG_GL_FILL
||(pathBuffer.elements[pathIndex].cmd == MG_GL_STROKE && (winding[sampleIndex] != 0)); && ((winding[sampleIndex] & 1) != 0))
||(pathBuffer.elements[pathBufferStart + pathIndex].cmd == MG_GL_STROKE
&& (winding[sampleIndex] != 0));
if(filled) if(filled)
{ {
coverage++; coverage++;

View File

@ -41,6 +41,7 @@ layout(binding = 6) restrict buffer tileOpBufferSSBO
layout(location = 0) uniform float scale; layout(location = 0) uniform float scale;
layout(location = 1) uniform uint tileSize; layout(location = 1) uniform uint tileSize;
layout(location = 2) uniform int elementBufferStart;
void bin_to_tiles(int segIndex) void bin_to_tiles(int segIndex)
{ {
@ -824,7 +825,7 @@ void main()
{ {
int eltIndex = int(gl_WorkGroupID.x); int eltIndex = int(gl_WorkGroupID.x);
mg_gl_path_elt elt = elementBuffer.elements[eltIndex]; mg_gl_path_elt elt = elementBuffer.elements[elementBufferStart + eltIndex];
switch(elt.kind) switch(elt.kind)
{ {