diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 6dd08d3..1765843 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -966,6 +966,11 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, int pathCount = backend->pathCount - backend->pathBatchStart; int eltCount = backend->eltCount - backend->eltBatchStart; + if(!pathCount || !eltCount) + { + return; + } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer); glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path)); @@ -992,6 +997,12 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + int err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } + //NOTE: path setup pass int maxWorkGroupCount = 0; glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount); @@ -1009,17 +1020,27 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, for(int i=0; ipathQueueBuffer, pathQueueOffset, count*sizeof(mg_gl_path_queue)); + glUniform1i(2, backend->pathBatchStart + i); + glUniform1i(3, i); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer); glDispatchCompute(count, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } + if(!err) + { + err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } + } + //NOTE: segment setup pass glUseProgram(backend->segmentSetup); @@ -1038,12 +1059,23 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, int offset = elementBufferOffset + i*sizeof(mg_gl_path_elt); int count = minimum(maxWorkGroupCount, eltCount-i); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, elementBuffer, offset, count*sizeof(mg_gl_path_elt)); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, elementBuffer); + + glUniform1i(2, (backend->eltBatchStart + i)); glDispatchCompute(count, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } + if(!err) + { + err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } + } + //NOTE: backprop pass glUseProgram(backend->backprop); @@ -1051,19 +1083,28 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, for(int i=0; ipathQueueBuffer, offset, count*sizeof(mg_gl_path_queue)); + glUniform1i(0, i); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->pathQueueBuffer); glDispatchCompute(count, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } + if(!err) + { + err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } + } + //NOTE: merge pass glUseProgram(backend->merge); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathBufferOffset, pathCount*sizeof(mg_gl_path)); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->pathQueueBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileQueueBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); @@ -1085,13 +1126,23 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glUniform1i(3, 1); } + glUniform1i(4, backend->pathBatchStart); + glDispatchCompute(nTilesX, nTilesY, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + if(!err) + { + err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } + } //NOTE: raster pass glUseProgram(backend->raster); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer, pathBufferOffset, pathCount*sizeof(mg_gl_path)); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pathBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer); @@ -1113,9 +1164,20 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glUniform1ui(2, 0); } + glUniform1i(3, backend->pathBatchStart); + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer); glDispatchComputeIndirect(0); + if(!err) + { + err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } + } + //NOTE: blit pass glUseProgram(backend->blit); glBindBuffer(GL_ARRAY_BUFFER, backend->dummyVertexBuffer); @@ -1125,10 +1187,13 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glDrawArrays(GL_TRIANGLES, 0, 6); - int err = glGetError(); - if(err) + if(!err) { - log_error("gl error %i\n", err); + err = glGetError(); + if(err) + { + log_error("gl error %i\n", err); + } } backend->pathBatchStart = backend->pathCount; diff --git a/src/glsl_shaders/backprop.glsl b/src/glsl_shaders/backprop.glsl index 6cb4741..e7df4f4 100644 --- a/src/glsl_shaders/backprop.glsl +++ b/src/glsl_shaders/backprop.glsl @@ -14,6 +14,8 @@ layout(binding = 1) restrict buffer tileQueueBufferSSBO mg_gl_tile_queue elements[]; } tileQueueBuffer; +layout(location = 0) uniform int pathQueueBufferStart; + shared int nextRowIndex; void main() @@ -28,7 +30,7 @@ void main() barrier(); int rowIndex = 0; - mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; + mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathQueueBufferStart + pathIndex]; int tileQueueBase = pathQueue.tileQueues; int rowSize = pathQueue.area.z; int rowCount = pathQueue.area.w; diff --git a/src/glsl_shaders/merge.glsl b/src/glsl_shaders/merge.glsl index 61dd50c..fb796a3 100644 --- a/src/glsl_shaders/merge.glsl +++ b/src/glsl_shaders/merge.glsl @@ -44,6 +44,7 @@ layout(location = 0) uniform int tileSize; layout(location = 1) uniform float scale; layout(location = 2) uniform int pathCount; layout(location = 3) uniform int cullSolidTiles; +layout(location = 4) uniform int pathBufferStart; void main() { @@ -60,8 +61,8 @@ void main() mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; ivec2 pathTileCoord = tileCoord - pathQueue.area.xy; - vec4 pathBox = pathBuffer.elements[pathIndex].box; - vec4 pathClip = pathBuffer.elements[pathIndex].clip; + vec4 pathBox = pathBuffer.elements[pathBufferStart + pathIndex].box; + vec4 pathClip = pathBuffer.elements[pathBufferStart + pathIndex].clip; float xMax = min(pathBox.z, pathClip.z); int tileMax = int(xMax * scale) / tileSize; @@ -87,7 +88,7 @@ void main() vec4 tileBox = vec4(tileCoord.x, tileCoord.y, tileCoord.x+1, tileCoord.y+1); tileBox *= tileSize; - vec4 clip = pathBuffer.elements[pathIndex].clip * scale; + vec4 clip = pathBuffer.elements[pathBufferStart + pathIndex].clip * scale; if( tileBox.x >= clip.z || tileBox.z < clip.x @@ -126,7 +127,7 @@ void main() { tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL; - if( pathBuffer.elements[pathIndex].color.a == 1 + if( pathBuffer.elements[pathBufferStart + pathIndex].color.a == 1 && cullSolidTiles != 0) { screenTilesBuffer.elements[tileIndex].first = pathOpIndex; diff --git a/src/glsl_shaders/path_setup.glsl b/src/glsl_shaders/path_setup.glsl index 6196abc..fe711da 100644 --- a/src/glsl_shaders/path_setup.glsl +++ b/src/glsl_shaders/path_setup.glsl @@ -26,11 +26,13 @@ layout(binding = 3) restrict writeonly buffer tileQueueBufferSSBO layout(location = 0) uniform int tileSize; layout(location = 1) uniform float scale; +layout(location = 2) uniform int pathBufferStart; +layout(location = 3) uniform int pathQueueBufferStart; void main() { uint pathIndex = gl_WorkGroupID.x; - const mg_gl_path path = pathBuffer.elements[pathIndex]; + const mg_gl_path path = pathBuffer.elements[pathIndex + pathBufferStart]; //NOTE: we don't clip on the right, since we need those tiles to accurately compute // the prefix sum of winding increments in the backprop pass. @@ -48,8 +50,8 @@ void main() int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount); - pathQueueBuffer.elements[pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); - pathQueueBuffer.elements[pathIndex].tileQueues = tileQueuesIndex; + pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); + pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex; for(int i=0; i