[gl canvas] balance dispatch of raster shader along 2 dimensions to avoid hitting the max workgroup count per dimension
This commit is contained in:
parent
3c103eeb65
commit
b300cc4d7d
|
@ -4,7 +4,7 @@ setlocal EnableDelayedExpansion
|
||||||
|
|
||||||
if not exist bin mkdir bin
|
if not exist bin mkdir bin
|
||||||
|
|
||||||
set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl
|
set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl src\glsl_shaders\balance_workgroups.glsl
|
||||||
|
|
||||||
call python3 scripts\embed_text.py %glsl_shaders% --prefix=glsl_ --output src\glsl_shaders.h
|
call python3 scripts\embed_text.py %glsl_shaders% --prefix=glsl_ --output src\glsl_shaders.h
|
||||||
|
|
||||||
|
|
|
@ -149,6 +149,7 @@ typedef struct mg_gl_canvas_backend
|
||||||
GLuint segmentSetup;
|
GLuint segmentSetup;
|
||||||
GLuint backprop;
|
GLuint backprop;
|
||||||
GLuint merge;
|
GLuint merge;
|
||||||
|
GLuint balanceWorkgroups;
|
||||||
GLuint raster;
|
GLuint raster;
|
||||||
GLuint blit;
|
GLuint blit;
|
||||||
|
|
||||||
|
@ -167,6 +168,7 @@ typedef struct mg_gl_canvas_backend
|
||||||
GLuint tileOpBuffer;
|
GLuint tileOpBuffer;
|
||||||
GLuint tileOpCountBuffer;
|
GLuint tileOpCountBuffer;
|
||||||
GLuint screenTilesBuffer;
|
GLuint screenTilesBuffer;
|
||||||
|
GLuint screenTilesCountBuffer;
|
||||||
GLuint rasterDispatchBuffer;
|
GLuint rasterDispatchBuffer;
|
||||||
GLuint dummyVertexBuffer;
|
GLuint dummyVertexBuffer;
|
||||||
|
|
||||||
|
@ -1095,6 +1097,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
|
||||||
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY);
|
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY);
|
||||||
|
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer);
|
||||||
|
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &zero, GL_DYNAMIC_COPY);
|
||||||
|
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
int err = glGetError();
|
int err = glGetError();
|
||||||
|
@ -1210,7 +1215,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->rasterDispatchBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->screenTilesCountBuffer);
|
||||||
|
|
||||||
glUniform1i(0, tileSize);
|
glUniform1i(0, tileSize);
|
||||||
glUniform1f(1, scale);
|
glUniform1f(1, scale);
|
||||||
|
@ -1239,6 +1244,17 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
|
||||||
log_error("gl error %i\n", err);
|
log_error("gl error %i\n", err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//NOTE: balance work groups
|
||||||
|
glUseProgram(backend->balanceWorkgroups);
|
||||||
|
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->screenTilesCountBuffer);
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->rasterDispatchBuffer);
|
||||||
|
glUniform1ui(0, maxWorkGroupCount);
|
||||||
|
|
||||||
|
glDispatchCompute(1, 1, 1);
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
//NOTE: raster pass
|
//NOTE: raster pass
|
||||||
glUseProgram(backend->raster);
|
glUseProgram(backend->raster);
|
||||||
|
|
||||||
|
@ -1246,6 +1262,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer);
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->screenTilesCountBuffer);
|
||||||
|
|
||||||
glUniform1f(0, scale);
|
glUniform1f(0, scale);
|
||||||
glUniform1i(1, backend->msaaCount);
|
glUniform1i(1, backend->msaaCount);
|
||||||
|
@ -1265,6 +1282,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
|
||||||
}
|
}
|
||||||
|
|
||||||
glUniform1i(3, backend->pathBatchStart);
|
glUniform1i(3, backend->pathBatchStart);
|
||||||
|
glUniform1ui(4, maxWorkGroupCount);
|
||||||
|
|
||||||
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
|
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
|
||||||
glDispatchComputeIndirect(0);
|
glDispatchComputeIndirect(0);
|
||||||
|
@ -1659,6 +1677,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
|
||||||
err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup);
|
err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup);
|
||||||
err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop);
|
err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop);
|
||||||
err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge);
|
err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge);
|
||||||
|
err |= mg_gl_canvas_compile_compute_program(glsl_balance_workgroups, &backend->balanceWorkgroups);
|
||||||
err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster);
|
err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster);
|
||||||
err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit);
|
err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit);
|
||||||
|
|
||||||
|
@ -1744,11 +1763,14 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer);
|
||||||
glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY);
|
glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY);
|
||||||
|
|
||||||
|
glGenBuffers(1, &backend->screenTilesCountBuffer);
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer);
|
||||||
|
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), 0, GL_DYNAMIC_COPY);
|
||||||
|
|
||||||
glGenBuffers(1, &backend->rasterDispatchBuffer);
|
glGenBuffers(1, &backend->rasterDispatchBuffer);
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
|
||||||
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY);
|
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY);
|
||||||
|
|
||||||
|
|
||||||
if(err)
|
if(err)
|
||||||
{
|
{
|
||||||
mg_gl_canvas_destroy((mg_canvas_backend*)backend);
|
mg_gl_canvas_destroy((mg_canvas_backend*)backend);
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
|
||||||
|
//NOTE: single-invocation pass that converts the screen tile count into a two-dimensional
//      indirect dispatch command, so that no dimension exceeds maxWorkGroupCount.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

precision mediump float;
layout(std430) buffer;

//NOTE: elements[0] is read as the total number of work groups (one per screen tile) to dispatch
layout(binding = 0) coherent restrict readonly buffer screenTilesCountBufferSSBO
{
	int elements[];
} screenTilesCountBuffer;

//NOTE: elements[0] receives the (x, y, z) group counts of the indirect dispatch command
layout(binding = 1) coherent restrict writeonly buffer dispatchBufferSSBO
{
	mg_gl_dispatch_indirect_command elements[];
} dispatchBuffer;

//NOTE: maximum number of work groups allowed along one dispatch dimension
layout(location = 0) uniform uint maxWorkGroupCount;

void main()
{
	//NOTE: convert explicitly, implicit int -> uint conversions are not available in every GLSL version
	uint totalWorkGroupCount = uint(screenTilesCountBuffer.elements[0]);

	//NOTE: clamp the limit to at least 1 so the division below is always defined
	uint groupLimit = max(maxWorkGroupCount, 1u);

	//NOTE: x holds up to groupLimit groups, y holds the number of rows of groupLimit groups
	//      needed to cover the total (rounded up). The last row may overshoot the total, so
	//      consumers must bounds-check the flattened index.
	dispatchBuffer.elements[0].num_groups_x = min(totalWorkGroupCount, groupLimit);
	dispatchBuffer.elements[0].num_groups_y = (totalWorkGroupCount + groupLimit - 1u) / groupLimit;
	dispatchBuffer.elements[0].num_groups_z = 1u;
}
|
|
@ -34,10 +34,10 @@ layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO
|
||||||
mg_gl_screen_tile elements[];
|
mg_gl_screen_tile elements[];
|
||||||
} screenTilesBuffer;
|
} screenTilesBuffer;
|
||||||
|
|
||||||
layout(binding = 6) coherent restrict buffer dispatchBufferSSBO
|
layout(binding = 6) coherent restrict buffer screenTilesCountBufferSSBO
|
||||||
{
|
{
|
||||||
mg_gl_dispatch_indirect_command elements[];
|
int elements[];
|
||||||
} dispatchBuffer;
|
} screenTilesCountBuffer;
|
||||||
|
|
||||||
|
|
||||||
layout(location = 0) uniform int tileSize;
|
layout(location = 0) uniform int tileSize;
|
||||||
|
@ -53,9 +53,6 @@ void main()
|
||||||
|
|
||||||
int lastOpIndex = -1;
|
int lastOpIndex = -1;
|
||||||
|
|
||||||
dispatchBuffer.elements[0].num_groups_y = 1;
|
|
||||||
dispatchBuffer.elements[0].num_groups_z = 1;
|
|
||||||
|
|
||||||
for(int pathIndex = 0; pathIndex < pathCount; pathIndex++)
|
for(int pathIndex = 0; pathIndex < pathCount; pathIndex++)
|
||||||
{
|
{
|
||||||
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
|
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
|
||||||
|
@ -75,7 +72,7 @@ void main()
|
||||||
{
|
{
|
||||||
if(tileIndex < 0)
|
if(tileIndex < 0)
|
||||||
{
|
{
|
||||||
tileIndex = int(atomicAdd(dispatchBuffer.elements[0].num_groups_x, 1));
|
tileIndex = int(atomicAdd(screenTilesCountBuffer.elements[0], 1));
|
||||||
screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord);
|
screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord);
|
||||||
screenTilesBuffer.elements[tileIndex].first = -1;
|
screenTilesBuffer.elements[tileIndex].first = -1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,17 +24,30 @@ layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO
|
||||||
mg_gl_screen_tile elements[];
|
mg_gl_screen_tile elements[];
|
||||||
} screenTilesBuffer;
|
} screenTilesBuffer;
|
||||||
|
|
||||||
|
layout(binding = 4) restrict readonly buffer screenTilesCountBufferSSBO
|
||||||
|
{
|
||||||
|
int elements[];
|
||||||
|
} screenTilesCountBuffer;
|
||||||
|
|
||||||
|
|
||||||
layout(location = 0) uniform float scale;
|
layout(location = 0) uniform float scale;
|
||||||
layout(location = 1) uniform int msaaSampleCount;
|
layout(location = 1) uniform int msaaSampleCount;
|
||||||
layout(location = 2) uniform uint useTexture;
|
layout(location = 2) uniform uint useTexture;
|
||||||
layout(location = 3) uniform int pathBufferStart;
|
layout(location = 3) uniform int pathBufferStart;
|
||||||
|
layout(location = 4) uniform uint maxWorkGroupCount;
|
||||||
|
|
||||||
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
|
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
|
||||||
layout(binding = 1) uniform sampler2D srcTexture;
|
layout(binding = 1) uniform sampler2D srcTexture;
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
uint tileIndex = gl_WorkGroupID.x;
|
uint tileIndex = gl_WorkGroupID.y * maxWorkGroupCount + gl_WorkGroupID.x;
|
||||||
|
|
||||||
|
if(tileIndex >= screenTilesCountBuffer.elements[0])
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord;
|
uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord;
|
||||||
ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy);
|
ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue