From 680deb35b0db77f220f76af1df205b439fdf174d Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Thu, 27 Jul 2023 11:37:39 +0200 Subject: [PATCH 1/5] [win32, gl canvas] grow path and element input buffers as needed --- examples/perf_text/main.c | 2 +- examples/tiger/main.c | 8 +----- src/gl_canvas.c | 59 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/examples/perf_text/main.c b/examples/perf_text/main.c index 52328eb..bcb9952 100644 --- a/examples/perf_text/main.c +++ b/examples/perf_text/main.c @@ -201,7 +201,7 @@ int main() f32 trackX = mousePos.x/zoom - startX; f32 trackY = mousePos.y/zoom - startY; - zoom *= 1 + event->move.deltaY * 0.01; + zoom *= 1 + event->mouse.deltaY * 0.01; zoom = Clamp(zoom, 0.2, 10); startX = mousePos.x/zoom - trackX; diff --git a/examples/tiger/main.c b/examples/tiger/main.c index ea51073..42831f2 100644 --- a/examples/tiger/main.c +++ b/examples/tiger/main.c @@ -108,12 +108,6 @@ int main() mp_request_quit(); } break; - case MP_EVENT_WINDOW_RESIZE: - { - mp_rect frame = {0, 0, event->frame.rect.w, event->frame.rect.h}; - mg_surface_set_frame(surface, frame); - } break; - case MP_EVENT_MOUSE_BUTTON: { if(event->key.code == MP_MOUSE_LEFT) @@ -138,7 +132,7 @@ int main() f32 pinX = (mousePos.x - startX)/zoom; f32 pinY = (mousePos.y - startY)/zoom; - zoom *= 1 + event->move.deltaY * 0.01; + zoom *= 1 + event->mouse.deltaY * 0.01; zoom = Clamp(zoom, 0.5, 5); startX = mousePos.x - pinX*zoom; diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 30c6b35..4fbdcc0 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -182,9 +182,47 @@ static void mg_update_path_extents(vec4* extents, vec2 p) extents->w = maximum(extents->w, p.y); } +void mg_gl_grow_input_buffer(mg_gl_mapped_buffer* buffer, int copyStart, int copySize, int newSize) +{ + mg_gl_mapped_buffer newBuffer = {0}; + newBuffer.size = newSize; + glGenBuffers(1, &newBuffer.buffer); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 
newBuffer.buffer); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, newBuffer.size, 0, GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT); + newBuffer.contents = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, + 0, + newBuffer.size, + GL_MAP_WRITE_BIT + |GL_MAP_PERSISTENT_BIT + |GL_MAP_FLUSH_EXPLICIT_BIT); + + memcpy(newBuffer.contents + copyStart, buffer->contents + copyStart, copySize); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer->buffer); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + glDeleteBuffers(1, &buffer->buffer); + + *buffer = newBuffer; +} + void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type kind, vec2* p) { - mg_gl_path_elt* elementData = (mg_gl_path_elt*)backend->elementBuffer[backend->bufferIndex].contents; + int bufferIndex = backend->bufferIndex; + int bufferCap = backend->elementBuffer[bufferIndex].size / sizeof(mg_gl_path_elt); + if(backend->eltCount >= bufferCap) + { + int newBufferCap = (int)(bufferCap * 1.5); + int newBufferSize = newBufferCap * sizeof(mg_gl_path_elt); + + log_info("growing element buffer to %i elements\n", newBufferCap); + + mg_gl_grow_input_buffer(&backend->elementBuffer[bufferIndex], + backend->eltBatchStart * sizeof(mg_gl_path_elt), + (backend->eltCount - backend->eltBatchStart) * sizeof(mg_gl_path_elt), /* copy starts at eltBatchStart, so copy only the elements from there up to eltCount */ + newBufferSize); + } + + mg_gl_path_elt* elementData = (mg_gl_path_elt*)backend->elementBuffer[bufferIndex].contents; mg_gl_path_elt* elt = &elementData[backend->eltCount]; backend->eltCount++; @@ -224,6 +262,21 @@ void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primitive, f32 scale) { + int bufferIndex = backend->bufferIndex; + int bufferCap = backend->pathBuffer[bufferIndex].size / sizeof(mg_gl_path); + if(backend->pathCount >= bufferCap) + { + int newBufferCap = (int)(bufferCap * 1.5); + int newBufferSize = newBufferCap * sizeof(mg_gl_path); + + log_info("growing path buffer to %i elements\n", newBufferCap); + + 
mg_gl_grow_input_buffer(&backend->pathBuffer[bufferIndex], + backend->pathBatchStart * sizeof(mg_gl_path), + (backend->pathCount - backend->pathBatchStart) * sizeof(mg_gl_path), /* path count (not element count); copy starts at pathBatchStart, so copy only the paths from there up to pathCount */ + newBufferSize); + } + mg_gl_path* pathData = (mg_gl_path*)backend->pathBuffer[backend->bufferIndex].contents; mg_gl_path* path = &pathData[backend->pathCount]; backend->pathCount++; @@ -1519,8 +1572,8 @@ int mg_gl_canvas_compile_render_program_named(const char* progName, #define mg_gl_canvas_compile_render_program(progName, shaderSrc, vertexSrc, out) \ mg_gl_canvas_compile_render_program_named(progName, #shaderSrc, #vertexSrc, shaderSrc, vertexSrc, out) -const u32 MG_GL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path), - MG_GL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_elt), +const u32 MG_GL_PATH_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path), + MG_GL_ELEMENT_BUFFER_SIZE = (4<<12)*sizeof(mg_gl_path_elt), MG_GL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_segment), MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_queue), MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_queue), From a35f0b82b2747d9370d749d03d174ff2c2c0e1b8 Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Thu, 27 Jul 2023 12:11:30 +0200 Subject: [PATCH 2/5] [win32, gl canvas] bound check intermediate buffers in canvas shaders --- examples/tiger/main.c | 5 + src/glsl_shaders/merge.glsl | 13 ++ src/glsl_shaders/path_setup.glsl | 22 +- src/glsl_shaders/segment_setup.glsl | 336 +++++++++++++++------------- 4 files changed, 208 insertions(+), 168 deletions(-) diff --git a/examples/tiger/main.c b/examples/tiger/main.c index 42831f2..761daa2 100644 --- a/examples/tiger/main.c +++ b/examples/tiger/main.c @@ -62,6 +62,11 @@ int main() //NOTE: create surface mg_surface surface = mg_surface_create_for_window(window, MG_CANVAS); + if(mg_surface_is_nil(surface)) + { + log_error("Couln't create surface\n"); + return(-1); + } mg_surface_swap_interval(surface, 0); //TODO: create canvas diff --git a/src/glsl_shaders/merge.glsl b/src/glsl_shaders/merge.glsl 
index fb796a3..a119305 100644 --- a/src/glsl_shaders/merge.glsl +++ b/src/glsl_shaders/merge.glsl @@ -106,6 +106,11 @@ void main() // Additionally if color is opaque and tile is fully inside clip, trim tile list. int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + if(pathOpIndex >= tileOpBuffer.elements.length()) + { + return; + } + tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_CLIP_FILL; tileOpBuffer.elements[pathOpIndex].next = -1; tileOpBuffer.elements[pathOpIndex].index = pathIndex; @@ -141,6 +146,10 @@ void main() { //NOTE: add path start op (with winding offset) int startOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + if(startOpIndex >= tileOpBuffer.elements.length()) + { + return; + } tileOpBuffer.elements[startOpIndex].kind = MG_GL_OP_START; tileOpBuffer.elements[startOpIndex].next = -1; @@ -163,6 +172,10 @@ void main() //NOTE: add path end op int endOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + if(endOpIndex >= tileOpBuffer.elements.length()) + { + return; + } tileOpBuffer.elements[endOpIndex].kind = MG_GL_OP_END; tileOpBuffer.elements[endOpIndex].next = -1; diff --git a/src/glsl_shaders/path_setup.glsl b/src/glsl_shaders/path_setup.glsl index fe711da..177247d 100644 --- a/src/glsl_shaders/path_setup.glsl +++ b/src/glsl_shaders/path_setup.glsl @@ -50,13 +50,21 @@ void main() int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount); - pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); - pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex; - - for(int i=0; i= tileQueueBuffer.elements.length()) { - tileQueueBuffer.elements[tileQueuesIndex + i].first = -1; - tileQueueBuffer.elements[tileQueuesIndex + i].last = -1; - tileQueueBuffer.elements[tileQueuesIndex + i].windingOffset = 0; + pathQueueBuffer.elements[pathIndex].area = ivec4(0); + pathQueueBuffer.elements[pathIndex].tileQueues = 0; + } + else + { + 
pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); + pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex; + + for(int i=0; i= s.y; + bool goingRight = e.x >= s.x; + + vec4 box = vec4(min(s.x, e.x), + min(s.y, e.y), + max(s.x, e.x), + max(s.y, e.y)); + + segmentBuffer.elements[segIndex].kind = kind; + segmentBuffer.elements[segIndex].pathIndex = pathIndex; + segmentBuffer.elements[segIndex].windingIncrement = goingUp ? 1 : -1; + segmentBuffer.elements[segIndex].box = box; + + float dx = c.x - box.x; + float dy = c.y - box.y; + float alpha = (box.w - box.y)/(box.z - box.x); + float ofs = box.w - box.y; + + if(goingUp == goingRight) + { + if(kind == MG_GL_LINE) + { + segmentBuffer.elements[segIndex].config = MG_GL_BR; + } + else if(dy > alpha*dx) + { + segmentBuffer.elements[segIndex].config = MG_GL_TL; } else { - c = p[1]; + segmentBuffer.elements[segIndex].config = MG_GL_BR; } - e = p[3]; - } break; - } - - bool goingUp = e.y >= s.y; - bool goingRight = e.x >= s.x; - - vec4 box = vec4(min(s.x, e.x), - min(s.y, e.y), - max(s.x, e.x), - max(s.y, e.y)); - - segmentBuffer.elements[segIndex].kind = kind; - segmentBuffer.elements[segIndex].pathIndex = pathIndex; - segmentBuffer.elements[segIndex].windingIncrement = goingUp ? 
1 : -1; - segmentBuffer.elements[segIndex].box = box; - - float dx = c.x - box.x; - float dy = c.y - box.y; - float alpha = (box.w - box.y)/(box.z - box.x); - float ofs = box.w - box.y; - - if(goingUp == goingRight) - { - if(kind == MG_GL_LINE) - { - segmentBuffer.elements[segIndex].config = MG_GL_BR; - } - else if(dy > alpha*dx) - { - segmentBuffer.elements[segIndex].config = MG_GL_TL; } else { - segmentBuffer.elements[segIndex].config = MG_GL_BR; + if(kind == MG_GL_LINE) + { + segmentBuffer.elements[segIndex].config = MG_GL_TR; + } + else if(dy < ofs - alpha*dx) + { + segmentBuffer.elements[segIndex].config = MG_GL_BL; + } + else + { + segmentBuffer.elements[segIndex].config = MG_GL_TR; + } } } - else - { - if(kind == MG_GL_LINE) - { - segmentBuffer.elements[segIndex].config = MG_GL_TR; - } - else if(dy < ofs - alpha*dx) - { - segmentBuffer.elements[segIndex].config = MG_GL_BL; - } - else - { - segmentBuffer.elements[segIndex].config = MG_GL_TR; - } - } - return(segIndex); } @@ -229,9 +235,11 @@ int push_segment(in vec2 p[4], int kind, int pathIndex) void line_setup(vec2 p[4], int pathIndex) { int segIndex = push_segment(p, MG_GL_LINE, pathIndex); - segmentBuffer.elements[segIndex].hullVertex = p[0]; - - bin_to_tiles(segIndex); + if(segIndex < segmentBuffer.elements.length()) + { + segmentBuffer.elements[segIndex].hullVertex = p[0]; + bin_to_tiles(segIndex); + } } vec2 quadratic_blossom(vec2 p[4], float u, float v) @@ -298,27 +306,30 @@ void quadratic_emit(vec2 p[4], int pathIndex) { int segIndex = push_segment(p, MG_GL_QUADRATIC, pathIndex); - //NOTE: compute implicit equation matrix - float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); + if(segIndex < segmentBuffer.elements.length()) + { + //NOTE: compute implicit equation matrix + float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); - float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y); - float b = p[1].x - p[0].x + 0.5*(p[0].x - p[2].x); - 
float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y); - float d = p[0].y - p[1].y; - float e = p[1].x - p[0].x; - float f = p[0].x*p[1].y - p[1].x*p[0].y; + float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y); + float b = p[1].x - p[0].x + 0.5*(p[0].x - p[2].x); + float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y); + float d = p[0].y - p[1].y; + float e = p[1].x - p[0].x; + float f = p[0].x*p[1].y - p[1].x*p[0].y; - float flip = ( segmentBuffer.elements[segIndex].config == MG_GL_TL - || segmentBuffer.elements[segIndex].config == MG_GL_BL)? -1 : 1; + float flip = ( segmentBuffer.elements[segIndex].config == MG_GL_TL + || segmentBuffer.elements[segIndex].config == MG_GL_BL)? -1 : 1; - float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y)); + float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y)); - segmentBuffer.elements[segIndex].implicitMatrix = (1/det)*mat3(a, d, 0., - b, e, 0., - c, f, g); - segmentBuffer.elements[segIndex].hullVertex = p[1]; + segmentBuffer.elements[segIndex].implicitMatrix = (1/det)*mat3(a, d, 0., + b, e, 0., + c, f, g); + segmentBuffer.elements[segIndex].hullVertex = p[1]; - bin_to_tiles(segIndex); + bin_to_tiles(segIndex); + } } void quadratic_setup(vec2 p[4], int pathIndex) @@ -654,71 +665,74 @@ void cubic_emit(cubic_info curve, vec2 p[4], float s0, float s1, vec2 sp[4], int { int segIndex = push_segment(sp, MG_GL_CUBIC, pathIndex); - vec2 v0 = p[0]; - vec2 v1 = p[3]; - vec2 v2; - mat3 K; - - //TODO: haul that up in caller - float sqrNorm0 = dot(p[1]-p[0], p[1]-p[0]); - float sqrNorm1 = dot(p[2]-p[3], p[2]-p[3]); - - if(dot(p[0]-p[3], p[0]-p[3]) > 1e-5) + if(segIndex < segmentBuffer.elements.length()) { - if(sqrNorm0 >= sqrNorm1) - { - v2 = p[1]; - K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz); + vec2 v0 = p[0]; + vec2 v1 = p[3]; + vec2 v2; + mat3 K; + + //TODO: haul that up in caller + float sqrNorm0 
= dot(p[1]-p[0], p[1]-p[0]); + float sqrNorm1 = dot(p[2]-p[3], p[2]-p[3]); + + if(dot(p[0]-p[3], p[0]-p[3]) > 1e-5) + { + if(sqrNorm0 >= sqrNorm1) + { + v2 = p[1]; + K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz); + } + else + { + v2 = p[2]; + K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz); + } } else { + v1 = p[1]; v2 = p[2]; - K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz); + K = mat3(curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz); } - } - else - { - v1 = p[1]; - v2 = p[2]; - K = mat3(curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz); - } - //NOTE: set matrices + //NOTE: set matrices - //TODO: should we compute matrix relative to a base point to avoid loss of precision - // when computing barycentric matrix? + //TODO: should we compute matrix relative to a base point to avoid loss of precision + // when computing barycentric matrix? - mat3 B = barycentric_matrix(v0, v1, v2); + mat3 B = barycentric_matrix(v0, v1, v2); - segmentBuffer.elements[segIndex].implicitMatrix = K*B; - segmentBuffer.elements[segIndex].hullVertex = select_hull_vertex(sp[0], sp[1], sp[2], sp[3]); + segmentBuffer.elements[segIndex].implicitMatrix = K*B; + segmentBuffer.elements[segIndex].hullVertex = select_hull_vertex(sp[0], sp[1], sp[2], sp[3]); - //NOTE: compute sign flip - segmentBuffer.elements[segIndex].sign = 1; + //NOTE: compute sign flip + segmentBuffer.elements[segIndex].sign = 1; - if( curve.kind == CUBIC_SERPENTINE - || curve.kind == CUBIC_CUSP) - { - segmentBuffer.elements[segIndex].sign = (curve.d1 < 0)? -1 : 1; + if( curve.kind == CUBIC_SERPENTINE + || curve.kind == CUBIC_CUSP) + { + segmentBuffer.elements[segIndex].sign = (curve.d1 < 0)? -1 : 1; + } + else if(curve.kind == CUBIC_LOOP) + { + float d1 = curve.d1; + float d2 = curve.d2; + float d3 = curve.d3; + + float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0); + float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1); + float H = (abs(H0) > abs(H1)) ? 
H0 : H1; + segmentBuffer.elements[segIndex].sign = (H*d1 > 0) ? -1 : 1; + } + + if(sp[3].y > sp[0].y) + { + segmentBuffer.elements[segIndex].sign *= -1; + } + + //NOTE: bin to tiles + bin_to_tiles(segIndex); } - else if(curve.kind == CUBIC_LOOP) - { - float d1 = curve.d1; - float d2 = curve.d2; - float d3 = curve.d3; - - float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0); - float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1); - float H = (abs(H0) > abs(H1)) ? H0 : H1; - segmentBuffer.elements[segIndex].sign = (H*d1 > 0) ? -1 : 1; - } - - if(sp[3].y > sp[0].y) - { - segmentBuffer.elements[segIndex].sign *= -1; - } - - //NOTE: bin to tiles - bin_to_tiles(segIndex); } void cubic_setup(vec2 p[4], int pathIndex) From ae862b39ba10dac8d4b9d0d8562bcbfad2312f5f Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Thu, 27 Jul 2023 12:40:52 +0200 Subject: [PATCH 3/5] [win32, gl canvas] grow intermediate gl buffers as needed --- src/gl_canvas.c | 51 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 4fbdcc0..80a855b 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -172,6 +172,9 @@ typedef struct mg_gl_canvas_backend vec4 pathScreenExtents; vec4 pathUserExtents; + int maxTileQueueCount; + int maxSegmentCount; + } mg_gl_canvas_backend; static void mg_update_path_extents(vec4* extents, vec2 p) @@ -231,16 +234,19 @@ void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type switch(kind) { case MG_PATH_LINE: + backend->maxSegmentCount += 1; elt->kind = MG_GL_LINE; count = 2; break; case MG_PATH_QUADRATIC: + backend->maxSegmentCount += 3; elt->kind = MG_GL_QUADRATIC; count = 3; break; case MG_PATH_CUBIC: + backend->maxSegmentCount += 7; elt->kind = MG_GL_CUBIC; count = 4; break; @@ -346,6 +352,10 @@ void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primi path->uvTransform[10] = 1; path->uvTransform[11] = 0; } 
+ + int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_GL_TILE_SIZE + 1; + int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_GL_TILE_SIZE + 1; + backend->maxTileQueueCount += (nTilesX * nTilesY); } bool mg_intersect_hull_legs(vec2 p0, vec2 p1, vec2 p2, vec2 p3, vec2* intersection) @@ -1001,6 +1011,25 @@ void mg_gl_encode_stroke(mg_gl_canvas_backend* backend, } } +void mg_gl_grow_buffer_if_needed(GLuint buffer, i32 wantedSize, const char* name) +{ + i32 oldSize = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); + glGetBufferParameteriv(GL_SHADER_STORAGE_BUFFER, GL_BUFFER_SIZE, &oldSize); + + if(oldSize < wantedSize) + { + log_info("growing %s buffer\n", name); + + int newSize = wantedSize * 1.2; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, newSize, 0, GL_DYNAMIC_COPY); + } +} + + + void mg_gl_render_batch(mg_gl_canvas_backend* backend, mg_wgl_surface* surface, mg_image_data* image, @@ -1010,7 +1039,6 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, vec2 viewportSize, f32 scale) { - //NOTE: make the buffers visible to gl GLuint pathBuffer = backend->pathBuffer[backend->bufferIndex].buffer; GLuint elementBuffer = backend->elementBuffer[backend->bufferIndex].buffer; @@ -1024,6 +1052,16 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, return; } + //NOTE: update intermediate buffers size if needed + //TODO: compute correct sizes + + mg_gl_grow_buffer_if_needed(backend->pathQueueBuffer, pathCount * sizeof(mg_gl_path_queue), "path queues"); + mg_gl_grow_buffer_if_needed(backend->tileQueueBuffer, backend->maxTileQueueCount * sizeof(mg_gl_tile_queue), "tile queues"); + mg_gl_grow_buffer_if_needed(backend->segmentBuffer, backend->maxSegmentCount * sizeof(mg_gl_segment), "segments"); + mg_gl_grow_buffer_if_needed(backend->screenTilesBuffer, nTilesX * nTilesY * sizeof(mg_gl_screen_tile), "screen tiles"); + mg_gl_grow_buffer_if_needed(backend->tileOpBuffer, backend->maxSegmentCount * 30 * 
sizeof(mg_gl_tile_op), "tile ops"); + + //NOTE: make the buffers visible to gl glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer); glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path)); @@ -1251,6 +1289,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, backend->pathBatchStart = backend->pathCount; backend->eltBatchStart = backend->eltCount; + + backend->maxSegmentCount = 0; + backend->maxTileQueueCount = 0; } void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size) @@ -1324,6 +1365,8 @@ void mg_gl_canvas_render(mg_canvas_backend* interface, backend->pathBatchStart = 0; backend->eltCount = 0; backend->eltBatchStart = 0; + backend->maxSegmentCount = 0; + backend->maxTileQueueCount = 0; //NOTE: encode and render batches vec2 currentPos = {0}; @@ -1574,9 +1617,9 @@ int mg_gl_canvas_compile_render_program_named(const char* progName, const u32 MG_GL_PATH_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path), MG_GL_ELEMENT_BUFFER_SIZE = (4<<12)*sizeof(mg_gl_path_elt), - MG_GL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_segment), - MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_queue), - MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_queue), + MG_GL_SEGMENT_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_segment), + MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path_queue), + MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_tile_queue), MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_op); mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) From 3c103eeb659f83cf60cabb6a50163f09da4c6044 Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Thu, 27 Jul 2023 14:38:12 +0200 Subject: [PATCH 4/5] [canvas, gl] compute intermediate glsl structs' sizes according to std430 --- src/gl_canvas.c | 113 ++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 80a855b..8b5039f 100644 --- a/src/gl_canvas.c +++ 
b/src/gl_canvas.c @@ -64,53 +64,62 @@ typedef struct mg_gl_dispatch_indirect_command //////////////////////////////////////////////////////////// //NOTE: these are just here for the sizes... -typedef struct mg_gl_segment +#define MG_GL_LAYOUT_FIRST(name, type) \ + MG_GL_##name##_OFFSET = 0, \ + MG_GL_##name##_SIZE = MG_GL_##type##_SIZE, + +#define MG_GL_LAYOUT_NEXT(name, type, prev) \ + MG_GL_##name##_OFFSET = AlignUpOnPow2(MG_GL_##prev##_OFFSET + MG_GL_##prev##_SIZE, MG_GL_##type##_ALIGN), \ + MG_GL_##name##_SIZE = MG_GL_##type##_SIZE, + +#define MG_GL_LAYOUT_SIZE(name, last, maxAlignType) \ + MG_GL_##name##_ALIGN = AlignUpOnPow2(MG_GL_##maxAlignType##_ALIGN, MG_GL_VEC4_ALIGN), \ + MG_GL_##name##_SIZE = AlignUpOnPow2(MG_GL_##last##_OFFSET + MG_GL_##last##_SIZE, MG_GL_##name##_ALIGN), + +enum { - int kind; - int pathIndex; - int config; - int windingIncrement; - vec4 box; - float hullMatrix[9]; - float implicitMatrix[9]; - float sign; - vec2 hullVertex; - int debugID; + MG_GL_I32_SIZE = sizeof(i32), + MG_GL_I32_ALIGN = sizeof(i32), + MG_GL_F32_SIZE = sizeof(f32), + MG_GL_F32_ALIGN = sizeof(f32), + MG_GL_VEC2_SIZE = 2*sizeof(f32), + MG_GL_VEC2_ALIGN = 2*sizeof(f32), + MG_GL_VEC3_SIZE = 4*sizeof(f32), + MG_GL_VEC3_ALIGN = 4*sizeof(f32), + MG_GL_VEC4_SIZE = 4*sizeof(f32), + MG_GL_VEC4_ALIGN = 4*sizeof(f32), + MG_GL_MAT3_SIZE = 3*3*MG_GL_VEC3_SIZE, + MG_GL_MAT3_ALIGN = MG_GL_VEC3_ALIGN, -} mg_gl_segment; + MG_GL_LAYOUT_FIRST(SEGMENT_KIND, I32) + MG_GL_LAYOUT_NEXT(SEGMENT_PATH_INDEX, I32, SEGMENT_KIND) + MG_GL_LAYOUT_NEXT(SEGMENT_CONFIG, I32, SEGMENT_PATH_INDEX) + MG_GL_LAYOUT_NEXT(SEGMENT_WINDING, I32, SEGMENT_CONFIG) + MG_GL_LAYOUT_NEXT(SEGMENT_BOX, VEC4, SEGMENT_WINDING) + MG_GL_LAYOUT_NEXT(SEGMENT_IMPLICIT_MATRIX, MAT3, SEGMENT_BOX) + MG_GL_LAYOUT_NEXT(SEGMENT_HULL_VERTEX, VEC2, SEGMENT_IMPLICIT_MATRIX) + MG_GL_LAYOUT_NEXT(SEGMENT_SIGN, F32, SEGMENT_HULL_VERTEX) + MG_GL_LAYOUT_SIZE(SEGMENT, SEGMENT_SIGN, MAT3) -typedef struct mg_gl_path_queue -{ - vec4 area; - int 
tileQueues; - u8 pad[12]; -} mg_gl_path_queue; + MG_GL_LAYOUT_FIRST(PATH_QUEUE_AREA, VEC4) + MG_GL_LAYOUT_NEXT(PATH_QUEUE_TILE_QUEUES, I32, PATH_QUEUE_AREA) + MG_GL_LAYOUT_SIZE(PATH_QUEUE, PATH_QUEUE_TILE_QUEUES, VEC4) -typedef struct mg_gl_tile_op -{ - int kind; - int index; - int next; - bool crossRight; - int windingOffset; + MG_GL_LAYOUT_FIRST(TILE_OP_KIND, I32) + MG_GL_LAYOUT_NEXT(TILE_OP_NEXT, I32, TILE_OP_KIND) + MG_GL_LAYOUT_NEXT(TILE_OP_INDEX, I32, TILE_OP_NEXT) + MG_GL_LAYOUT_NEXT(TILE_OP_WINDING, I32, TILE_OP_INDEX) + MG_GL_LAYOUT_SIZE(TILE_OP, TILE_OP_WINDING, I32) -} mg_gl_tile_op; + MG_GL_LAYOUT_FIRST(TILE_QUEUE_WINDING, I32) + MG_GL_LAYOUT_NEXT(TILE_QUEUE_FIRST, I32, TILE_QUEUE_WINDING) + MG_GL_LAYOUT_NEXT(TILE_QUEUE_LAST, I32, TILE_QUEUE_FIRST) + MG_GL_LAYOUT_SIZE(TILE_QUEUE, TILE_QUEUE_LAST, I32) -typedef struct mg_gl_tile_queue -{ - int windingOffset; - int first; - int last; - -} mg_gl_tile_queue; - -typedef struct mg_gl_screen_tile -{ - u32 tileCoord[2]; - i32 first; - u8 padding[4]; -} mg_gl_screen_tile; -//////////////////////////////////////////////////////////// + MG_GL_LAYOUT_FIRST(SCREEN_TILE_COORD, VEC2) + MG_GL_LAYOUT_NEXT(SCREEN_TILE_FIRST, I32, SCREEN_TILE_COORD) + MG_GL_LAYOUT_SIZE(SCREEN_TILE, SCREEN_TILE_FIRST, VEC2) +}; enum { MG_GL_INPUT_BUFFERS_COUNT = 3, @@ -1055,11 +1064,11 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, //NOTE: update intermediate buffers size if needed //TODO: compute correct sizes - mg_gl_grow_buffer_if_needed(backend->pathQueueBuffer, pathCount * sizeof(mg_gl_path_queue), "path queues"); - mg_gl_grow_buffer_if_needed(backend->tileQueueBuffer, backend->maxTileQueueCount * sizeof(mg_gl_tile_queue), "tile queues"); - mg_gl_grow_buffer_if_needed(backend->segmentBuffer, backend->maxSegmentCount * sizeof(mg_gl_segment), "segments"); - mg_gl_grow_buffer_if_needed(backend->screenTilesBuffer, nTilesX * nTilesY * sizeof(mg_gl_screen_tile), "screen tiles"); - 
mg_gl_grow_buffer_if_needed(backend->tileOpBuffer, backend->maxSegmentCount * 30 * sizeof(mg_gl_tile_op), "tile ops"); + mg_gl_grow_buffer_if_needed(backend->pathQueueBuffer, pathCount * MG_GL_PATH_QUEUE_SIZE, "path queues"); + mg_gl_grow_buffer_if_needed(backend->tileQueueBuffer, backend->maxTileQueueCount * MG_GL_TILE_QUEUE_SIZE, "tile queues"); + mg_gl_grow_buffer_if_needed(backend->segmentBuffer, backend->maxSegmentCount * MG_GL_SEGMENT_SIZE, "segments"); + mg_gl_grow_buffer_if_needed(backend->screenTilesBuffer, nTilesX * nTilesY * MG_GL_SCREEN_TILE_SIZE, "screen tiles"); + mg_gl_grow_buffer_if_needed(backend->tileOpBuffer, backend->maxSegmentCount * 30 * MG_GL_TILE_OP_SIZE, "tile ops"); //NOTE: make the buffers visible to gl glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer); @@ -1301,7 +1310,7 @@ void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size) int nTilesY = (int)(size.y + tileSize - 1)/tileSize; glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY); if(backend->outTexture) { @@ -1617,10 +1626,10 @@ int mg_gl_canvas_compile_render_program_named(const char* progName, const u32 MG_GL_PATH_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path), MG_GL_ELEMENT_BUFFER_SIZE = (4<<12)*sizeof(mg_gl_path_elt), - MG_GL_SEGMENT_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_segment), - MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path_queue), - MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_tile_queue), - MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_op); + MG_GL_SEGMENT_BUFFER_SIZE = (4<<10)*MG_GL_SEGMENT_SIZE, + MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<10)*MG_GL_PATH_QUEUE_SIZE, + MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<10)*MG_GL_TILE_QUEUE_SIZE, + MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*MG_GL_TILE_OP_SIZE; mg_canvas_backend* 
gl_canvas_backend_create(mg_wgl_surface* surface) { @@ -1733,7 +1742,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) glGenBuffers(1, &backend->screenTilesBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY); glGenBuffers(1, &backend->rasterDispatchBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); From b300cc4d7d178ecfd7504e7998627bcfd55e9527 Mon Sep 17 00:00:00 2001 From: martinfouilleul Date: Thu, 27 Jul 2023 15:24:20 +0200 Subject: [PATCH 5/5] [gl canvas] balance dispatch of raster shader along 2 dimensions to avoid hitting the max workgroup count per dimension --- build.bat | 2 +- src/gl_canvas.c | 26 +++++++++++++++++++++-- src/glsl_shaders/balance_workgroups.glsl | 27 ++++++++++++++++++++++++ src/glsl_shaders/merge.glsl | 11 ++++------ src/glsl_shaders/raster.glsl | 15 ++++++++++++- 5 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 src/glsl_shaders/balance_workgroups.glsl diff --git a/build.bat b/build.bat index 798dbe1..35fb163 100644 --- a/build.bat +++ b/build.bat @@ -4,7 +4,7 @@ setlocal EnableDelayedExpansion if not exist bin mkdir bin -set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl +set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl src\glsl_shaders\balance_workgroups.glsl call python3 scripts\embed_text.py 
%glsl_shaders% --prefix=glsl_ --output src\glsl_shaders.h diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 8b5039f..bd82050 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -149,6 +149,7 @@ typedef struct mg_gl_canvas_backend GLuint segmentSetup; GLuint backprop; GLuint merge; + GLuint balanceWorkgroups; GLuint raster; GLuint blit; @@ -167,6 +168,7 @@ typedef struct mg_gl_canvas_backend GLuint tileOpBuffer; GLuint tileOpCountBuffer; GLuint screenTilesBuffer; + GLuint screenTilesCountBuffer; GLuint rasterDispatchBuffer; GLuint dummyVertexBuffer; @@ -1095,6 +1097,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &zero, GL_DYNAMIC_COPY); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); int err = glGetError(); @@ -1210,7 +1215,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->rasterDispatchBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->screenTilesCountBuffer); glUniform1i(0, tileSize); glUniform1f(1, scale); @@ -1239,6 +1244,17 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, log_error("gl error %i\n", err); } } + + //NOTE: balance work groups + glUseProgram(backend->balanceWorkgroups); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->screenTilesCountBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->rasterDispatchBuffer); + glUniform1ui(0, maxWorkGroupCount); + + glDispatchCompute(1, 1, 1); + 
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + //NOTE: raster pass glUseProgram(backend->raster); @@ -1246,6 +1262,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->screenTilesCountBuffer); glUniform1f(0, scale); glUniform1i(1, backend->msaaCount); @@ -1265,6 +1282,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, } glUniform1i(3, backend->pathBatchStart); + glUniform1ui(4, maxWorkGroupCount); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer); glDispatchComputeIndirect(0); @@ -1659,6 +1677,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup); err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop); err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge); + err |= mg_gl_canvas_compile_compute_program(glsl_balance_workgroups, &backend->balanceWorkgroups); err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster); err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit); @@ -1744,11 +1763,14 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY); + glGenBuffers(1, &backend->screenTilesCountBuffer); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), 0, GL_DYNAMIC_COPY); + glGenBuffers(1, &backend->rasterDispatchBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); 
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY); - if(err) { mg_gl_canvas_destroy((mg_canvas_backend*)backend); diff --git a/src/glsl_shaders/balance_workgroups.glsl b/src/glsl_shaders/balance_workgroups.glsl new file mode 100644 index 0000000..668c634 --- /dev/null +++ b/src/glsl_shaders/balance_workgroups.glsl @@ -0,0 +1,27 @@ + +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +precision mediump float; +layout(std430) buffer; + +layout(binding = 0) coherent restrict readonly buffer screenTilesCountBufferSSBO +{ + int elements[]; +} screenTilesCountBuffer; + +layout(binding = 1) coherent restrict writeonly buffer dispatchBufferSSBO +{ + mg_gl_dispatch_indirect_command elements[]; +} dispatchBuffer; + + +layout(location = 0) uniform uint maxWorkGroupCount; + +void main() +{ + uint totalWorkGroupCount = screenTilesCountBuffer.elements[0]; + + dispatchBuffer.elements[0].num_groups_x = totalWorkGroupCount > maxWorkGroupCount ? 
maxWorkGroupCount : totalWorkGroupCount; + dispatchBuffer.elements[0].num_groups_y = (totalWorkGroupCount + maxWorkGroupCount - 1) / maxWorkGroupCount; + dispatchBuffer.elements[0].num_groups_z = 1; +} diff --git a/src/glsl_shaders/merge.glsl b/src/glsl_shaders/merge.glsl index a119305..3a8ffa9 100644 --- a/src/glsl_shaders/merge.glsl +++ b/src/glsl_shaders/merge.glsl @@ -34,10 +34,10 @@ layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO mg_gl_screen_tile elements[]; } screenTilesBuffer; -layout(binding = 6) coherent restrict buffer dispatchBufferSSBO +layout(binding = 6) coherent restrict buffer screenTilesCountBufferSSBO { - mg_gl_dispatch_indirect_command elements[]; -} dispatchBuffer; + int elements[]; +} screenTilesCountBuffer; layout(location = 0) uniform int tileSize; @@ -53,9 +53,6 @@ void main() int lastOpIndex = -1; - dispatchBuffer.elements[0].num_groups_y = 1; - dispatchBuffer.elements[0].num_groups_z = 1; - for(int pathIndex = 0; pathIndex < pathCount; pathIndex++) { mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; @@ -75,7 +72,7 @@ void main() { if(tileIndex < 0) { - tileIndex = int(atomicAdd(dispatchBuffer.elements[0].num_groups_x, 1)); + tileIndex = int(atomicAdd(screenTilesCountBuffer.elements[0], 1)); screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord); screenTilesBuffer.elements[tileIndex].first = -1; } diff --git a/src/glsl_shaders/raster.glsl b/src/glsl_shaders/raster.glsl index 526102e..e7f6188 100644 --- a/src/glsl_shaders/raster.glsl +++ b/src/glsl_shaders/raster.glsl @@ -24,17 +24,30 @@ layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO mg_gl_screen_tile elements[]; } screenTilesBuffer; +layout(binding = 4) restrict readonly buffer screenTilesCountBufferSSBO +{ + int elements[]; +} screenTilesCountBuffer; + + layout(location = 0) uniform float scale; layout(location = 1) uniform int msaaSampleCount; layout(location = 2) uniform uint useTexture; layout(location = 3) uniform int 
pathBufferStart; +layout(location = 4) uniform uint maxWorkGroupCount; layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture; layout(binding = 1) uniform sampler2D srcTexture; void main() { - uint tileIndex = gl_WorkGroupID.x; + uint tileIndex = gl_WorkGroupID.y * maxWorkGroupCount + gl_WorkGroupID.x; + + if(tileIndex >= screenTilesCountBuffer.elements[0]) + { + return; + } + uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord; ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy);