diff --git a/build.bat b/build.bat index 798dbe1..35fb163 100644 --- a/build.bat +++ b/build.bat @@ -4,7 +4,7 @@ setlocal EnableDelayedExpansion if not exist bin mkdir bin -set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl +set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl src\glsl_shaders\balance_workgroups.glsl call python3 scripts\embed_text.py %glsl_shaders% --prefix=glsl_ --output src\glsl_shaders.h diff --git a/examples/perf_text/main.c b/examples/perf_text/main.c index 52328eb..bcb9952 100644 --- a/examples/perf_text/main.c +++ b/examples/perf_text/main.c @@ -201,7 +201,7 @@ int main() f32 trackX = mousePos.x/zoom - startX; f32 trackY = mousePos.y/zoom - startY; - zoom *= 1 + event->move.deltaY * 0.01; + zoom *= 1 + event->mouse.deltaY * 0.01; zoom = Clamp(zoom, 0.2, 10); startX = mousePos.x/zoom - trackX; diff --git a/examples/tiger/main.c b/examples/tiger/main.c index ea51073..761daa2 100644 --- a/examples/tiger/main.c +++ b/examples/tiger/main.c @@ -62,6 +62,11 @@ int main() //NOTE: create surface mg_surface surface = mg_surface_create_for_window(window, MG_CANVAS); + if(mg_surface_is_nil(surface)) + { + log_error("Couln't create surface\n"); + return(-1); + } mg_surface_swap_interval(surface, 0); //TODO: create canvas @@ -108,12 +113,6 @@ int main() mp_request_quit(); } break; - case MP_EVENT_WINDOW_RESIZE: - { - mp_rect frame = {0, 0, event->frame.rect.w, event->frame.rect.h}; - mg_surface_set_frame(surface, frame); - } break; - case MP_EVENT_MOUSE_BUTTON: { if(event->key.code == MP_MOUSE_LEFT) @@ -138,7 +137,7 @@ 
int main() f32 pinX = (mousePos.x - startX)/zoom; f32 pinY = (mousePos.y - startY)/zoom; - zoom *= 1 + event->move.deltaY * 0.01; + zoom *= 1 + event->mouse.deltaY * 0.01; zoom = Clamp(zoom, 0.5, 5); startX = mousePos.x - pinX*zoom; diff --git a/src/gl_canvas.c b/src/gl_canvas.c index 30c6b35..bd82050 100644 --- a/src/gl_canvas.c +++ b/src/gl_canvas.c @@ -64,53 +64,62 @@ typedef struct mg_gl_dispatch_indirect_command //////////////////////////////////////////////////////////// //NOTE: these are just here for the sizes... -typedef struct mg_gl_segment +#define MG_GL_LAYOUT_FIRST(name, type) \ + MG_GL_##name##_OFFSET = 0, \ + MG_GL_##name##_SIZE = MG_GL_##type##_SIZE, + +#define MG_GL_LAYOUT_NEXT(name, type, prev) \ + MG_GL_##name##_OFFSET = AlignUpOnPow2(MG_GL_##prev##_OFFSET + MG_GL_##prev##_SIZE, MG_GL_##type##_ALIGN), \ + MG_GL_##name##_SIZE = MG_GL_##type##_SIZE, + +#define MG_GL_LAYOUT_SIZE(name, last, maxAlignType) \ + MG_GL_##name##_ALIGN = AlignUpOnPow2(MG_GL_##maxAlignType##_ALIGN, MG_GL_VEC4_ALIGN), \ + MG_GL_##name##_SIZE = AlignUpOnPow2(MG_GL_##last##_OFFSET + MG_GL_##last##_SIZE, MG_GL_##name##_ALIGN), + +enum { - int kind; - int pathIndex; - int config; - int windingIncrement; - vec4 box; - float hullMatrix[9]; - float implicitMatrix[9]; - float sign; - vec2 hullVertex; - int debugID; + MG_GL_I32_SIZE = sizeof(i32), + MG_GL_I32_ALIGN = sizeof(i32), + MG_GL_F32_SIZE = sizeof(f32), + MG_GL_F32_ALIGN = sizeof(f32), + MG_GL_VEC2_SIZE = 2*sizeof(f32), + MG_GL_VEC2_ALIGN = 2*sizeof(f32), + MG_GL_VEC3_SIZE = 4*sizeof(f32), + MG_GL_VEC3_ALIGN = 4*sizeof(f32), + MG_GL_VEC4_SIZE = 4*sizeof(f32), + MG_GL_VEC4_ALIGN = 4*sizeof(f32), + MG_GL_MAT3_SIZE = 3*3*MG_GL_VEC3_SIZE, + MG_GL_MAT3_ALIGN = MG_GL_VEC3_ALIGN, -} mg_gl_segment; + MG_GL_LAYOUT_FIRST(SEGMENT_KIND, I32) + MG_GL_LAYOUT_NEXT(SEGMENT_PATH_INDEX, I32, SEGMENT_KIND) + MG_GL_LAYOUT_NEXT(SEGMENT_CONFIG, I32, SEGMENT_PATH_INDEX) + MG_GL_LAYOUT_NEXT(SEGMENT_WINDING, I32, SEGMENT_CONFIG) + 
MG_GL_LAYOUT_NEXT(SEGMENT_BOX, VEC4, SEGMENT_WINDING) + MG_GL_LAYOUT_NEXT(SEGMENT_IMPLICIT_MATRIX, MAT3, SEGMENT_BOX) + MG_GL_LAYOUT_NEXT(SEGMENT_HULL_VERTEX, VEC2, SEGMENT_IMPLICIT_MATRIX) + MG_GL_LAYOUT_NEXT(SEGMENT_SIGN, F32, SEGMENT_HULL_VERTEX) + MG_GL_LAYOUT_SIZE(SEGMENT, SEGMENT_SIGN, MAT3) -typedef struct mg_gl_path_queue -{ - vec4 area; - int tileQueues; - u8 pad[12]; -} mg_gl_path_queue; + MG_GL_LAYOUT_FIRST(PATH_QUEUE_AREA, VEC4) + MG_GL_LAYOUT_NEXT(PATH_QUEUE_TILE_QUEUES, I32, PATH_QUEUE_AREA) + MG_GL_LAYOUT_SIZE(PATH_QUEUE, PATH_QUEUE_TILE_QUEUES, VEC4) -typedef struct mg_gl_tile_op -{ - int kind; - int index; - int next; - bool crossRight; - int windingOffset; + MG_GL_LAYOUT_FIRST(TILE_OP_KIND, I32) + MG_GL_LAYOUT_NEXT(TILE_OP_NEXT, I32, TILE_OP_KIND) + MG_GL_LAYOUT_NEXT(TILE_OP_INDEX, I32, TILE_OP_NEXT) + MG_GL_LAYOUT_NEXT(TILE_OP_WINDING, I32, TILE_OP_INDEX) + MG_GL_LAYOUT_SIZE(TILE_OP, TILE_OP_WINDING, I32) -} mg_gl_tile_op; + MG_GL_LAYOUT_FIRST(TILE_QUEUE_WINDING, I32) + MG_GL_LAYOUT_NEXT(TILE_QUEUE_FIRST, I32, TILE_QUEUE_WINDING) + MG_GL_LAYOUT_NEXT(TILE_QUEUE_LAST, I32, TILE_QUEUE_FIRST) + MG_GL_LAYOUT_SIZE(TILE_QUEUE, TILE_QUEUE_LAST, I32) -typedef struct mg_gl_tile_queue -{ - int windingOffset; - int first; - int last; - -} mg_gl_tile_queue; - -typedef struct mg_gl_screen_tile -{ - u32 tileCoord[2]; - i32 first; - u8 padding[4]; -} mg_gl_screen_tile; -//////////////////////////////////////////////////////////// + MG_GL_LAYOUT_FIRST(SCREEN_TILE_COORD, VEC2) + MG_GL_LAYOUT_NEXT(SCREEN_TILE_FIRST, I32, SCREEN_TILE_COORD) + MG_GL_LAYOUT_SIZE(SCREEN_TILE, SCREEN_TILE_FIRST, VEC2) +}; enum { MG_GL_INPUT_BUFFERS_COUNT = 3, @@ -140,6 +149,7 @@ typedef struct mg_gl_canvas_backend GLuint segmentSetup; GLuint backprop; GLuint merge; + GLuint balanceWorkgroups; GLuint raster; GLuint blit; @@ -158,6 +168,7 @@ typedef struct mg_gl_canvas_backend GLuint tileOpBuffer; GLuint tileOpCountBuffer; GLuint screenTilesBuffer; + GLuint screenTilesCountBuffer; GLuint 
rasterDispatchBuffer; GLuint dummyVertexBuffer; @@ -172,6 +183,9 @@ typedef struct mg_gl_canvas_backend vec4 pathScreenExtents; vec4 pathUserExtents; + int maxTileQueueCount; + int maxSegmentCount; + } mg_gl_canvas_backend; static void mg_update_path_extents(vec4* extents, vec2 p) @@ -182,9 +196,47 @@ static void mg_update_path_extents(vec4* extents, vec2 p) extents->w = maximum(extents->w, p.y); } +void mg_gl_grow_input_buffer(mg_gl_mapped_buffer* buffer, int copyStart, int copySize, int newSize) +{ + mg_gl_mapped_buffer newBuffer = {0}; + newBuffer.size = newSize; + glGenBuffers(1, &newBuffer.buffer); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, newBuffer.buffer); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, newBuffer.size, 0, GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT); + newBuffer.contents = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, + 0, + newBuffer.size, + GL_MAP_WRITE_BIT + |GL_MAP_PERSISTENT_BIT + |GL_MAP_FLUSH_EXPLICIT_BIT); + + memcpy(newBuffer.contents + copyStart, buffer->contents + copyStart, copySize); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer->buffer); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + glDeleteBuffers(1, &buffer->buffer); + + *buffer = newBuffer; +} + void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type kind, vec2* p) { - mg_gl_path_elt* elementData = (mg_gl_path_elt*)backend->elementBuffer[backend->bufferIndex].contents; + int bufferIndex = backend->bufferIndex; + int bufferCap = backend->elementBuffer[bufferIndex].size / sizeof(mg_gl_path_elt); + if(backend->eltCount >= bufferCap) + { + int newBufferCap = (int)(bufferCap * 1.5); + int newBufferSize = newBufferCap * sizeof(mg_gl_path_elt); + + log_info("growing element buffer to %i elements\n", newBufferCap); + + mg_gl_grow_input_buffer(&backend->elementBuffer[bufferIndex], + backend->eltBatchStart * sizeof(mg_gl_path_elt), + (backend->eltCount - backend->eltBatchStart) * sizeof(mg_gl_path_elt), /* NOTE: copySize is a byte count measured from copyStart, so pass only the pending batch [eltBatchStart, eltCount) to stay inside the old mapping */ + newBufferSize); + } + + mg_gl_path_elt* elementData = 
(mg_gl_path_elt*)backend->elementBuffer[bufferIndex].contents; mg_gl_path_elt* elt = &elementData[backend->eltCount]; backend->eltCount++; @@ -193,16 +245,19 @@ void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type switch(kind) { case MG_PATH_LINE: + backend->maxSegmentCount += 1; elt->kind = MG_GL_LINE; count = 2; break; case MG_PATH_QUADRATIC: + backend->maxSegmentCount += 3; elt->kind = MG_GL_QUADRATIC; count = 3; break; case MG_PATH_CUBIC: + backend->maxSegmentCount += 7; elt->kind = MG_GL_CUBIC; count = 4; break; @@ -224,6 +279,21 @@ void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primitive, f32 scale) { + int bufferIndex = backend->bufferIndex; + int bufferCap = backend->pathBuffer[bufferIndex].size / sizeof(mg_gl_path); + if(backend->pathCount >= bufferCap) + { + int newBufferCap = (int)(bufferCap * 1.5); + int newBufferSize = newBufferCap * sizeof(mg_gl_path); + + log_info("growing path buffer to %i elements\n", newBufferCap); + + mg_gl_grow_input_buffer(&backend->pathBuffer[bufferIndex], + backend->pathBatchStart * sizeof(mg_gl_path), + (backend->pathCount - backend->pathBatchStart) * sizeof(mg_gl_path), /* NOTE: size the copy from the path counters (not eltCount) and only over the pending batch [pathBatchStart, pathCount) */ + newBufferSize); + } + mg_gl_path* pathData = (mg_gl_path*)backend->pathBuffer[backend->bufferIndex].contents; mg_gl_path* path = &pathData[backend->pathCount]; backend->pathCount++; @@ -293,6 +363,10 @@ void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primi path->uvTransform[10] = 1; path->uvTransform[11] = 0; } + + int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_GL_TILE_SIZE + 1; + int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_GL_TILE_SIZE + 1; + backend->maxTileQueueCount += (nTilesX * nTilesY); } bool mg_intersect_hull_legs(vec2 p0, vec2 p1, vec2 p2, vec2 p3, vec2* intersection) @@ -948,6 +1022,25 @@ void mg_gl_encode_stroke(mg_gl_canvas_backend* backend, } } +void 
mg_gl_grow_buffer_if_needed(GLuint buffer, i32 wantedSize, const char* name) +{ + i32 oldSize = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); + glGetBufferParameteriv(GL_SHADER_STORAGE_BUFFER, GL_BUFFER_SIZE, &oldSize); + + if(oldSize < wantedSize) + { + log_info("growing %s buffer\n", name); + + int newSize = wantedSize * 1.2; + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, newSize, 0, GL_DYNAMIC_COPY); + } +} + + + void mg_gl_render_batch(mg_gl_canvas_backend* backend, mg_wgl_surface* surface, mg_image_data* image, @@ -957,7 +1050,6 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, vec2 viewportSize, f32 scale) { - //NOTE: make the buffers visible to gl GLuint pathBuffer = backend->pathBuffer[backend->bufferIndex].buffer; GLuint elementBuffer = backend->elementBuffer[backend->bufferIndex].buffer; @@ -971,6 +1063,16 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, return; } + //NOTE: update intermediate buffers size if needed + //TODO: compute correct sizes + + mg_gl_grow_buffer_if_needed(backend->pathQueueBuffer, pathCount * MG_GL_PATH_QUEUE_SIZE, "path queues"); + mg_gl_grow_buffer_if_needed(backend->tileQueueBuffer, backend->maxTileQueueCount * MG_GL_TILE_QUEUE_SIZE, "tile queues"); + mg_gl_grow_buffer_if_needed(backend->segmentBuffer, backend->maxSegmentCount * MG_GL_SEGMENT_SIZE, "segments"); + mg_gl_grow_buffer_if_needed(backend->screenTilesBuffer, nTilesX * nTilesY * MG_GL_SCREEN_TILE_SIZE, "screen tiles"); + mg_gl_grow_buffer_if_needed(backend->tileOpBuffer, backend->maxSegmentCount * 30 * MG_GL_TILE_OP_SIZE, "tile ops"); + + //NOTE: make the buffers visible to gl glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer); glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path)); @@ -995,6 +1097,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); 
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &zero, GL_DYNAMIC_COPY); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); int err = glGetError(); @@ -1110,7 +1215,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->rasterDispatchBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->screenTilesCountBuffer); glUniform1i(0, tileSize); glUniform1f(1, scale); @@ -1139,6 +1244,17 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, log_error("gl error %i\n", err); } } + + //NOTE: balance work groups + glUseProgram(backend->balanceWorkgroups); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->screenTilesCountBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->rasterDispatchBuffer); + glUniform1ui(0, maxWorkGroupCount); + + glDispatchCompute(1, 1, 1); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + //NOTE: raster pass glUseProgram(backend->raster); @@ -1146,6 +1262,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->screenTilesCountBuffer); glUniform1f(0, scale); glUniform1i(1, backend->msaaCount); @@ -1165,6 +1282,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, } glUniform1i(3, backend->pathBatchStart); + glUniform1ui(4, maxWorkGroupCount); 
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer); glDispatchComputeIndirect(0); @@ -1198,6 +1316,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend, backend->pathBatchStart = backend->pathCount; backend->eltBatchStart = backend->eltCount; + + backend->maxSegmentCount = 0; + backend->maxTileQueueCount = 0; } void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size) @@ -1207,7 +1328,7 @@ void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size) int nTilesY = (int)(size.y + tileSize - 1)/tileSize; glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY); if(backend->outTexture) { @@ -1271,6 +1392,8 @@ void mg_gl_canvas_render(mg_canvas_backend* interface, backend->pathBatchStart = 0; backend->eltCount = 0; backend->eltBatchStart = 0; + backend->maxSegmentCount = 0; + backend->maxTileQueueCount = 0; //NOTE: encode and render batches vec2 currentPos = {0}; @@ -1519,12 +1642,12 @@ int mg_gl_canvas_compile_render_program_named(const char* progName, #define mg_gl_canvas_compile_render_program(progName, shaderSrc, vertexSrc, out) \ mg_gl_canvas_compile_render_program_named(progName, #shaderSrc, #vertexSrc, shaderSrc, vertexSrc, out) -const u32 MG_GL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path), - MG_GL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_elt), - MG_GL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_segment), - MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_queue), - MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_queue), - MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_op); +const u32 MG_GL_PATH_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path), + MG_GL_ELEMENT_BUFFER_SIZE = (4<<12)*sizeof(mg_gl_path_elt), + MG_GL_SEGMENT_BUFFER_SIZE = (4<<10)*MG_GL_SEGMENT_SIZE, + 
MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<10)*MG_GL_PATH_QUEUE_SIZE, + MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<10)*MG_GL_TILE_QUEUE_SIZE, + MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*MG_GL_TILE_OP_SIZE; mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) { @@ -1554,6 +1677,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup); err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop); err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge); + err |= mg_gl_canvas_compile_compute_program(glsl_balance_workgroups, &backend->balanceWorkgroups); err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster); err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit); @@ -1637,13 +1761,16 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) glGenBuffers(1, &backend->screenTilesBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY); + + glGenBuffers(1, &backend->screenTilesCountBuffer); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), 0, GL_DYNAMIC_COPY); glGenBuffers(1, &backend->rasterDispatchBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY); - if(err) { mg_gl_canvas_destroy((mg_canvas_backend*)backend); diff --git a/src/glsl_shaders/balance_workgroups.glsl b/src/glsl_shaders/balance_workgroups.glsl new file mode 100644 index 0000000..668c634 --- /dev/null +++ b/src/glsl_shaders/balance_workgroups.glsl @@ -0,0 +1,27 @@ + 
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +precision mediump float; +layout(std430) buffer; + +layout(binding = 0) coherent restrict readonly buffer screenTilesCountBufferSSBO +{ + int elements[]; +} screenTilesCountBuffer; + +layout(binding = 1) coherent restrict writeonly buffer dispatchBufferSSBO +{ + mg_gl_dispatch_indirect_command elements[]; +} dispatchBuffer; + + +layout(location = 0) uniform uint maxWorkGroupCount; + +void main() +{ + uint totalWorkGroupCount = screenTilesCountBuffer.elements[0]; + + dispatchBuffer.elements[0].num_groups_x = totalWorkGroupCount > maxWorkGroupCount ? maxWorkGroupCount : totalWorkGroupCount; + dispatchBuffer.elements[0].num_groups_y = (totalWorkGroupCount + maxWorkGroupCount - 1) / maxWorkGroupCount; + dispatchBuffer.elements[0].num_groups_z = 1; +} diff --git a/src/glsl_shaders/merge.glsl b/src/glsl_shaders/merge.glsl index fb796a3..3a8ffa9 100644 --- a/src/glsl_shaders/merge.glsl +++ b/src/glsl_shaders/merge.glsl @@ -34,10 +34,10 @@ layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO mg_gl_screen_tile elements[]; } screenTilesBuffer; -layout(binding = 6) coherent restrict buffer dispatchBufferSSBO +layout(binding = 6) coherent restrict buffer screenTilesCountBufferSSBO { - mg_gl_dispatch_indirect_command elements[]; -} dispatchBuffer; + int elements[]; +} screenTilesCountBuffer; layout(location = 0) uniform int tileSize; @@ -53,9 +53,6 @@ void main() int lastOpIndex = -1; - dispatchBuffer.elements[0].num_groups_y = 1; - dispatchBuffer.elements[0].num_groups_z = 1; - for(int pathIndex = 0; pathIndex < pathCount; pathIndex++) { mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; @@ -75,7 +72,7 @@ void main() { if(tileIndex < 0) { - tileIndex = int(atomicAdd(dispatchBuffer.elements[0].num_groups_x, 1)); + tileIndex = int(atomicAdd(screenTilesCountBuffer.elements[0], 1)); screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord); 
screenTilesBuffer.elements[tileIndex].first = -1; } @@ -106,6 +103,11 @@ void main() // Additionally if color is opaque and tile is fully inside clip, trim tile list. int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + if(pathOpIndex >= tileOpBuffer.elements.length()) + { + return; + } + tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_CLIP_FILL; tileOpBuffer.elements[pathOpIndex].next = -1; tileOpBuffer.elements[pathOpIndex].index = pathIndex; @@ -141,6 +143,10 @@ void main() { //NOTE: add path start op (with winding offset) int startOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + if(startOpIndex >= tileOpBuffer.elements.length()) + { + return; + } tileOpBuffer.elements[startOpIndex].kind = MG_GL_OP_START; tileOpBuffer.elements[startOpIndex].next = -1; @@ -163,6 +169,10 @@ void main() //NOTE: add path end op int endOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); + if(endOpIndex >= tileOpBuffer.elements.length()) + { + return; + } tileOpBuffer.elements[endOpIndex].kind = MG_GL_OP_END; tileOpBuffer.elements[endOpIndex].next = -1; diff --git a/src/glsl_shaders/path_setup.glsl b/src/glsl_shaders/path_setup.glsl index fe711da..177247d 100644 --- a/src/glsl_shaders/path_setup.glsl +++ b/src/glsl_shaders/path_setup.glsl @@ -50,13 +50,21 @@ void main() int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount); - pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); - pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex; - - for(int i=0; i<tileCount; i++) + if(tileQueuesIndex + tileCount >= tileQueueBuffer.elements.length()) { - tileQueueBuffer.elements[tileQueuesIndex + i].first = -1; - tileQueueBuffer.elements[tileQueuesIndex + i].last = -1; - tileQueueBuffer.elements[tileQueuesIndex + i].windingOffset = 0; + pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(0); /* NOTE: on tile queue overflow, empty the same slot the else branch fills */ + pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = 0; + } + else + { + pathQueueBuffer.elements[pathQueueBufferStart 
+ pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); + pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex; + + for(int i=0; i= screenTilesCountBuffer.elements[0]) + { + return; + } + uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord; ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy); diff --git a/src/glsl_shaders/segment_setup.glsl b/src/glsl_shaders/segment_setup.glsl index 8823a27..0f39d10 100644 --- a/src/glsl_shaders/segment_setup.glsl +++ b/src/glsl_shaders/segment_setup.glsl @@ -105,29 +105,33 @@ void bin_to_tiles(int segIndex) { int tileOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); - tileOpBuffer.elements[tileOpIndex].kind = MG_GL_OP_SEGMENT; - tileOpBuffer.elements[tileOpIndex].index = segIndex; - tileOpBuffer.elements[tileOpIndex].windingOffsetOrCrossRight = 0; - tileOpBuffer.elements[tileOpIndex].next = -1; - - int tileQueueIndex = pathQueue.tileQueues + y*pathArea.z + x; - - tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueueBuffer.elements[tileQueueIndex].first, tileOpIndex); - if(tileOpBuffer.elements[tileOpIndex].next == -1) + if(tileOpIndex < tileOpBuffer.elements.length()) { - tileQueueBuffer.elements[tileQueueIndex].last = tileOpIndex; - } + tileOpBuffer.elements[tileOpIndex].kind = MG_GL_OP_SEGMENT; + tileOpBuffer.elements[tileOpIndex].index = segIndex; + tileOpBuffer.elements[tileOpIndex].windingOffsetOrCrossRight = 0; + tileOpBuffer.elements[tileOpIndex].next = -1; - //NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset - if(crossB) - { - atomicAdd(tileQueueBuffer.elements[tileQueueIndex].windingOffset, seg.windingIncrement); - } + int tileQueueIndex = pathQueue.tileQueues + y*pathArea.z + x; - //NOTE: if the segment crosses the right boundary, mark it. 
- if(crossR) - { - tileOpBuffer.elements[tileOpIndex].windingOffsetOrCrossRight = 1; + tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueueBuffer.elements[tileQueueIndex].first, + tileOpIndex); + if(tileOpBuffer.elements[tileOpIndex].next == -1) + { + tileQueueBuffer.elements[tileQueueIndex].last = tileOpIndex; + } + + //NOTE: if the segment crosses the tile's bottom boundary, update the tile's winding offset + if(crossB) + { + atomicAdd(tileQueueBuffer.elements[tileQueueIndex].windingOffset, seg.windingIncrement); + } + + //NOTE: if the segment crosses the right boundary, mark it. + if(crossR) + { + tileOpBuffer.elements[tileOpIndex].windingOffsetOrCrossRight = 1; + } } } } @@ -138,88 +142,90 @@ int push_segment(in vec2 p[4], int kind, int pathIndex) { int segIndex = atomicAdd(segmentCountBuffer.elements[0], 1); - vec2 s, c, e; - - switch(kind) + if(segIndex < segmentBuffer.elements.length()) { - case MG_GL_LINE: - s = p[0]; - c = p[0]; - e = p[1]; - break; + vec2 s, c, e; - case MG_GL_QUADRATIC: - s = p[0]; - c = p[1]; - e = p[2]; - break; - - case MG_GL_CUBIC: + switch(kind) { - s = p[0]; - float sqrNorm0 = dot(p[1]-p[0], p[1]-p[0]); - float sqrNorm1 = dot(p[3]-p[2], p[3]-p[2]); - if(sqrNorm0 < sqrNorm1) + case MG_GL_LINE: + s = p[0]; + c = p[0]; + e = p[1]; + break; + + case MG_GL_QUADRATIC: + s = p[0]; + c = p[1]; + e = p[2]; + break; + + case MG_GL_CUBIC: { - c = p[2]; + s = p[0]; + float sqrNorm0 = dot(p[1]-p[0], p[1]-p[0]); + float sqrNorm1 = dot(p[3]-p[2], p[3]-p[2]); + if(sqrNorm0 < sqrNorm1) + { + c = p[2]; + } + else + { + c = p[1]; + } + e = p[3]; + } break; + } + + bool goingUp = e.y >= s.y; + bool goingRight = e.x >= s.x; + + vec4 box = vec4(min(s.x, e.x), + min(s.y, e.y), + max(s.x, e.x), + max(s.y, e.y)); + + segmentBuffer.elements[segIndex].kind = kind; + segmentBuffer.elements[segIndex].pathIndex = pathIndex; + segmentBuffer.elements[segIndex].windingIncrement = goingUp ? 
1 : -1; + segmentBuffer.elements[segIndex].box = box; + + float dx = c.x - box.x; + float dy = c.y - box.y; + float alpha = (box.w - box.y)/(box.z - box.x); + float ofs = box.w - box.y; + + if(goingUp == goingRight) + { + if(kind == MG_GL_LINE) + { + segmentBuffer.elements[segIndex].config = MG_GL_BR; + } + else if(dy > alpha*dx) + { + segmentBuffer.elements[segIndex].config = MG_GL_TL; } else { - c = p[1]; + segmentBuffer.elements[segIndex].config = MG_GL_BR; } - e = p[3]; - } break; - } - - bool goingUp = e.y >= s.y; - bool goingRight = e.x >= s.x; - - vec4 box = vec4(min(s.x, e.x), - min(s.y, e.y), - max(s.x, e.x), - max(s.y, e.y)); - - segmentBuffer.elements[segIndex].kind = kind; - segmentBuffer.elements[segIndex].pathIndex = pathIndex; - segmentBuffer.elements[segIndex].windingIncrement = goingUp ? 1 : -1; - segmentBuffer.elements[segIndex].box = box; - - float dx = c.x - box.x; - float dy = c.y - box.y; - float alpha = (box.w - box.y)/(box.z - box.x); - float ofs = box.w - box.y; - - if(goingUp == goingRight) - { - if(kind == MG_GL_LINE) - { - segmentBuffer.elements[segIndex].config = MG_GL_BR; - } - else if(dy > alpha*dx) - { - segmentBuffer.elements[segIndex].config = MG_GL_TL; } else { - segmentBuffer.elements[segIndex].config = MG_GL_BR; + if(kind == MG_GL_LINE) + { + segmentBuffer.elements[segIndex].config = MG_GL_TR; + } + else if(dy < ofs - alpha*dx) + { + segmentBuffer.elements[segIndex].config = MG_GL_BL; + } + else + { + segmentBuffer.elements[segIndex].config = MG_GL_TR; + } } } - else - { - if(kind == MG_GL_LINE) - { - segmentBuffer.elements[segIndex].config = MG_GL_TR; - } - else if(dy < ofs - alpha*dx) - { - segmentBuffer.elements[segIndex].config = MG_GL_BL; - } - else - { - segmentBuffer.elements[segIndex].config = MG_GL_TR; - } - } - return(segIndex); } @@ -229,9 +235,11 @@ int push_segment(in vec2 p[4], int kind, int pathIndex) void line_setup(vec2 p[4], int pathIndex) { int segIndex = push_segment(p, MG_GL_LINE, pathIndex); - 
segmentBuffer.elements[segIndex].hullVertex = p[0]; - - bin_to_tiles(segIndex); + if(segIndex < segmentBuffer.elements.length()) + { + segmentBuffer.elements[segIndex].hullVertex = p[0]; + bin_to_tiles(segIndex); + } } vec2 quadratic_blossom(vec2 p[4], float u, float v) @@ -298,27 +306,30 @@ void quadratic_emit(vec2 p[4], int pathIndex) { int segIndex = push_segment(p, MG_GL_QUADRATIC, pathIndex); - //NOTE: compute implicit equation matrix - float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); + if(segIndex < segmentBuffer.elements.length()) + { + //NOTE: compute implicit equation matrix + float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); - float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y); - float b = p[1].x - p[0].x + 0.5*(p[0].x - p[2].x); - float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y); - float d = p[0].y - p[1].y; - float e = p[1].x - p[0].x; - float f = p[0].x*p[1].y - p[1].x*p[0].y; + float a = p[0].y - p[1].y + 0.5*(p[2].y - p[0].y); + float b = p[1].x - p[0].x + 0.5*(p[0].x - p[2].x); + float c = p[0].x*p[1].y - p[1].x*p[0].y + 0.5*(p[2].x*p[0].y - p[0].x*p[2].y); + float d = p[0].y - p[1].y; + float e = p[1].x - p[0].x; + float f = p[0].x*p[1].y - p[1].x*p[0].y; - float flip = ( segmentBuffer.elements[segIndex].config == MG_GL_TL - || segmentBuffer.elements[segIndex].config == MG_GL_BL)? -1 : 1; + float flip = ( segmentBuffer.elements[segIndex].config == MG_GL_TL + || segmentBuffer.elements[segIndex].config == MG_GL_BL)? 
-1 : 1; - float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y)); + float g = flip*(p[2].x*(p[0].y - p[1].y) + p[0].x*(p[1].y - p[2].y) + p[1].x*(p[2].y - p[0].y)); - segmentBuffer.elements[segIndex].implicitMatrix = (1/det)*mat3(a, d, 0., - b, e, 0., - c, f, g); - segmentBuffer.elements[segIndex].hullVertex = p[1]; + segmentBuffer.elements[segIndex].implicitMatrix = (1/det)*mat3(a, d, 0., + b, e, 0., + c, f, g); + segmentBuffer.elements[segIndex].hullVertex = p[1]; - bin_to_tiles(segIndex); + bin_to_tiles(segIndex); + } } void quadratic_setup(vec2 p[4], int pathIndex) @@ -654,71 +665,74 @@ void cubic_emit(cubic_info curve, vec2 p[4], float s0, float s1, vec2 sp[4], int { int segIndex = push_segment(sp, MG_GL_CUBIC, pathIndex); - vec2 v0 = p[0]; - vec2 v1 = p[3]; - vec2 v2; - mat3 K; - - //TODO: haul that up in caller - float sqrNorm0 = dot(p[1]-p[0], p[1]-p[0]); - float sqrNorm1 = dot(p[2]-p[3], p[2]-p[3]); - - if(dot(p[0]-p[3], p[0]-p[3]) > 1e-5) + if(segIndex < segmentBuffer.elements.length()) { - if(sqrNorm0 >= sqrNorm1) - { - v2 = p[1]; - K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz); + vec2 v0 = p[0]; + vec2 v1 = p[3]; + vec2 v2; + mat3 K; + + //TODO: haul that up in caller + float sqrNorm0 = dot(p[1]-p[0], p[1]-p[0]); + float sqrNorm1 = dot(p[2]-p[3], p[2]-p[3]); + + if(dot(p[0]-p[3], p[0]-p[3]) > 1e-5) + { + if(sqrNorm0 >= sqrNorm1) + { + v2 = p[1]; + K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[1].xyz); + } + else + { + v2 = p[2]; + K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz); + } } else { + v1 = p[1]; v2 = p[2]; - K = mat3(curve.K[0].xyz, curve.K[3].xyz, curve.K[2].xyz); + K = mat3(curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz); } - } - else - { - v1 = p[1]; - v2 = p[2]; - K = mat3(curve.K[0].xyz, curve.K[1].xyz, curve.K[2].xyz); - } - //NOTE: set matrices + //NOTE: set matrices - //TODO: should we compute matrix relative to a base point to avoid loss of precision - // when computing 
barycentric matrix? + //TODO: should we compute matrix relative to a base point to avoid loss of precision + // when computing barycentric matrix? - mat3 B = barycentric_matrix(v0, v1, v2); + mat3 B = barycentric_matrix(v0, v1, v2); - segmentBuffer.elements[segIndex].implicitMatrix = K*B; - segmentBuffer.elements[segIndex].hullVertex = select_hull_vertex(sp[0], sp[1], sp[2], sp[3]); + segmentBuffer.elements[segIndex].implicitMatrix = K*B; + segmentBuffer.elements[segIndex].hullVertex = select_hull_vertex(sp[0], sp[1], sp[2], sp[3]); - //NOTE: compute sign flip - segmentBuffer.elements[segIndex].sign = 1; + //NOTE: compute sign flip + segmentBuffer.elements[segIndex].sign = 1; - if( curve.kind == CUBIC_SERPENTINE - || curve.kind == CUBIC_CUSP) - { - segmentBuffer.elements[segIndex].sign = (curve.d1 < 0)? -1 : 1; + if( curve.kind == CUBIC_SERPENTINE + || curve.kind == CUBIC_CUSP) + { + segmentBuffer.elements[segIndex].sign = (curve.d1 < 0)? -1 : 1; + } + else if(curve.kind == CUBIC_LOOP) + { + float d1 = curve.d1; + float d2 = curve.d2; + float d3 = curve.d3; + + float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0); + float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1); + float H = (abs(H0) > abs(H1)) ? H0 : H1; + segmentBuffer.elements[segIndex].sign = (H*d1 > 0) ? -1 : 1; + } + + if(sp[3].y > sp[0].y) + { + segmentBuffer.elements[segIndex].sign *= -1; + } + + //NOTE: bin to tiles + bin_to_tiles(segIndex); } - else if(curve.kind == CUBIC_LOOP) - { - float d1 = curve.d1; - float d2 = curve.d2; - float d3 = curve.d3; - - float H0 = d3*d1-square(d2) + d1*d2*s0 - square(d1)*square(s0); - float H1 = d3*d1-square(d2) + d1*d2*s1 - square(d1)*square(s1); - float H = (abs(H0) > abs(H1)) ? H0 : H1; - segmentBuffer.elements[segIndex].sign = (H*d1 > 0) ? -1 : 1; - } - - if(sp[3].y > sp[0].y) - { - segmentBuffer.elements[segIndex].sign *= -1; - } - - //NOTE: bin to tiles - bin_to_tiles(segIndex); } void cubic_setup(vec2 p[4], int pathIndex)