Merge branch 'new_gl_canvas'

This commit is contained in:
martinfouilleul 2023-07-27 15:38:55 +02:00
commit 392bd3a756
9 changed files with 436 additions and 238 deletions

View File

@ -4,7 +4,7 @@ setlocal EnableDelayedExpansion
if not exist bin mkdir bin if not exist bin mkdir bin
set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl set glsl_shaders=src\glsl_shaders\common.glsl src\glsl_shaders\blit_vertex.glsl src\glsl_shaders\blit_fragment.glsl src\glsl_shaders\path_setup.glsl src\glsl_shaders\segment_setup.glsl src\glsl_shaders\backprop.glsl src\glsl_shaders\merge.glsl src\glsl_shaders\raster.glsl src\glsl_shaders\balance_workgroups.glsl
call python3 scripts\embed_text.py %glsl_shaders% --prefix=glsl_ --output src\glsl_shaders.h call python3 scripts\embed_text.py %glsl_shaders% --prefix=glsl_ --output src\glsl_shaders.h

View File

@ -201,7 +201,7 @@ int main()
f32 trackX = mousePos.x/zoom - startX; f32 trackX = mousePos.x/zoom - startX;
f32 trackY = mousePos.y/zoom - startY; f32 trackY = mousePos.y/zoom - startY;
zoom *= 1 + event->move.deltaY * 0.01; zoom *= 1 + event->mouse.deltaY * 0.01;
zoom = Clamp(zoom, 0.2, 10); zoom = Clamp(zoom, 0.2, 10);
startX = mousePos.x/zoom - trackX; startX = mousePos.x/zoom - trackX;

View File

@ -62,6 +62,11 @@ int main()
//NOTE: create surface //NOTE: create surface
mg_surface surface = mg_surface_create_for_window(window, MG_CANVAS); mg_surface surface = mg_surface_create_for_window(window, MG_CANVAS);
//NOTE: bail out early if the surface could not be created
if(mg_surface_is_nil(surface))
{
	log_error("Couldn't create surface\n");
	return(-1);
}
mg_surface_swap_interval(surface, 0); mg_surface_swap_interval(surface, 0);
//TODO: create canvas //TODO: create canvas
@ -108,12 +113,6 @@ int main()
mp_request_quit(); mp_request_quit();
} break; } break;
case MP_EVENT_WINDOW_RESIZE:
{
mp_rect frame = {0, 0, event->frame.rect.w, event->frame.rect.h};
mg_surface_set_frame(surface, frame);
} break;
case MP_EVENT_MOUSE_BUTTON: case MP_EVENT_MOUSE_BUTTON:
{ {
if(event->key.code == MP_MOUSE_LEFT) if(event->key.code == MP_MOUSE_LEFT)
@ -138,7 +137,7 @@ int main()
f32 pinX = (mousePos.x - startX)/zoom; f32 pinX = (mousePos.x - startX)/zoom;
f32 pinY = (mousePos.y - startY)/zoom; f32 pinY = (mousePos.y - startY)/zoom;
zoom *= 1 + event->move.deltaY * 0.01; zoom *= 1 + event->mouse.deltaY * 0.01;
zoom = Clamp(zoom, 0.5, 5); zoom = Clamp(zoom, 0.5, 5);
startX = mousePos.x - pinX*zoom; startX = mousePos.x - pinX*zoom;

View File

@ -64,53 +64,62 @@ typedef struct mg_gl_dispatch_indirect_command
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//NOTE: these are just here for the sizes... //NOTE: these are just here for the sizes...
typedef struct mg_gl_segment #define MG_GL_LAYOUT_FIRST(name, type) \
MG_GL_##name##_OFFSET = 0, \
MG_GL_##name##_SIZE = MG_GL_##type##_SIZE,
#define MG_GL_LAYOUT_NEXT(name, type, prev) \
MG_GL_##name##_OFFSET = AlignUpOnPow2(MG_GL_##prev##_OFFSET + MG_GL_##prev##_SIZE, MG_GL_##type##_ALIGN), \
MG_GL_##name##_SIZE = MG_GL_##type##_SIZE,
#define MG_GL_LAYOUT_SIZE(name, last, maxAlignType) \
MG_GL_##name##_ALIGN = AlignUpOnPow2(MG_GL_##maxAlignType##_ALIGN, MG_GL_VEC4_ALIGN), \
MG_GL_##name##_SIZE = AlignUpOnPow2(MG_GL_##last##_OFFSET + MG_GL_##last##_SIZE, MG_GL_##name##_ALIGN),
enum
{ {
int kind; MG_GL_I32_SIZE = sizeof(i32),
int pathIndex; MG_GL_I32_ALIGN = sizeof(i32),
int config; MG_GL_F32_SIZE = sizeof(f32),
int windingIncrement; MG_GL_F32_ALIGN = sizeof(f32),
vec4 box; MG_GL_VEC2_SIZE = 2*sizeof(f32),
float hullMatrix[9]; MG_GL_VEC2_ALIGN = 2*sizeof(f32),
float implicitMatrix[9]; MG_GL_VEC3_SIZE = 4*sizeof(f32),
float sign; MG_GL_VEC3_ALIGN = 4*sizeof(f32),
vec2 hullVertex; MG_GL_VEC4_SIZE = 4*sizeof(f32),
int debugID; MG_GL_VEC4_ALIGN = 4*sizeof(f32),
MG_GL_MAT3_SIZE = 3*3*MG_GL_VEC3_SIZE,
MG_GL_MAT3_ALIGN = MG_GL_VEC3_ALIGN,
} mg_gl_segment; MG_GL_LAYOUT_FIRST(SEGMENT_KIND, I32)
MG_GL_LAYOUT_NEXT(SEGMENT_PATH_INDEX, I32, SEGMENT_KIND)
MG_GL_LAYOUT_NEXT(SEGMENT_CONFIG, I32, SEGMENT_PATH_INDEX)
MG_GL_LAYOUT_NEXT(SEGMENT_WINDING, I32, SEGMENT_CONFIG)
MG_GL_LAYOUT_NEXT(SEGMENT_BOX, VEC4, SEGMENT_WINDING)
MG_GL_LAYOUT_NEXT(SEGMENT_IMPLICIT_MATRIX, MAT3, SEGMENT_BOX)
MG_GL_LAYOUT_NEXT(SEGMENT_HULL_VERTEX, VEC2, SEGMENT_IMPLICIT_MATRIX)
MG_GL_LAYOUT_NEXT(SEGMENT_SIGN, F32, SEGMENT_HULL_VERTEX)
MG_GL_LAYOUT_SIZE(SEGMENT, SEGMENT_SIGN, MAT3)
typedef struct mg_gl_path_queue MG_GL_LAYOUT_FIRST(PATH_QUEUE_AREA, VEC4)
{ MG_GL_LAYOUT_NEXT(PATH_QUEUE_TILE_QUEUES, I32, PATH_QUEUE_AREA)
vec4 area; MG_GL_LAYOUT_SIZE(PATH_QUEUE, PATH_QUEUE_TILE_QUEUES, VEC4)
int tileQueues;
u8 pad[12];
} mg_gl_path_queue;
typedef struct mg_gl_tile_op MG_GL_LAYOUT_FIRST(TILE_OP_KIND, I32)
{ MG_GL_LAYOUT_NEXT(TILE_OP_NEXT, I32, TILE_OP_KIND)
int kind; MG_GL_LAYOUT_NEXT(TILE_OP_INDEX, I32, TILE_OP_NEXT)
int index; MG_GL_LAYOUT_NEXT(TILE_OP_WINDING, I32, TILE_OP_INDEX)
int next; MG_GL_LAYOUT_SIZE(TILE_OP, TILE_OP_WINDING, I32)
bool crossRight;
int windingOffset;
} mg_gl_tile_op; MG_GL_LAYOUT_FIRST(TILE_QUEUE_WINDING, I32)
MG_GL_LAYOUT_NEXT(TILE_QUEUE_FIRST, I32, TILE_QUEUE_WINDING)
MG_GL_LAYOUT_NEXT(TILE_QUEUE_LAST, I32, TILE_QUEUE_FIRST)
MG_GL_LAYOUT_SIZE(TILE_QUEUE, TILE_QUEUE_LAST, I32)
typedef struct mg_gl_tile_queue MG_GL_LAYOUT_FIRST(SCREEN_TILE_COORD, VEC2)
{ MG_GL_LAYOUT_NEXT(SCREEN_TILE_FIRST, I32, SCREEN_TILE_COORD)
int windingOffset; MG_GL_LAYOUT_SIZE(SCREEN_TILE, SCREEN_TILE_FIRST, VEC2)
int first; };
int last;
} mg_gl_tile_queue;
typedef struct mg_gl_screen_tile
{
u32 tileCoord[2];
i32 first;
u8 padding[4];
} mg_gl_screen_tile;
////////////////////////////////////////////////////////////
enum { enum {
MG_GL_INPUT_BUFFERS_COUNT = 3, MG_GL_INPUT_BUFFERS_COUNT = 3,
@ -140,6 +149,7 @@ typedef struct mg_gl_canvas_backend
GLuint segmentSetup; GLuint segmentSetup;
GLuint backprop; GLuint backprop;
GLuint merge; GLuint merge;
GLuint balanceWorkgroups;
GLuint raster; GLuint raster;
GLuint blit; GLuint blit;
@ -158,6 +168,7 @@ typedef struct mg_gl_canvas_backend
GLuint tileOpBuffer; GLuint tileOpBuffer;
GLuint tileOpCountBuffer; GLuint tileOpCountBuffer;
GLuint screenTilesBuffer; GLuint screenTilesBuffer;
GLuint screenTilesCountBuffer;
GLuint rasterDispatchBuffer; GLuint rasterDispatchBuffer;
GLuint dummyVertexBuffer; GLuint dummyVertexBuffer;
@ -172,6 +183,9 @@ typedef struct mg_gl_canvas_backend
vec4 pathScreenExtents; vec4 pathScreenExtents;
vec4 pathUserExtents; vec4 pathUserExtents;
int maxTileQueueCount;
int maxSegmentCount;
} mg_gl_canvas_backend; } mg_gl_canvas_backend;
static void mg_update_path_extents(vec4* extents, vec2 p) static void mg_update_path_extents(vec4* extents, vec2 p)
@ -182,9 +196,47 @@ static void mg_update_path_extents(vec4* extents, vec2 p)
extents->w = maximum(extents->w, p.y); extents->w = maximum(extents->w, p.y);
} }
//NOTE: Replace a mapped input buffer with a new one of newSize bytes.
//      A new GL buffer is created with persistent, write-only storage and mapped with explicit flush.
//      The byte range starting at copyStart (at most copySize bytes) is copied from the old mapping
//      to the same offset in the new mapping, then the old buffer is unmapped and deleted and
//      *buffer is overwritten with the new buffer.
//
//      buffer:    mapped buffer to grow, updated in place
//      copyStart: byte offset of the first byte to preserve
//      copySize:  requested number of bytes to preserve, starting at copyStart
//      newSize:   size in bytes of the new storage
void mg_gl_grow_input_buffer(mg_gl_mapped_buffer* buffer, int copyStart, int copySize, int newSize)
{
	mg_gl_mapped_buffer newBuffer = {0};
	newBuffer.size = newSize;
	glGenBuffers(1, &newBuffer.buffer);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, newBuffer.buffer);
	glBufferStorage(GL_SHADER_STORAGE_BUFFER, newBuffer.size, 0, GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT);
	newBuffer.contents = glMapBufferRange(GL_SHADER_STORAGE_BUFFER,
	                                      0,
	                                      newBuffer.size,
	                                      GL_MAP_WRITE_BIT
	                                      |GL_MAP_PERSISTENT_BIT
	                                      |GL_MAP_FLUSH_EXPLICIT_BIT);

	if(!newBuffer.contents)
	{
		log_error("couldn't map grown input buffer\n");
	}

	//NOTE: the requested range may run past the end of the old or the new storage (e.g. when the
	//      caller passes a size counted from the start of the buffer together with a non-zero
	//      copyStart), so clamp it to what both mappings actually hold.
	int oldAvailable = (int)buffer->size - copyStart;
	int newAvailable = newSize - copyStart;
	int safeSize = copySize;
	if(safeSize > oldAvailable)
	{
		safeSize = oldAvailable;
	}
	if(safeSize > newAvailable)
	{
		safeSize = newAvailable;
	}

	if(copyStart >= 0 && safeSize > 0 && newBuffer.contents && buffer->contents)
	{
		//NOTE: offsets are in bytes, so do the pointer arithmetic on byte pointers
		memcpy((char*)newBuffer.contents + copyStart,
		       (char*)buffer->contents + copyStart,
		       safeSize);
	}

	glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer->buffer);
	glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
	glDeleteBuffers(1, &buffer->buffer);

	*buffer = newBuffer;
}
void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type kind, vec2* p) void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type kind, vec2* p)
{ {
mg_gl_path_elt* elementData = (mg_gl_path_elt*)backend->elementBuffer[backend->bufferIndex].contents; int bufferIndex = backend->bufferIndex;
int bufferCap = backend->elementBuffer[bufferIndex].size / sizeof(mg_gl_path_elt);
if(backend->eltCount >= bufferCap)
{
	int newBufferCap = (int)(bufferCap * 1.5);
	int newBufferSize = newBufferCap * sizeof(mg_gl_path_elt);

	log_info("growing element buffer to %i elements\n", newBufferCap);

	//NOTE: preserve the elements from eltBatchStart up to eltCount. The copy starts at
	//      eltBatchStart, so its length is the number of elements after that offset,
	//      not the total element count.
	mg_gl_grow_input_buffer(&backend->elementBuffer[bufferIndex],
	                        backend->eltBatchStart * sizeof(mg_gl_path_elt),
	                        (backend->eltCount - backend->eltBatchStart) * sizeof(mg_gl_path_elt),
	                        newBufferSize);
}
mg_gl_path_elt* elementData = (mg_gl_path_elt*)backend->elementBuffer[bufferIndex].contents;
mg_gl_path_elt* elt = &elementData[backend->eltCount]; mg_gl_path_elt* elt = &elementData[backend->eltCount];
backend->eltCount++; backend->eltCount++;
@ -193,16 +245,19 @@ void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type
switch(kind) switch(kind)
{ {
case MG_PATH_LINE: case MG_PATH_LINE:
backend->maxSegmentCount += 1;
elt->kind = MG_GL_LINE; elt->kind = MG_GL_LINE;
count = 2; count = 2;
break; break;
case MG_PATH_QUADRATIC: case MG_PATH_QUADRATIC:
backend->maxSegmentCount += 3;
elt->kind = MG_GL_QUADRATIC; elt->kind = MG_GL_QUADRATIC;
count = 3; count = 3;
break; break;
case MG_PATH_CUBIC: case MG_PATH_CUBIC:
backend->maxSegmentCount += 7;
elt->kind = MG_GL_CUBIC; elt->kind = MG_GL_CUBIC;
count = 4; count = 4;
break; break;
@ -224,6 +279,21 @@ void mg_gl_canvas_encode_element(mg_gl_canvas_backend* backend, mg_path_elt_type
void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primitive, f32 scale) void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primitive, f32 scale)
{ {
int bufferIndex = backend->bufferIndex;
int bufferCap = backend->pathBuffer[bufferIndex].size / sizeof(mg_gl_path);
if(backend->pathCount >= bufferCap)
{
	int newBufferCap = (int)(bufferCap * 1.5);
	int newBufferSize = newBufferCap * sizeof(mg_gl_path);

	log_info("growing path buffer to %i elements\n", newBufferCap);

	//NOTE: preserve the paths from pathBatchStart up to pathCount. The length is counted in
	//      paths (not path elements) and starts at pathBatchStart.
	mg_gl_grow_input_buffer(&backend->pathBuffer[bufferIndex],
	                        backend->pathBatchStart * sizeof(mg_gl_path),
	                        (backend->pathCount - backend->pathBatchStart) * sizeof(mg_gl_path),
	                        newBufferSize);
}
mg_gl_path* pathData = (mg_gl_path*)backend->pathBuffer[backend->bufferIndex].contents; mg_gl_path* pathData = (mg_gl_path*)backend->pathBuffer[backend->bufferIndex].contents;
mg_gl_path* path = &pathData[backend->pathCount]; mg_gl_path* path = &pathData[backend->pathCount];
backend->pathCount++; backend->pathCount++;
@ -293,6 +363,10 @@ void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primi
path->uvTransform[10] = 1; path->uvTransform[10] = 1;
path->uvTransform[11] = 0; path->uvTransform[11] = 0;
} }
int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_GL_TILE_SIZE + 1;
int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_GL_TILE_SIZE + 1;
backend->maxTileQueueCount += (nTilesX * nTilesY);
} }
bool mg_intersect_hull_legs(vec2 p0, vec2 p1, vec2 p2, vec2 p3, vec2* intersection) bool mg_intersect_hull_legs(vec2 p0, vec2 p1, vec2 p2, vec2 p3, vec2* intersection)
@ -948,6 +1022,25 @@ void mg_gl_encode_stroke(mg_gl_canvas_backend* backend,
} }
} }
//NOTE: Make sure the storage of a GL buffer is at least wantedSize bytes.
//      The current size is queried from GL. If it is smaller than wantedSize, the storage is
//      reallocated to 1.2 times wantedSize with no initial data (the data pointer passed is 0).
//
//      buffer:     GL buffer object to check
//      wantedSize: minimum size required, in bytes
//      name:       label used in the log message when the buffer grows
void mg_gl_grow_buffer_if_needed(GLuint buffer, i32 wantedSize, const char* name)
{
	i32 currentSize = 0;
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
	glGetBufferParameteriv(GL_SHADER_STORAGE_BUFFER, GL_BUFFER_SIZE, &currentSize);

	if(currentSize >= wantedSize)
	{
		//NOTE: already large enough, nothing to do
		return;
	}

	log_info("growing %s buffer\n", name);

	//NOTE: over-allocate by 20%
	int grownSize = wantedSize * 1.2;
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
	glBufferData(GL_SHADER_STORAGE_BUFFER, grownSize, 0, GL_DYNAMIC_COPY);
}
void mg_gl_render_batch(mg_gl_canvas_backend* backend, void mg_gl_render_batch(mg_gl_canvas_backend* backend,
mg_wgl_surface* surface, mg_wgl_surface* surface,
mg_image_data* image, mg_image_data* image,
@ -957,7 +1050,6 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
vec2 viewportSize, vec2 viewportSize,
f32 scale) f32 scale)
{ {
//NOTE: make the buffers visible to gl
GLuint pathBuffer = backend->pathBuffer[backend->bufferIndex].buffer; GLuint pathBuffer = backend->pathBuffer[backend->bufferIndex].buffer;
GLuint elementBuffer = backend->elementBuffer[backend->bufferIndex].buffer; GLuint elementBuffer = backend->elementBuffer[backend->bufferIndex].buffer;
@ -971,6 +1063,16 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
return; return;
} }
//NOTE: update intermediate buffers size if needed
//TODO: compute correct sizes
mg_gl_grow_buffer_if_needed(backend->pathQueueBuffer, pathCount * MG_GL_PATH_QUEUE_SIZE, "path queues");
mg_gl_grow_buffer_if_needed(backend->tileQueueBuffer, backend->maxTileQueueCount * MG_GL_TILE_QUEUE_SIZE, "tile queues");
mg_gl_grow_buffer_if_needed(backend->segmentBuffer, backend->maxSegmentCount * MG_GL_SEGMENT_SIZE, "segments");
mg_gl_grow_buffer_if_needed(backend->screenTilesBuffer, nTilesX * nTilesY * MG_GL_SCREEN_TILE_SIZE, "screen tiles");
mg_gl_grow_buffer_if_needed(backend->tileOpBuffer, backend->maxSegmentCount * 30 * MG_GL_TILE_OP_SIZE, "tile ops");
//NOTE: make the buffers visible to gl
glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, pathBuffer);
glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path)); glFlushMappedBufferRange(GL_SHADER_STORAGE_BUFFER, pathBufferOffset, pathCount*sizeof(mg_gl_path));
@ -995,6 +1097,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), &zero, GL_DYNAMIC_COPY);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &zero, GL_DYNAMIC_COPY);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
int err = glGetError(); int err = glGetError();
@ -1110,7 +1215,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->tileOpCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, backend->screenTilesBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->rasterDispatchBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, backend->screenTilesCountBuffer);
glUniform1i(0, tileSize); glUniform1i(0, tileSize);
glUniform1f(1, scale); glUniform1f(1, scale);
@ -1139,6 +1244,17 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
log_error("gl error %i\n", err); log_error("gl error %i\n", err);
} }
} }
//NOTE: balance work groups
glUseProgram(backend->balanceWorkgroups);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, backend->screenTilesCountBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->rasterDispatchBuffer);
glUniform1ui(0, maxWorkGroupCount);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
//NOTE: raster pass //NOTE: raster pass
glUseProgram(backend->raster); glUseProgram(backend->raster);
@ -1146,6 +1262,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, backend->segmentBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, backend->tileOpBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, backend->screenTilesBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, backend->screenTilesCountBuffer);
glUniform1f(0, scale); glUniform1f(0, scale);
glUniform1i(1, backend->msaaCount); glUniform1i(1, backend->msaaCount);
@ -1165,6 +1282,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
} }
glUniform1i(3, backend->pathBatchStart); glUniform1i(3, backend->pathBatchStart);
glUniform1ui(4, maxWorkGroupCount);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
glDispatchComputeIndirect(0); glDispatchComputeIndirect(0);
@ -1198,6 +1316,9 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
backend->pathBatchStart = backend->pathCount; backend->pathBatchStart = backend->pathCount;
backend->eltBatchStart = backend->eltCount; backend->eltBatchStart = backend->eltCount;
backend->maxSegmentCount = 0;
backend->maxTileQueueCount = 0;
} }
void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size) void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size)
@ -1207,7 +1328,7 @@ void mg_gl_canvas_resize(mg_gl_canvas_backend* backend, vec2 size)
int nTilesY = (int)(size.y + tileSize - 1)/tileSize; int nTilesY = (int)(size.y + tileSize - 1)/tileSize;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY);
if(backend->outTexture) if(backend->outTexture)
{ {
@ -1271,6 +1392,8 @@ void mg_gl_canvas_render(mg_canvas_backend* interface,
backend->pathBatchStart = 0; backend->pathBatchStart = 0;
backend->eltCount = 0; backend->eltCount = 0;
backend->eltBatchStart = 0; backend->eltBatchStart = 0;
backend->maxSegmentCount = 0;
backend->maxTileQueueCount = 0;
//NOTE: encode and render batches //NOTE: encode and render batches
vec2 currentPos = {0}; vec2 currentPos = {0};
@ -1519,12 +1642,12 @@ int mg_gl_canvas_compile_render_program_named(const char* progName,
#define mg_gl_canvas_compile_render_program(progName, shaderSrc, vertexSrc, out) \ #define mg_gl_canvas_compile_render_program(progName, shaderSrc, vertexSrc, out) \
mg_gl_canvas_compile_render_program_named(progName, #shaderSrc, #vertexSrc, shaderSrc, vertexSrc, out) mg_gl_canvas_compile_render_program_named(progName, #shaderSrc, #vertexSrc, shaderSrc, vertexSrc, out)
const u32 MG_GL_PATH_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path), const u32 MG_GL_PATH_BUFFER_SIZE = (4<<10)*sizeof(mg_gl_path),
MG_GL_ELEMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_elt), MG_GL_ELEMENT_BUFFER_SIZE = (4<<12)*sizeof(mg_gl_path_elt),
MG_GL_SEGMENT_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_segment), MG_GL_SEGMENT_BUFFER_SIZE = (4<<10)*MG_GL_SEGMENT_SIZE,
MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_path_queue), MG_GL_PATH_QUEUE_BUFFER_SIZE = (4<<10)*MG_GL_PATH_QUEUE_SIZE,
MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_queue), MG_GL_TILE_QUEUE_BUFFER_SIZE = (4<<10)*MG_GL_TILE_QUEUE_SIZE,
MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*sizeof(mg_gl_tile_op); MG_GL_TILE_OP_BUFFER_SIZE = (4<<20)*MG_GL_TILE_OP_SIZE;
mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface) mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
{ {
@ -1554,6 +1677,7 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup); err |= mg_gl_canvas_compile_compute_program(glsl_segment_setup, &backend->segmentSetup);
err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop); err |= mg_gl_canvas_compile_compute_program(glsl_backprop, &backend->backprop);
err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge); err |= mg_gl_canvas_compile_compute_program(glsl_merge, &backend->merge);
err |= mg_gl_canvas_compile_compute_program(glsl_balance_workgroups, &backend->balanceWorkgroups);
err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster); err |= mg_gl_canvas_compile_compute_program(glsl_raster, &backend->raster);
err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit); err |= mg_gl_canvas_compile_render_program("blit", glsl_blit_vertex, glsl_blit_fragment, &backend->blit);
@ -1637,13 +1761,16 @@ mg_canvas_backend* gl_canvas_backend_create(mg_wgl_surface* surface)
glGenBuffers(1, &backend->screenTilesBuffer); glGenBuffers(1, &backend->screenTilesBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*sizeof(mg_gl_screen_tile), 0, GL_DYNAMIC_COPY); glBufferData(GL_SHADER_STORAGE_BUFFER, nTilesX*nTilesY*MG_GL_SCREEN_TILE_SIZE, 0, GL_DYNAMIC_COPY);
glGenBuffers(1, &backend->screenTilesCountBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->screenTilesCountBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), 0, GL_DYNAMIC_COPY);
glGenBuffers(1, &backend->rasterDispatchBuffer); glGenBuffers(1, &backend->rasterDispatchBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, backend->rasterDispatchBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(mg_gl_dispatch_indirect_command), 0, GL_DYNAMIC_COPY);
if(err) if(err)
{ {
mg_gl_canvas_destroy((mg_canvas_backend*)backend); mg_gl_canvas_destroy((mg_canvas_backend*)backend);

View File

@ -0,0 +1,27 @@
//NOTE: single-invocation compute shader that turns the screen tile count into the
//      (x, y, z) work group counts of an indirect dispatch command.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

precision mediump float;
layout(std430) buffer;

//NOTE: input: elements[0] is read as the total number of work groups to dispatch
layout(binding = 0) coherent restrict readonly buffer screenTilesCountBufferSSBO
{
	int elements[];
} screenTilesCountBuffer;

//NOTE: output: elements[0] receives the work group counts
layout(binding = 1) coherent restrict writeonly buffer dispatchBufferSSBO
{
	mg_gl_dispatch_indirect_command elements[];
} dispatchBuffer;

//NOTE: upper bound applied to the x dimension of the dispatch
layout(location = 0) uniform uint maxWorkGroupCount;

void main()
{
	uint tileCount = uint(screenTilesCountBuffer.elements[0]);

	//NOTE: x is capped at maxWorkGroupCount; y is the number of rows of maxWorkGroupCount
	//      groups needed to cover tileCount (rounded up)
	uint groupsX = min(tileCount, maxWorkGroupCount);
	uint groupsY = (tileCount + maxWorkGroupCount - 1u) / maxWorkGroupCount;

	dispatchBuffer.elements[0].num_groups_x = groupsX;
	dispatchBuffer.elements[0].num_groups_y = groupsY;
	dispatchBuffer.elements[0].num_groups_z = 1;
}

View File

@ -34,10 +34,10 @@ layout(binding = 5) restrict writeonly buffer screenTilesBufferSSBO
mg_gl_screen_tile elements[]; mg_gl_screen_tile elements[];
} screenTilesBuffer; } screenTilesBuffer;
layout(binding = 6) coherent restrict buffer dispatchBufferSSBO layout(binding = 6) coherent restrict buffer screenTilesCountBufferSSBO
{ {
mg_gl_dispatch_indirect_command elements[]; int elements[];
} dispatchBuffer; } screenTilesCountBuffer;
layout(location = 0) uniform int tileSize; layout(location = 0) uniform int tileSize;
@ -53,9 +53,6 @@ void main()
int lastOpIndex = -1; int lastOpIndex = -1;
dispatchBuffer.elements[0].num_groups_y = 1;
dispatchBuffer.elements[0].num_groups_z = 1;
for(int pathIndex = 0; pathIndex < pathCount; pathIndex++) for(int pathIndex = 0; pathIndex < pathCount; pathIndex++)
{ {
mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex]; mg_gl_path_queue pathQueue = pathQueueBuffer.elements[pathIndex];
@ -75,7 +72,7 @@ void main()
{ {
if(tileIndex < 0) if(tileIndex < 0)
{ {
tileIndex = int(atomicAdd(dispatchBuffer.elements[0].num_groups_x, 1)); tileIndex = int(atomicAdd(screenTilesCountBuffer.elements[0], 1));
screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord); screenTilesBuffer.elements[tileIndex].tileCoord = uvec2(tileCoord);
screenTilesBuffer.elements[tileIndex].first = -1; screenTilesBuffer.elements[tileIndex].first = -1;
} }
@ -106,6 +103,11 @@ void main()
// Additionally if color is opaque and tile is fully inside clip, trim tile list. // Additionally if color is opaque and tile is fully inside clip, trim tile list.
int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); int pathOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
if(pathOpIndex >= tileOpBuffer.elements.length())
{
return;
}
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_CLIP_FILL; tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_CLIP_FILL;
tileOpBuffer.elements[pathOpIndex].next = -1; tileOpBuffer.elements[pathOpIndex].next = -1;
tileOpBuffer.elements[pathOpIndex].index = pathIndex; tileOpBuffer.elements[pathOpIndex].index = pathIndex;
@ -141,6 +143,10 @@ void main()
{ {
//NOTE: add path start op (with winding offset) //NOTE: add path start op (with winding offset)
int startOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); int startOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
if(startOpIndex >= tileOpBuffer.elements.length())
{
return;
}
tileOpBuffer.elements[startOpIndex].kind = MG_GL_OP_START; tileOpBuffer.elements[startOpIndex].kind = MG_GL_OP_START;
tileOpBuffer.elements[startOpIndex].next = -1; tileOpBuffer.elements[startOpIndex].next = -1;
@ -163,6 +169,10 @@ void main()
//NOTE: add path end op //NOTE: add path end op
int endOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); int endOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
if(endOpIndex >= tileOpBuffer.elements.length())
{
return;
}
tileOpBuffer.elements[endOpIndex].kind = MG_GL_OP_END; tileOpBuffer.elements[endOpIndex].kind = MG_GL_OP_END;
tileOpBuffer.elements[endOpIndex].next = -1; tileOpBuffer.elements[endOpIndex].next = -1;

View File

@ -50,6 +50,13 @@ void main()
int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount); int tileQueuesIndex = atomicAdd(tileQueueCountBuffer.elements[0], tileCount);
if(tileQueuesIndex + tileCount >= tileQueueBuffer.elements.length())
{
	//NOTE: not enough room in the tile queue buffer for this path: give it an empty area.
	//      Index the path queue with the same pathQueueBufferStart offset as the normal
	//      branch, so the entry that gets cleared is this path's own entry.
	pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(0);
	pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = 0;
}
else
{
pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY); pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].area = ivec4(firstTile.x, firstTile.y, nTilesX, nTilesY);
pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex; pathQueueBuffer.elements[pathQueueBufferStart + pathIndex].tileQueues = tileQueuesIndex;
@ -59,4 +66,5 @@ void main()
tileQueueBuffer.elements[tileQueuesIndex + i].last = -1; tileQueueBuffer.elements[tileQueuesIndex + i].last = -1;
tileQueueBuffer.elements[tileQueuesIndex + i].windingOffset = 0; tileQueueBuffer.elements[tileQueuesIndex + i].windingOffset = 0;
} }
}
} }

View File

@ -24,17 +24,30 @@ layout(binding = 3) restrict readonly buffer screenTilesBufferSSBO
mg_gl_screen_tile elements[]; mg_gl_screen_tile elements[];
} screenTilesBuffer; } screenTilesBuffer;
layout(binding = 4) restrict readonly buffer screenTilesCountBufferSSBO
{
int elements[];
} screenTilesCountBuffer;
layout(location = 0) uniform float scale; layout(location = 0) uniform float scale;
layout(location = 1) uniform int msaaSampleCount; layout(location = 1) uniform int msaaSampleCount;
layout(location = 2) uniform uint useTexture; layout(location = 2) uniform uint useTexture;
layout(location = 3) uniform int pathBufferStart; layout(location = 3) uniform int pathBufferStart;
layout(location = 4) uniform uint maxWorkGroupCount;
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture; layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
layout(binding = 1) uniform sampler2D srcTexture; layout(binding = 1) uniform sampler2D srcTexture;
void main() void main()
{ {
uint tileIndex = gl_WorkGroupID.x; uint tileIndex = gl_WorkGroupID.y * maxWorkGroupCount + gl_WorkGroupID.x;
if(tileIndex >= screenTilesCountBuffer.elements[0])
{
return;
}
uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord; uvec2 tileCoord = screenTilesBuffer.elements[tileIndex].tileCoord;
ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy); ivec2 pixelCoord = ivec2(tileCoord * gl_WorkGroupSize.x + gl_LocalInvocationID.xy);

View File

@ -105,6 +105,8 @@ void bin_to_tiles(int segIndex)
{ {
int tileOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1); int tileOpIndex = atomicAdd(tileOpCountBuffer.elements[0], 1);
if(tileOpIndex < tileOpBuffer.elements.length())
{
tileOpBuffer.elements[tileOpIndex].kind = MG_GL_OP_SEGMENT; tileOpBuffer.elements[tileOpIndex].kind = MG_GL_OP_SEGMENT;
tileOpBuffer.elements[tileOpIndex].index = segIndex; tileOpBuffer.elements[tileOpIndex].index = segIndex;
tileOpBuffer.elements[tileOpIndex].windingOffsetOrCrossRight = 0; tileOpBuffer.elements[tileOpIndex].windingOffsetOrCrossRight = 0;
@ -112,7 +114,8 @@ void bin_to_tiles(int segIndex)
int tileQueueIndex = pathQueue.tileQueues + y*pathArea.z + x; int tileQueueIndex = pathQueue.tileQueues + y*pathArea.z + x;
tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueueBuffer.elements[tileQueueIndex].first, tileOpIndex); tileOpBuffer.elements[tileOpIndex].next = atomicExchange(tileQueueBuffer.elements[tileQueueIndex].first,
tileOpIndex);
if(tileOpBuffer.elements[tileOpIndex].next == -1) if(tileOpBuffer.elements[tileOpIndex].next == -1)
{ {
tileQueueBuffer.elements[tileQueueIndex].last = tileOpIndex; tileQueueBuffer.elements[tileQueueIndex].last = tileOpIndex;
@ -132,12 +135,15 @@ void bin_to_tiles(int segIndex)
} }
} }
} }
}
} }
int push_segment(in vec2 p[4], int kind, int pathIndex) int push_segment(in vec2 p[4], int kind, int pathIndex)
{ {
int segIndex = atomicAdd(segmentCountBuffer.elements[0], 1); int segIndex = atomicAdd(segmentCountBuffer.elements[0], 1);
if(segIndex < segmentBuffer.elements.length())
{
vec2 s, c, e; vec2 s, c, e;
switch(kind) switch(kind)
@ -219,7 +225,7 @@ int push_segment(in vec2 p[4], int kind, int pathIndex)
segmentBuffer.elements[segIndex].config = MG_GL_TR; segmentBuffer.elements[segIndex].config = MG_GL_TR;
} }
} }
}
return(segIndex); return(segIndex);
} }
@ -229,9 +235,11 @@ int push_segment(in vec2 p[4], int kind, int pathIndex)
void line_setup(vec2 p[4], int pathIndex) void line_setup(vec2 p[4], int pathIndex)
{ {
int segIndex = push_segment(p, MG_GL_LINE, pathIndex); int segIndex = push_segment(p, MG_GL_LINE, pathIndex);
if(segIndex < segmentBuffer.elements.length())
{
segmentBuffer.elements[segIndex].hullVertex = p[0]; segmentBuffer.elements[segIndex].hullVertex = p[0];
bin_to_tiles(segIndex); bin_to_tiles(segIndex);
}
} }
vec2 quadratic_blossom(vec2 p[4], float u, float v) vec2 quadratic_blossom(vec2 p[4], float u, float v)
@ -298,6 +306,8 @@ void quadratic_emit(vec2 p[4], int pathIndex)
{ {
int segIndex = push_segment(p, MG_GL_QUADRATIC, pathIndex); int segIndex = push_segment(p, MG_GL_QUADRATIC, pathIndex);
if(segIndex < segmentBuffer.elements.length())
{
//NOTE: compute implicit equation matrix //NOTE: compute implicit equation matrix
float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y); float det = p[0].x*(p[1].y-p[2].y) + p[1].x*(p[2].y-p[0].y) + p[2].x*(p[0].y - p[1].y);
@ -319,6 +329,7 @@ void quadratic_emit(vec2 p[4], int pathIndex)
segmentBuffer.elements[segIndex].hullVertex = p[1]; segmentBuffer.elements[segIndex].hullVertex = p[1];
bin_to_tiles(segIndex); bin_to_tiles(segIndex);
}
} }
void quadratic_setup(vec2 p[4], int pathIndex) void quadratic_setup(vec2 p[4], int pathIndex)
@ -654,6 +665,8 @@ void cubic_emit(cubic_info curve, vec2 p[4], float s0, float s1, vec2 sp[4], int
{ {
int segIndex = push_segment(sp, MG_GL_CUBIC, pathIndex); int segIndex = push_segment(sp, MG_GL_CUBIC, pathIndex);
if(segIndex < segmentBuffer.elements.length())
{
vec2 v0 = p[0]; vec2 v0 = p[0];
vec2 v1 = p[3]; vec2 v1 = p[3];
vec2 v2; vec2 v2;
@ -719,6 +732,7 @@ void cubic_emit(cubic_info curve, vec2 p[4], float s0, float s1, vec2 sp[4], int
//NOTE: bin to tiles //NOTE: bin to tiles
bin_to_tiles(segIndex); bin_to_tiles(segIndex);
}
} }
void cubic_setup(vec2 p[4], int pathIndex) void cubic_setup(vec2 p[4], int pathIndex)