Merge branch 'batch_multi_image' into main

This allows the renderer to use multiple source textures per batch, and thus create less batches (and less draw calls). It also doesn't create a separate batch for non-image fills/strokes. This greatly improves perf especially when interleaving images and solid color commands. When using lots of images though, it is still recommended to use an atlas, which ensures everything fits in a single batch.
This commit is contained in:
Martin Fouilleul 2023-07-28 10:48:08 +02:00
commit 442d86386e
8 changed files with 176 additions and 88 deletions

View File

@ -30,7 +30,8 @@ typedef struct mg_gl_path
vec4 box; vec4 box;
vec4 clip; vec4 clip;
mg_gl_cmd cmd; mg_gl_cmd cmd;
u8 pad[12]; int textureID;
u8 pad[8];
} mg_gl_path; } mg_gl_path;
enum _mg_gl_seg_kind{ enum _mg_gl_seg_kind{
@ -125,6 +126,7 @@ enum {
MG_GL_INPUT_BUFFERS_COUNT = 3, MG_GL_INPUT_BUFFERS_COUNT = 3,
MG_GL_TILE_SIZE = 16, MG_GL_TILE_SIZE = 16,
MG_GL_MSAA_COUNT = 8, MG_GL_MSAA_COUNT = 8,
MG_GL_MAX_IMAGES_PER_BATCH = 8,
}; };
typedef struct mg_gl_mapped_buffer typedef struct mg_gl_mapped_buffer
@ -186,6 +188,7 @@ typedef struct mg_gl_canvas_backend
int maxTileQueueCount; int maxTileQueueCount;
int maxSegmentCount; int maxSegmentCount;
int currentImageIndex;
} mg_gl_canvas_backend; } mg_gl_canvas_backend;
static void mg_update_path_extents(vec4* extents, vec2 p) static void mg_update_path_extents(vec4* extents, vec2 p)
@ -362,6 +365,12 @@ void mg_gl_canvas_encode_path(mg_gl_canvas_backend* backend, mg_primitive* primi
path->uvTransform[9] = uvTransform.m[5]; path->uvTransform[9] = uvTransform.m[5];
path->uvTransform[10] = 1; path->uvTransform[10] = 1;
path->uvTransform[11] = 0; path->uvTransform[11] = 0;
path->textureID = backend->currentImageIndex;
}
else
{
path->textureID = -1;
} }
int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_GL_TILE_SIZE + 1; int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_GL_TILE_SIZE + 1;
@ -1043,7 +1052,7 @@ void mg_gl_grow_buffer_if_needed(GLuint buffer, i32 wantedSize, const char* name
void mg_gl_render_batch(mg_gl_canvas_backend* backend, void mg_gl_render_batch(mg_gl_canvas_backend* backend,
mg_wgl_surface* surface, mg_wgl_surface* surface,
mg_image_data* image, mg_image* images,
int tileSize, int tileSize,
int nTilesX, int nTilesX,
int nTilesY, int nTilesY,
@ -1220,18 +1229,7 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glUniform1i(0, tileSize); glUniform1i(0, tileSize);
glUniform1f(1, scale); glUniform1f(1, scale);
glUniform1i(2, pathCount); glUniform1i(2, pathCount);
glUniform1i(3, backend->pathBatchStart);
// if there's an image, don't cull solid tiles
if(image)
{
glUniform1i(3, 0);
}
else
{
glUniform1i(3, 1);
}
glUniform1i(4, backend->pathBatchStart);
glDispatchCompute(nTilesX, nTilesY, 1); glDispatchCompute(nTilesX, nTilesY, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
@ -1269,20 +1267,21 @@ void mg_gl_render_batch(mg_gl_canvas_backend* backend,
glBindImageTexture(0, backend->outTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); glBindImageTexture(0, backend->outTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
for(int i=0; i<MG_GL_MAX_IMAGES_PER_BATCH; i++)
{
if(images[i].h)
{
mg_gl_image* image = (mg_gl_image*)mg_image_data_from_handle(images[i]);
if(image) if(image)
{ {
mg_gl_image* glImage = (mg_gl_image*)image; glActiveTexture(GL_TEXTURE1+i);
glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, image->texture);
glBindTexture(GL_TEXTURE_2D, glImage->texture); }
glUniform1ui(2, 1);
} }
else
{
glUniform1ui(2, 0);
} }
glUniform1i(3, backend->pathBatchStart); glUniform1i(2, backend->pathBatchStart);
glUniform1ui(4, maxWorkGroupCount); glUniform1ui(3, maxWorkGroupCount);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, backend->rasterDispatchBuffer);
glDispatchComputeIndirect(0); glDispatchComputeIndirect(0);
@ -1397,28 +1396,53 @@ void mg_gl_canvas_render(mg_canvas_backend* interface,
//NOTE: encode and render batches //NOTE: encode and render batches
vec2 currentPos = {0}; vec2 currentPos = {0};
mg_image currentImage = mg_image_nil(); mg_image images[MG_GL_MAX_IMAGES_PER_BATCH] = {0};
int imageCount = 0;
backend->eltCount = 0; backend->eltCount = 0;
for(int primitiveIndex = 0; primitiveIndex < primitiveCount; primitiveIndex++) for(int primitiveIndex = 0; primitiveIndex < primitiveCount; primitiveIndex++)
{ {
mg_primitive* primitive = &primitives[primitiveIndex]; mg_primitive* primitive = &primitives[primitiveIndex];
if(primitiveIndex && (primitive->attributes.image.h != currentImage.h)) if(primitive->attributes.image.h != 0)
{
backend->currentImageIndex = -1;
for(int i=0; i<imageCount; i++)
{
if(images[i].h == primitive->attributes.image.h)
{
backend->currentImageIndex = i;
}
}
if(backend->currentImageIndex <= 0)
{
if(imageCount<MG_GL_MAX_IMAGES_PER_BATCH)
{
images[imageCount] = primitive->attributes.image;
backend->currentImageIndex = imageCount;
imageCount++;
}
else
{ {
mg_image_data* imageData = mg_image_data_from_handle(currentImage);
mg_gl_render_batch(backend, mg_gl_render_batch(backend,
surface, surface,
imageData, images,
tileSize, tileSize,
nTilesX, nTilesX,
nTilesY, nTilesY,
viewportSize, viewportSize,
scale); scale);
images[0] = primitive->attributes.image;
backend->currentImageIndex = 0;
imageCount = 1;
}
}
}
else
{
backend->currentImageIndex = -1;
} }
currentImage = primitive->attributes.image;
if(primitive->path.count) if(primitive->path.count)
{ {
@ -1470,10 +1494,9 @@ void mg_gl_canvas_render(mg_canvas_backend* interface,
} }
} }
mg_image_data* imageData = mg_image_data_from_handle(currentImage);
mg_gl_render_batch(backend, mg_gl_render_batch(backend,
surface, surface,
imageData, images,
tileSize, tileSize,
nTilesX, nTilesX,
nTilesY, nTilesY,

View File

@ -34,6 +34,7 @@ struct mg_gl_path
vec4 box; vec4 box;
vec4 clip; vec4 clip;
int cmd; int cmd;
int textureID;
}; };
struct mg_gl_path_elt struct mg_gl_path_elt

View File

@ -43,8 +43,7 @@ layout(binding = 6) coherent restrict buffer screenTilesCountBufferSSBO
layout(location = 0) uniform int tileSize; layout(location = 0) uniform int tileSize;
layout(location = 1) uniform float scale; layout(location = 1) uniform float scale;
layout(location = 2) uniform int pathCount; layout(location = 2) uniform int pathCount;
layout(location = 3) uniform int cullSolidTiles; layout(location = 3) uniform int pathBufferStart;
layout(location = 4) uniform int pathBufferStart;
void main() void main()
{ {
@ -130,7 +129,7 @@ void main()
tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL; tileOpBuffer.elements[pathOpIndex].kind = MG_GL_OP_FILL;
if( pathBuffer.elements[pathBufferStart + pathIndex].color.a == 1 if( pathBuffer.elements[pathBufferStart + pathIndex].color.a == 1
&& cullSolidTiles != 0) && pathBuffer.elements[pathBufferStart + pathIndex].textureID < 0)
{ {
screenTilesBuffer.elements[tileIndex].first = pathOpIndex; screenTilesBuffer.elements[tileIndex].first = pathOpIndex;
} }

View File

@ -32,12 +32,19 @@ layout(binding = 4) restrict readonly buffer screenTilesCountBufferSSBO
layout(location = 0) uniform float scale; layout(location = 0) uniform float scale;
layout(location = 1) uniform int msaaSampleCount; layout(location = 1) uniform int msaaSampleCount;
layout(location = 2) uniform uint useTexture; layout(location = 2) uniform int pathBufferStart;
layout(location = 3) uniform int pathBufferStart; layout(location = 3) uniform uint maxWorkGroupCount;
layout(location = 4) uniform uint maxWorkGroupCount;
layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture; layout(rgba8, binding = 0) uniform restrict writeonly image2D outTexture;
layout(binding = 1) uniform sampler2D srcTexture;
layout(binding = 1) uniform sampler2D srcTexture0;
layout(binding = 2) uniform sampler2D srcTexture1;
layout(binding = 3) uniform sampler2D srcTexture2;
layout(binding = 4) uniform sampler2D srcTexture3;
layout(binding = 5) uniform sampler2D srcTexture4;
layout(binding = 6) uniform sampler2D srcTexture5;
layout(binding = 7) uniform sampler2D srcTexture6;
layout(binding = 8) uniform sampler2D srcTexture7;
void main() void main()
{ {
@ -146,15 +153,49 @@ void main()
vec4 nextColor = pathBuffer.elements[pathBufferStart + pathIndex].color; vec4 nextColor = pathBuffer.elements[pathBufferStart + pathIndex].color;
nextColor.rgb *= nextColor.a; nextColor.rgb *= nextColor.a;
if(useTexture != 0) int textureID = pathBuffer.elements[pathBufferStart+pathIndex].textureID;
if(textureID >= 0)
{ {
vec4 texColor = vec4(0); vec4 texColor = vec4(0);
for(int sampleIndex = 0; sampleIndex<srcSampleCount; sampleIndex++) for(int sampleIndex = 0; sampleIndex<srcSampleCount; sampleIndex++)
{ {
vec2 sampleCoord = imgSampleCoords[sampleIndex]; vec2 sampleCoord = imgSampleCoords[sampleIndex];
vec3 ph = vec3(sampleCoord.xy, 1); vec3 ph = vec3(sampleCoord.xy, 1);
vec2 uv = (pathBuffer.elements[pathBufferStart + pathIndex].uvTransform * ph).xy; vec2 uv = (pathBuffer.elements[pathBufferStart + pathIndex].uvTransform * ph).xy;
texColor += texture(srcTexture, uv);
if(textureID == 0)
{
texColor += texture(srcTexture0, uv);
}
else if(textureID == 1)
{
texColor += texture(srcTexture1, uv);
}
else if(textureID == 2)
{
texColor += texture(srcTexture2, uv);
}
else if(textureID == 3)
{
texColor += texture(srcTexture3, uv);
}
else if(textureID == 4)
{
texColor += texture(srcTexture4, uv);
}
else if(textureID == 5)
{
texColor += texture(srcTexture5, uv);
}
else if(textureID == 6)
{
texColor += texture(srcTexture6, uv);
}
else if(textureID == 7)
{
texColor += texture(srcTexture7, uv);
}
} }
texColor /= srcSampleCount; texColor /= srcSampleCount;
texColor.rgb *= texColor.a; texColor.rgb *= texColor.a;

View File

@ -78,14 +78,6 @@ typedef struct mg_image_data
} mg_image_data; } mg_image_data;
typedef void (*mg_canvas_backend_destroy_proc)(mg_canvas_backend* backend); typedef void (*mg_canvas_backend_destroy_proc)(mg_canvas_backend* backend);
typedef void (*mg_canvas_backend_begin_proc)(mg_canvas_backend* backend, mg_color clearColor);
typedef void (*mg_canvas_backend_end_proc)(mg_canvas_backend* backend);
typedef void (*mg_canvas_backend_draw_batch_proc)(mg_canvas_backend* backend,
mg_image_data* imageData,
u32 vertexCount,
u32 shapeCount,
u32 indexCount);
typedef mg_image_data* (*mg_canvas_backend_image_create_proc)(mg_canvas_backend* backend, vec2 size); typedef mg_image_data* (*mg_canvas_backend_image_create_proc)(mg_canvas_backend* backend, vec2 size);
typedef void (*mg_canvas_backend_image_destroy_proc)(mg_canvas_backend* backend, mg_image_data* image); typedef void (*mg_canvas_backend_image_destroy_proc)(mg_canvas_backend* backend, mg_image_data* image);
@ -104,9 +96,6 @@ typedef void (*mg_canvas_backend_render_proc)(mg_canvas_backend* backend,
typedef struct mg_canvas_backend typedef struct mg_canvas_backend
{ {
mg_canvas_backend_destroy_proc destroy; mg_canvas_backend_destroy_proc destroy;
mg_canvas_backend_begin_proc begin;
mg_canvas_backend_end_proc end;
mg_canvas_backend_draw_batch_proc drawBatch;
mg_canvas_backend_image_create_proc imageCreate; mg_canvas_backend_image_create_proc imageCreate;
mg_canvas_backend_image_destroy_proc imageDestroy; mg_canvas_backend_image_destroy_proc imageDestroy;

View File

@ -23,6 +23,7 @@ typedef struct mg_mtl_path
vector_float4 color; vector_float4 color;
vector_float4 box; vector_float4 box;
vector_float4 clip; vector_float4 clip;
int texture;
} mg_mtl_path; } mg_mtl_path;
typedef enum { typedef enum {
@ -103,4 +104,8 @@ typedef struct mg_mtl_screen_tile
} mg_mtl_screen_tile; } mg_mtl_screen_tile;
enum {
MG_MTL_MAX_IMAGES_PER_BATCH = 30
};
#endif //__MTL_RENDERER_H_ #endif //__MTL_RENDERER_H_

View File

@ -71,6 +71,8 @@ typedef struct mg_mtl_canvas_backend
int maxTileQueueCount; int maxTileQueueCount;
int maxSegmentCount; int maxSegmentCount;
int currentImageIndex;
} mg_mtl_canvas_backend; } mg_mtl_canvas_backend;
typedef struct mg_mtl_image_data typedef struct mg_mtl_image_data
@ -249,7 +251,9 @@ void mg_mtl_encode_path(mg_mtl_canvas_backend* backend, mg_primitive* primitive,
path->uvTransform = simd_matrix(simd_make_float3(uvTransform.m[0]/scale, uvTransform.m[3]/scale, 0), path->uvTransform = simd_matrix(simd_make_float3(uvTransform.m[0]/scale, uvTransform.m[3]/scale, 0),
simd_make_float3(uvTransform.m[1]/scale, uvTransform.m[4]/scale, 0), simd_make_float3(uvTransform.m[1]/scale, uvTransform.m[4]/scale, 0),
simd_make_float3(uvTransform.m[2], uvTransform.m[5], 1)); simd_make_float3(uvTransform.m[2], uvTransform.m[5], 1));
} }
path->texture = backend->currentImageIndex;
int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_MTL_TILE_SIZE + 1; int nTilesX = ((path->box.z - path->box.x)*scale - 1) / MG_MTL_TILE_SIZE + 1;
int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_MTL_TILE_SIZE + 1; int nTilesY = ((path->box.w - path->box.y)*scale - 1) / MG_MTL_TILE_SIZE + 1;
@ -932,7 +936,7 @@ void mg_mtl_grow_buffer_if_needed(mg_mtl_canvas_backend* backend, id<MTLBuffer>*
void mg_mtl_render_batch(mg_mtl_canvas_backend* backend, void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
mg_mtl_surface* surface, mg_mtl_surface* surface,
mg_image_data* image, mg_image* images,
int tileSize, int tileSize,
int nTilesX, int nTilesX,
int nTilesY, int nTilesY,
@ -1084,14 +1088,17 @@ void mg_mtl_render_batch(mg_mtl_canvas_backend* backend,
[rasterEncoder setTexture:backend->outTexture atIndex:0]; [rasterEncoder setTexture:backend->outTexture atIndex:0];
int useTexture = 0; for(int i=0; i<MG_MTL_MAX_IMAGES_PER_BATCH; i++)
{
if(images[i].h)
{
mg_mtl_image_data* image = (mg_mtl_image_data*)mg_image_data_from_handle(images[i]);
if(image) if(image)
{ {
mg_mtl_image_data* mtlImage = (mg_mtl_image_data*)image; [rasterEncoder setTexture: image->texture atIndex: 1+i];
[rasterEncoder setTexture: mtlImage->texture atIndex: 1]; }
useTexture = 1; }
} }
[rasterEncoder setBytes: &useTexture length:sizeof(int) atIndex: 9];
MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1); MTLSize rasterGridSize = MTLSizeMake(viewportSize.x, viewportSize.y, 1);
MTLSize rasterGroupSize = MTLSizeMake(MG_MTL_TILE_SIZE, MG_MTL_TILE_SIZE, 1); MTLSize rasterGroupSize = MTLSizeMake(MG_MTL_TILE_SIZE, MG_MTL_TILE_SIZE, 1);
@ -1226,27 +1233,52 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
//NOTE: encode and render batches //NOTE: encode and render batches
vec2 currentPos = {0}; vec2 currentPos = {0};
mg_image images[MG_MTL_MAX_IMAGES_PER_BATCH] = {0};
mg_image currentImage = mg_image_nil(); int imageCount = 0;
for(int primitiveIndex = 0; primitiveIndex < primitiveCount; primitiveIndex++) for(int primitiveIndex = 0; primitiveIndex < primitiveCount; primitiveIndex++)
{ {
mg_primitive* primitive = &primitives[primitiveIndex]; mg_primitive* primitive = &primitives[primitiveIndex];
if(primitiveIndex && (primitive->attributes.image.h != currentImage.h)) if(primitive->attributes.image.h != 0)
{
backend->currentImageIndex = -1;
for(int i=0; i<imageCount; i++)
{
if(images[i].h == primitive->attributes.image.h)
{
backend->currentImageIndex = i;
}
}
if(backend->currentImageIndex <= 0)
{
if(imageCount<MG_MTL_MAX_IMAGES_PER_BATCH)
{
images[imageCount] = primitive->attributes.image;
backend->currentImageIndex = imageCount;
imageCount++;
}
else
{ {
mg_image_data* imageData = mg_image_data_from_handle(currentImage);
mg_mtl_render_batch(backend, mg_mtl_render_batch(backend,
surface, surface,
imageData, images,
tileSize, tileSize,
nTilesX, nTilesX,
nTilesY, nTilesY,
viewportSize, viewportSize,
scale); scale);
images[0] = primitive->attributes.image;
backend->currentImageIndex = 0;
imageCount = 1;
}
}
}
else
{
backend->currentImageIndex = -1;
} }
currentImage = primitive->attributes.image;
if(primitive->path.count) if(primitive->path.count)
{ {
@ -1296,11 +1328,9 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface,
} }
} }
mg_image_data* imageData = mg_image_data_from_handle(currentImage);
mg_mtl_render_batch(backend, mg_mtl_render_batch(backend,
surface, surface,
imageData, images,
tileSize, tileSize,
nTilesX, nTilesX,
nTilesY, nTilesY,

View File

@ -1450,7 +1450,7 @@ kernel void mtl_merge(constant int* pathCount [[buffer(0)]],
{ {
pathOp->kind = MG_MTL_OP_FILL; pathOp->kind = MG_MTL_OP_FILL;
if(pathBuffer[pathIndex].color.a == 1) if(pathBuffer[pathIndex].color.a == 1 && pathBuffer[pathIndex].texture < 0)
{ {
screenTilesBuffer[tileIndex].first = pathOpIndex; screenTilesBuffer[tileIndex].first = pathOpIndex;
} }
@ -1512,9 +1512,8 @@ kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buff
constant int* sampleCountBuffer [[buffer(6)]], constant int* sampleCountBuffer [[buffer(6)]],
device char* logBuffer [[buffer(7)]], device char* logBuffer [[buffer(7)]],
device atomic_int* logOffsetBuffer [[buffer(8)]], device atomic_int* logOffsetBuffer [[buffer(8)]],
constant int* useTexture [[buffer(9)]],
texture2d<float, access::write> outTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(0)]],
texture2d<float> srcTexture [[texture(1)]], array<texture2d<float>, MG_MTL_MAX_IMAGES_PER_BATCH> srcTextures [[texture(1)]],
uint2 threadGroupCoord [[threadgroup_position_in_grid]], uint2 threadGroupCoord [[threadgroup_position_in_grid]],
uint2 localCoord [[thread_position_in_threadgroup]]) uint2 localCoord [[thread_position_in_threadgroup]])
{ {
@ -1611,7 +1610,8 @@ kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buff
float4 nextColor = pathBuffer[pathIndex].color; float4 nextColor = pathBuffer[pathIndex].color;
nextColor.rgb *= nextColor.a; nextColor.rgb *= nextColor.a;
if(useTexture[0]) int textureIndex = pathBuffer[pathIndex].texture;
if(textureIndex >= 0 && textureIndex < MG_MTL_MAX_IMAGES_PER_BATCH)
{ {
constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear); constexpr sampler smp(mip_filter::nearest, mag_filter::linear, min_filter::linear);
@ -1622,7 +1622,7 @@ kernel void mtl_raster(const device mg_mtl_screen_tile* screenTilesBuffer [[buff
float3 ph = float3(sampleCoord.xy, 1); float3 ph = float3(sampleCoord.xy, 1);
float2 uv = (pathBuffer[pathIndex].uvTransform * ph).xy; float2 uv = (pathBuffer[pathIndex].uvTransform * ph).xy;
texColor += srcTexture.sample(smp, uv); texColor += srcTextures[textureIndex].sample(smp, uv);
} }
texColor /= srcSampleCount; texColor /= srcSampleCount;
texColor.rgb *= texColor.a; texColor.rgb *= texColor.a;