From d1fab449bcbebf6d41ccff1fb4110d18ec6304de Mon Sep 17 00:00:00 2001 From: Martin Fouilleul Date: Fri, 7 Apr 2023 10:15:37 +0200 Subject: [PATCH] [mtl canvas] Fixed loop implicit matrix --- build.sh | 2 +- examples/polygon/main.c | 1 - src/mtl_renderer.h | 2 + src/mtl_renderer.m | 68 ++++- src/mtl_renderer.metal | 569 ++++++++++++++++++++++++++++++++-------- 5 files changed, 519 insertions(+), 123 deletions(-) diff --git a/build.sh b/build.sh index 3ac7a17..5fcf322 100755 --- a/build.sh +++ b/build.sh @@ -55,7 +55,7 @@ fi if [ $target = 'lib' ] ; then # compile metal shader - xcrun -sdk macosx metal $shaderFlagParam -c -o $BINDIR/mtl_renderer.air $SRCDIR/mtl_renderer.metal + xcrun -sdk macosx metal $shaderFlagParam -fno-fast-math -c -o $BINDIR/mtl_renderer.air $SRCDIR/mtl_renderer.metal xcrun -sdk macosx metallib -o $RESDIR/mtl_renderer.metallib $BINDIR/mtl_renderer.air # compile milepost. We use one compilation unit for all C code, and one compilation diff --git a/examples/polygon/main.c b/examples/polygon/main.c index 9aebb5f..74dfc29 100644 --- a/examples/polygon/main.c +++ b/examples/polygon/main.c @@ -151,7 +151,6 @@ int main() mg_set_color_rgba(0, 0, 1, 1); mg_stroke(); - /* mg_move_to(x+8, y+8); mg_line_to(x+33, y+8); diff --git a/src/mtl_renderer.h b/src/mtl_renderer.h index ae9abe3..fd969a3 100644 --- a/src/mtl_renderer.h +++ b/src/mtl_renderer.h @@ -33,6 +33,7 @@ typedef enum { typedef struct mg_mtl_path_elt { int pathIndex; + int localEltIndex; mg_mtl_seg_kind kind; vector_float2 p[4]; } mg_mtl_path_elt; @@ -51,6 +52,7 @@ typedef struct mg_mtl_segment mg_mtl_seg_config config; //TODO pack these int windingIncrement; vector_float4 box; + matrix_float3x3 hullMatrix; matrix_float3x3 implicitMatrix; int debugID; diff --git a/src/mtl_renderer.m b/src/mtl_renderer.m index 3c86418..858cdec 100644 --- a/src/mtl_renderer.m +++ b/src/mtl_renderer.m @@ -88,8 +88,9 @@ typedef struct mg_mtl_encoding_context int mtlEltCount; mg_mtl_path_elt* elementBufferData; int pathIndex; + int localEltIndex; mg_primitive* primitive; - vec4 pathExtents; + vec4 pathScreenExtents; } mg_mtl_encoding_context; @@ -121,10 +122,12 @@ void mg_mtl_canvas_encode_element(mg_mtl_encoding_context* context, mg_path_elt_ break; } + mtlElt->localEltIndex = context->localEltIndex; + for(int i=0; ipathExtents, p[i]); vec2 screenP = mg_mat2x3_mul(context->primitive->attributes.transform, p[i]); + mg_update_path_extents(&context->pathScreenExtents, screenP); mtlElt->p[i] = (vector_float2){screenP.x, screenP.y}; } } @@ -155,6 +158,19 @@ void mg_mtl_render_stroke_quadratic(mg_mtl_encoding_context* context, vec2* p) f32 width = context->primitive->attributes.width; f32 tolerance = minimum(context->primitive->attributes.tolerance, 0.5 * width); + //NOTE: check for degenerate line case + const f32 equalEps = 1e-3; + if(vec2_close(p[0], p[1], equalEps)) + { + mg_mtl_render_stroke_line(context, p+1); + return; + } + else if(vec2_close(p[1], p[2], equalEps)) + { + mg_mtl_render_stroke_line(context, p); + return; + } + vec2 leftHull[3]; vec2 rightHull[3]; @@ -233,6 +249,30 @@ void mg_mtl_render_stroke_cubic(mg_mtl_encoding_context* context, vec2* p) f32 width = context->primitive->attributes.width; f32 tolerance = minimum(context->primitive->attributes.tolerance, 0.5 * width); + //NOTE: check degenerate line cases + f32 equalEps = 1e-3; + + if( (vec2_close(p[0], p[1], equalEps) && vec2_close(p[2], p[3], equalEps)) + ||(vec2_close(p[0], p[1], equalEps) && vec2_close(p[1], p[2], equalEps)) + ||(vec2_close(p[1], p[2], equalEps) && vec2_close(p[2], p[3], equalEps))) + { + vec2 line[2] = {p[0], p[3]}; + mg_mtl_render_stroke_line(context, line); + return; + } + else if(vec2_close(p[0], p[1], equalEps) && vec2_close(p[1], p[3], equalEps)) + { + vec2 line[2] = {p[0], vec2_add(vec2_mul(5./9, p[0]), vec2_mul(4./9, p[2]))}; + mg_mtl_render_stroke_line(context, line); + return; + } + else if(vec2_close(p[0], p[2], equalEps) && vec2_close(p[2], p[3], equalEps)) + { + vec2 line[2] = {p[0], vec2_add(vec2_mul(5./9, p[0]), vec2_mul(4./9, p[1]))}; + mg_mtl_render_stroke_line(context, line); + return; + } + vec2 leftHull[4]; vec2 rightHull[4]; @@ -373,10 +413,6 @@ void mg_mtl_stroke_cap(mg_mtl_encoding_context* context, vec2 p0, vec2 direction) { - ////////////////////////////////////////////////////////// - //TODO: fix orientation here! - ////////////////////////////////////////////////////////// - mg_attributes* attributes = &context->primitive->attributes; //NOTE(martin): compute the tangent and normal vectors (multiplied by half width) at the cap point @@ -525,7 +561,6 @@ u32 mg_mtl_render_stroke_subpath(mg_mtl_encoding_context* context, { //NOTE(martin): add a closing joint if the path is closed mg_mtl_stroke_joint(context, endPoint, endTangent, firstTangent); - printf("closing joint for shape %i\n", context->pathIndex); } } else if(attributes->cap == MG_CAP_SQUARE) @@ -592,25 +627,30 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, if(primitive->path.count) { context.primitive = primitive; - context.pathIndex = primitiveIndex; - context.pathExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX}; + context.pathIndex = pathCount; + context.pathScreenExtents = (vec4){FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX}; if(primitive->cmd == MG_CMD_STROKE) { - mg_mtl_render_stroke(&context, pathElements + primitive->path.startIndex, &primitive->path); + continue; + // mg_mtl_render_stroke(&context, pathElements + primitive->path.startIndex, &primitive->path); } else { + int segCount = 0; for(int eltIndex = 0; (eltIndex < primitive->path.count) && (primitive->path.startIndex + eltIndex < eltCount); eltIndex++) { + context.localEltIndex = segCount; + mg_path_elt* elt = &pathElements[primitive->path.startIndex + eltIndex]; if(elt->type != MG_PATH_MOVE) { vec2 p[4] = {currentPos, elt->p[0], elt->p[1], elt->p[2]}; mg_mtl_canvas_encode_element(&context, elt->type, p); + segCount++; } switch(elt->type) { @@ -637,10 +677,10 @@ void mg_mtl_canvas_render(mg_canvas_backend* interface, pathCount++; path->cmd = (mg_mtl_cmd)primitive->cmd; - path->box = (vector_float4){maximum(primitive->attributes.clip.x, context.pathExtents.x), - maximum(primitive->attributes.clip.y, context.pathExtents.y), - minimum(primitive->attributes.clip.x + primitive->attributes.clip.w, context.pathExtents.z), - minimum(primitive->attributes.clip.y + primitive->attributes.clip.h, context.pathExtents.w)}; + path->box = (vector_float4){maximum(primitive->attributes.clip.x, context.pathScreenExtents.x), + maximum(primitive->attributes.clip.y, context.pathScreenExtents.y), + minimum(primitive->attributes.clip.x + primitive->attributes.clip.w, context.pathScreenExtents.z), + minimum(primitive->attributes.clip.y + primitive->attributes.clip.h, context.pathScreenExtents.w)}; path->color = (vector_float4){primitive->attributes.color.r, primitive->attributes.color.g, diff --git a/src/mtl_renderer.metal b/src/mtl_renderer.metal index d5eff37..3632da0 100644 --- a/src/mtl_renderer.metal +++ b/src/mtl_renderer.metal @@ -65,8 +65,10 @@ void mtl_log(mtl_log_context context, const thread char* msg) } } -int mtl_itoa(int bufSize, thread char* buffer, thread char** start, int64_t value) +int mtl_itoa_right_aligned(int bufSize, thread char* buffer, int64_t value, bool zeroPad) { + // convert value to a null-terminated string at end of buffer and returns the size + // (excluding the final null). bool minus = false; if(value < 0) { @@ -75,12 +77,23 @@ int mtl_itoa(int bufSize, thread char* buffer, thread char** start, int64_t valu } buffer[bufSize-1] = '\0'; int index = bufSize-2; + int stop = minus ? 1 : 0; + do { buffer[index] = '0' + (value % 10); index--; value /= 10; - } while(value != 0 && index >= 1); + } while(value != 0 && index >= stop); + + if(zeroPad) + { + while(index >= stop) + { + buffer[index] = '0'; + index--; + } + } if(minus) { @@ -88,62 +101,110 @@ int mtl_itoa(int bufSize, thread char* buffer, thread char** start, int64_t valu index--; } - *start = buffer+index+1; - return(bufSize - (index+1) - 1); + int count = bufSize - (index+1); + return(count - 1); } +int mtl_itoa(int bufSize, thread char* buffer, int64_t value) +{ + int count = mtl_itoa_right_aligned(bufSize, buffer, value, false); + int start = bufSize - (count+1); + + for(int i=0; i 999999) - { - decimal /= 10; - } - if(decimal < 0) - { - decimal *= -1; - } + int64_t integral = (int64_t)value; + int64_t decimal = (int64_t)((value - (float)integral)*1e6); const int bufSize = 64; char buffer[bufSize]; - thread char* start = 0; - int integralSize = mtl_itoa(bufSize, buffer, &start, integral); - - for(int i=0; i 0) { - buffer[integralSize+i] = start[i]; + decimal /= 10; + width--; } + + int decSize = min(bufSize-index, width+1); + mtl_itoa_right_aligned(decSize, buffer+index, decimal, true); } - buffer[integralSize+decimalSize] = '\0'; + buffer[bufSize-1] = '\0'; mtl_log(context, buffer); } +void mtl_log_point(mtl_log_context context, float2 p) +{ + mtl_log(context, "("); + mtl_log_f32(context, p.x); + mtl_log(context, ", "); + mtl_log_f32(context, p.y); + mtl_log(context, ")"); +} + +void log_line(thread float2* p, mtl_log_context logCtx) +{ + mtl_log(logCtx, "("); + mtl_log_f32(logCtx, p[0].x); + mtl_log(logCtx, ", "); + mtl_log_f32(logCtx, p[0].y); + mtl_log(logCtx, ") ("); + mtl_log_f32(logCtx, p[1].x); + mtl_log(logCtx, ", "); + mtl_log_f32(logCtx, p[1].y); + mtl_log(logCtx, ")\n"); +} + +void log_quadratic_bezier(thread float2* p, mtl_log_context logCtx) +{ + mtl_log(logCtx, "("); + mtl_log_f32(logCtx, p[0].x); + mtl_log(logCtx, ", "); + mtl_log_f32(logCtx, p[0].y); + mtl_log(logCtx, ") ("); + mtl_log_f32(logCtx, p[1].x); + mtl_log(logCtx, ", "); + mtl_log_f32(logCtx, p[1].y); + mtl_log(logCtx, ") ("); + mtl_log_f32(logCtx, p[2].x); + mtl_log(logCtx, ", "); + mtl_log_f32(logCtx, p[2].y); + mtl_log(logCtx, ")\n"); +} + + void log_cubic_bezier(thread float2* p, mtl_log_context logCtx) { mtl_log(logCtx, "("); @@ -258,8 +319,16 @@ int mtl_side_of_segment(float2 p, const device mg_mtl_segment* seg, mtl_log_cont case MG_MTL_CUBIC: { float3 ph = {p.x, p.y, 1}; - float3 klm = seg->implicitMatrix * ph; - side = (klm.x*klm.x*klm.x - klm.y*klm.z < 0)? -1 : 1; + float3 hullCoords = seg->hullMatrix * ph; + if(all(hullCoords > 0)) + { + float3 klm = seg->implicitMatrix * ph; + side = (klm.x*klm.x*klm.x - klm.y*klm.z < 0)? -1 : 1; + } + else + { + side = (seg->config == MG_MTL_BL || seg->config == MG_MTL_TL) ? -1 : 1; + } } break; } } @@ -391,17 +460,20 @@ device mg_mtl_segment* mtl_segment_push(thread mtl_segment_setup_context* contex break; case MG_MTL_CUBIC: + { s = p[0]; - if(any(p[1] != p[0])) - { - c = p[1]; - } - else + float sqrNorm0 = length_squared(p[1]-p[0]); + float sqrNorm1 = length_squared(p[3]-p[2]); + if(sqrNorm0 < sqrNorm1) { c = p[2]; } + else + { + c = p[1]; + } e = p[3]; - break; + } break; } int segIndex = atomic_fetch_add_explicit(context->segmentCount, 1, memory_order_relaxed); @@ -513,6 +585,16 @@ int mtl_quadratic_monotonize(float2 p[3], float splits[4]) return(count); } +matrix_float3x3 mtl_barycentric_matrix(float2 v0, float2 v1, float2 v2) +{ + float det = v0.x*(v1.y-v2.y) + v1.x*(v2.y-v0.y) + v2.x*(v0.y - v1.y); + matrix_float3x3 B = {{v1.y - v2.y, v2.y-v0.y, v0.y-v1.y}, + {v2.x - v1.x, v0.x-v2.x, v1.x-v0.x}, + {v1.x*v2.y-v2.x*v1.y, v2.x*v0.y-v0.x*v2.y, v0.x*v1.y-v1.x*v0.y}}; + B *= (1/det); + return(B); +} + void mtl_quadratic_emit(thread mtl_segment_setup_context* context, thread float2* p) { @@ -552,25 +634,86 @@ void mtl_quadratic_setup(thread mtl_segment_setup_context* context, thread float } } -int mtl_quadratic_roots(float a, float b, float c, thread float* r) +/* + diff_of_products() computes a*b-c*d with a maximum error <= 1.5 ulp + + Claude-Pierre Jeannerod, Nicolas Louvet, and Jean-Michel Muller, + "Further Analysis of Kahan's Algorithm for the Accurate Computation + of 2x2 Determinants". Mathematics of Computation, Vol. 82, No. 284, + Oct. 2013, pp. 2245-2264 +*/ +float diff_of_products (float a, float b, float c, float d) +{ + float w = d * c; + float e = fma(-d, c, w); + float f = fma(a, b, -w); + return(f + e); +} + +int mtl_quadratic_roots_with_det(float a, float b, float c, float det, thread float* r, mtl_log_context log = {.enabled = false}) { - //TODO: replace by something more numerically stable int count = 0; - float det = square(b) - 4*a*c; - if(det > 0) + + if(a == 0) { - count = 2; - r[0] = (-b - sqrt(det))/(2*a); - r[1] = (-b + sqrt(det))/(2*a); + if(b) + { + count = 1; + r[0] = -c/b; + } } - else if(det == 0) + else { - count = 1; - r[0] = -b/(2*a); + b /= 2.0; + + if(det >= 0) + { + count = (det == 0) ? 1 : 2; + + if(b > 0) + { + float q = b + sqrt(det); + r[0] = -c/q; + r[1] = -q/a; + } + else if(b < 0) + { + float q = -b + sqrt(det); + r[0] = q/a; + r[1] = c/q; + } + else + { + float q = sqrt(-a*c); + if(fabs(a) >= fabs(c)) + { + r[0] = q/a; + r[1] = -q/a; + } + else + { + r[0] = -c/q; + r[1] = c/q; + } + } + } + } + if(count>1 && r[0] > r[1]) + { + float tmp = r[0]; + r[0] = r[1]; + r[1] = tmp; } return(count); } +int mtl_quadratic_roots(float a, float b, float c, thread float* r, mtl_log_context log = {.enabled = false}) +{ + //float det = diff_of_products(b, b, a, c); + float det = square(b)/4. - a*c; + return(mtl_quadratic_roots_with_det(a, b, c, det, r, log)); +} + void mtl_cubic_slice(float2 p[4], float s0, float s1, float2 sp[4]) { float sr = (s1 - s0)/(1-s0); @@ -597,7 +740,7 @@ void mtl_cubic_slice(float2 p[4], float s0, float s1, float2 sp[4]) sp[3] = float2(qx.w, qy.w); } -int mtl_cubic_monotonize(float2 p[4], float splits[8]) +int mtl_cubic_monotonize(float2 p[4], float splits[8], mtl_log_context log) { //NOTE(martin): first convert the control points to power basis float2 c[4]; @@ -612,10 +755,31 @@ int mtl_cubic_monotonize(float2 p[4], float splits[8]) rootCount += mtl_quadratic_roots(3*c[3].y, 2*c[2].y, c[1].y, roots+rootCount); //NOTE: compute inflection points - rootCount += mtl_quadratic_roots(6*(c[2].x*c[3].y-c[3].x*c[2].y), - 6*(c[1].x*c[3].y-c[1].y*c[3].x), - 2*(c[1].x*c[2].y-c[1].y*c[2].x), - roots + rootCount); + rootCount += mtl_quadratic_roots(3*(c[2].x*c[3].y - c[3].x*c[2].y), + 3*(c[1].x*c[3].y - c[1].y*c[3].x), + (c[1].x*c[2].y - c[1].y*c[2].x), + roots + rootCount, + log); +/* + mtl_log(log, "bezier basis: "); + log_cubic_bezier(p, log); + + mtl_log(log, "power basis: "); + log_cubic_bezier(c, log); + + mtl_log(log, "inflection equation: "); + mtl_log_f32(log, 3*(c[2].x*c[3].y-c[3].x*c[2].y)); + mtl_log(log, ", "); + mtl_log_f32(log, 3*(c[1].x*c[3].y-c[1].y*c[3].x)); + mtl_log(log, ", "); + mtl_log_f32(log, (c[1].x*c[2].y-c[1].y*c[2].x)); + mtl_log(log, "\n"); + + + mtl_log(log, "inflection split count: "); + mtl_log_i32(log, rootCount-tmp); + mtl_log(log, "\n"); +*/ //NOTE: sort roots for(int i=1; i 0 && roots[i] < 1) { splits[splitCount] = roots[i]; @@ -687,9 +856,9 @@ mtl_cubic_info mtl_cubic_classify(thread float2* p, mtl_log_context log = {.enab c2 = 3*p0 - 6*p1 + 3*p2 c3 = -p0 + 3*p1 - 3*p2 + p3 */ - float2 c1 = 3.0*p[1] - 3.0*p[0]; - float2 c2 = 3.0*p[0] + 3.0*p[2] - 6.0*p[1]; - float2 c3 = 3.0*p[1] - 3.0*p[2] + p[3] - p[0]; + float2 c1 = 3.0*(p[1] - p[0]); + float2 c2 = 3.0*(p[0] + p[2] - 2*p[1]); + float2 c3 = 3.0*(p[1] - p[2]) + p[3] - p[0]; /*NOTE(martin): now, compute determinants d0, d1, d2, d3, which gives the coefficients of the @@ -706,32 +875,65 @@ mtl_cubic_info mtl_cubic_classify(thread float2* p, mtl_log_context log = {.enab In our case, the pi.w equal 1 (no point at infinity), so _in_the_power_basis_, w1 = w2 = w3 = 0 and w0 = 1 (which also means d0 = 0) - */ - float d1 = c3.y*c2.x - c3.x*c2.y; - float d2 = c3.x*c1.y - c3.y*c1.x; - float d3 = c2.y*c1.x - c2.x*c1.y; + //WARN: there seems to be a mismatch between the signs of the d_i and the orientation test in the Loop-Blinn paper? + // flipping the sign of the d_i doesn't change the roots (and the implicit matrix), but it does change the orientation. + // Keeping the signs of the paper puts the interior on the left of parametric travel, unlike what's stated in the paper. + // this may very well be an error on my part that's cancelled by flipping the signs of the d_i though! + */ + +/* + mtl_log(log, "bezier basis: "); + log_cubic_bezier(p, log); + + float2 c[4] = {p[0], c1, c2, c3}; + mtl_log(log, "power basis: "); + log_cubic_bezier(c, log); +*/ + float d1 = -(c3.y*c2.x - c3.x*c2.y); + float d2 = -(c3.x*c1.y - c3.y*c1.x); + float d3 = -(c2.y*c1.x - c2.x*c1.y); + +// mtl_log(log, "d1 = "); +/* mtl_log_f32(log, d1); + mtl_log(log, ", d2 = "); + mtl_log_f32(log, d2); + mtl_log(log, ", d3 = "); + mtl_log_f32(log, d3); + mtl_log(log, "\n"); +*/ //NOTE(martin): compute the second factor of the discriminant discr(I) = d1^2*(3*d2^2 - 4*d3*d1) float discrFactor2 = 3.0*square(d2) - 4.0*d3*d1; //NOTE(martin): each following case gives the number of roots, hence the category of the parametric curve - if(fabs(d1) < 0.1 && fabs(d2) < 0.1 && d3 != 0) + if(fabs(d1) < 1e-6 && fabs(d2) < 1e-6 && fabs(d3) > 1e-6) { //NOTE(martin): quadratic degenerate case //NOTE(martin): compute quadratic curve control point, which is at p0 + 1.5*(p1-p0) = 1.5*p1 - 0.5*p0 result.kind = MTL_CUBIC_DEGENERATE_QUADRATIC; result.quadPoint = float2(1.5*p[1].x - 0.5*p[0].x, 1.5*p[1].y - 0.5*p[0].y); } - else if( (discrFactor2 > 0 && d1 != 0) - ||(discrFactor2 == 0 && d1 != 0)) + else if( (discrFactor2 > 0 && fabs(d1) > 1e-6) + ||(discrFactor2 == 0 && fabs(d1) > 1e-6)) { //NOTE(martin): serpentine curve or cusp with inflection at infinity // (these two cases are handled the same way). //NOTE(martin): compute the solutions (tl, sl), (tm, sm), and (tn, sn) of the inflection point equation - float tl = d2 + sqrt(discrFactor2/3); + float tmtl[2]; + mtl_quadratic_roots_with_det(1, -2*d2, (4./3.*d1*d3), (1./3.)*discrFactor2, tmtl); + + float tm = tmtl[0]; + float sm = 2*d1; + float tl = tmtl[1]; float sl = 2*d1; - float tm = d2 - sqrt(discrFactor2/3); - float sm = sl; + + float invNorm = 1/sqrt(square(tm) + square(sm)); + tm *= invNorm; + sm *= invNorm; + + invNorm = 1/sqrt(square(tl) + square(sl)); + tl *= invNorm; + sl *= invNorm; /*NOTE(martin): the power basis coefficients of points k,l,m,n are collected into the rows of the 4x4 matrix F: @@ -749,31 +951,61 @@ mtl_cubic_info mtl_cubic_classify(thread float2* p, mtl_log_context log = {.enab {1, 0, 0, 0}}; //NOTE: if necessary, flip sign of k and l to ensure the interior is west from the curve - float flip = (d1 < 0)^(p[3].y < p[0].y) ? -1 : 1; + float flip = (d1 < 0)? -1 : 1; + + if(p[3].y > p[0].y) + { + flip *= -1; + } + F[0] *= flip; F[1] *= flip; } - else if(discrFactor2 < 0 && d1 != 0) + else if(discrFactor2 < 0 && fabs(d1) > 1e-6) { //NOTE(martin): loop curve - float td = d2 + sqrt(-discrFactor2); + float tetd[2]; + mtl_quadratic_roots_with_det(1, -2*d2, 4*(square(d2)-d1*d3), -discrFactor2, tetd, log); + + float td = tetd[1]; float sd = 2*d1; - float te = d2 - sqrt(-discrFactor2); - float se = sd; + float te = tetd[0]; + float se = 2*d1; + + float invNorm = 1/sqrt(square(td) + square(sd)); + td *= invNorm; + sd *= invNorm; + + invNorm = 1/sqrt(square(te) + square(se)); + te *= invNorm; + se *= invNorm; //NOTE(martin): if one of the parameters (td/sd) or (te/se) is in the interval [0,1], the double point // is inside the control points convex hull and would cause a shading anomaly. If this is // the case, subdivide the curve at that point - - //TODO: study edge case where td/sd ~ 1 or 0 (which causes an infinite recursion in split and fill). - // quick fix for now is adding a little slop in the check... - +//* + mtl_log(log, "td = "); + mtl_log_f32(log, td); + mtl_log(log, ", sd = "); + mtl_log_f32(log, sd); + mtl_log(log, ", te = "); + mtl_log_f32(log, te); + mtl_log(log, ", se = "); + mtl_log_f32(log, td); + mtl_log(log, ", td/sd = "); + mtl_log_f32(log, td/sd); + mtl_log(log, ", te/se = "); + mtl_log_f32(log, te/se); + mtl_log(log, "\n"); +//*/ + //TODO: investigate better margins here. The problem is that if we have a double point around 0 or 1, + // splitting the curve might also produce a root in [0, 1] due to numerical errors. if(sd != 0 && td/sd < 0.99 && td/sd > 0.01) { result.kind = MTL_CUBIC_LOOP_SPLIT; result.split = td/sd; } - if(se != 0 && te/se < 0.99 && te/se > 0.01) + else if(se != 0 && te/se < 0.99 && te/se > 0.01) { result.kind = MTL_CUBIC_LOOP_SPLIT; result.split = te/se; @@ -796,21 +1028,43 @@ mtl_cubic_info mtl_cubic_classify(thread float2* p, mtl_log_context log = {.enab {1, 0, 0, 0}}; //NOTE: if necessary, flip sign of k and l to ensure the interior is west from the curve - float H0 = 36*(d3*d1-square(d2)); - float H1 = 36*(d3*d1-square(d2) + d1*d2 - square(d1)); + float H0 = d3*d1-square(d2); + float H1 = d3*d1-square(d2) + d1*d2 - square(d1); float H = (abs(H0) > abs(H1)) ? H0 : H1; - float flip = (H*d1 > 0)^(p[3].y < p[0].y) ? -1 : 1; + float flip = (H*d1 > 0) ? -1 : 1; +/* + mtl_log(log, "H0 = "); + mtl_log_f32(log, H0); + mtl_log(log, ", H1 = "); + mtl_log_f32(log, H1); + mtl_log(log, ", flip = "); + mtl_log_f32(log, flip); + mtl_log(log, "\n"); +*/ + if(p[3].y > p[0].y) + { +/* mtl_log(log, "fixed flip = "); + mtl_log_f32(log, flip); + mtl_log(log, "\n"); +*/ + flip *= -1; + } + F[0] *= flip; F[1] *= flip; } } - else if(d1 == 0 && d2 != 0) + else if(d2 != 0) { //NOTE(martin): cusp with cusp at infinity float tl = d3; float sl = 3*d2; + float invNorm = 1/sqrt(square(tl)+square(sl)); + tl *= invNorm; + sl *= invNorm; + /*NOTE(martin): the power basis coefficients of points k,l,m,n are collected into the rows of the 4x4 matrix F: @@ -831,16 +1085,11 @@ mtl_cubic_info mtl_cubic_classify(thread float2* p, mtl_log_context log = {.enab F[0] *= flip; F[1] *= flip; } - else if(d1 == 0 && d2 == 0 && d3 == 0) + else { //NOTE(martin): line or point degenerate case result.kind = MTL_CUBIC_DEGENERATE_LINE; } - else - { - //TODO(martin): handle error ? put some epsilon slack on the conditions ? - result.kind = MTL_CUBIC_ERROR; - } /* F is then multiplied by M3^(-1) on the left which yelds the bezier coefficients k, l, m, n @@ -861,6 +1110,44 @@ mtl_cubic_info mtl_cubic_classify(thread float2* p, mtl_log_context log = {.enab return(result); } +matrix_float3x3 mtl_hull_matrix(float2 p0, float2 p1, float2 p2, float2 p3, mtl_log_context log) +{ + /*NOTE: check intersection of lines (p1-p0) and (p3-p2) + P = p0 + u(p1-p0) + P = p2 + w(p3-p2) + + control points are inside a right triangle so we should always find an intersection + */ + float2 pm; + + float det = (p1.x - p0.x)*(p3.y - p2.y) - (p1.y - p0.y)*(p3.x - p2.x); + float sqrNorm0 = length_squared(p1-p0); + float sqrNorm1 = length_squared(p2-p3); + + if(fabs(det) < 1e-3 || sqrNorm0 < 0.1 || sqrNorm1 < 0.1) + { + float sqrNorm0 = length_squared(p1-p0); + float sqrNorm1 = length_squared(p2-p3); + + if(sqrNorm0 < sqrNorm1) + { + pm = p2; + } + else + { + pm = p1; + } + } + else + { + float u = ((p0.x - p2.x)*(p2.y - p3.y) - (p0.y - p2.y)*(p2.x - p3.x))/det; + pm = p0 + u*(p1-p0); + } + + matrix_float3x3 m = mtl_barycentric_matrix(p0, p3, pm); + return(m); +} + void mtl_cubic_emit(thread mtl_segment_setup_context* context, float2 p[4], mtl_cubic_info info) { device mg_mtl_segment* seg = mtl_segment_push(context, p, MG_MTL_CUBIC); @@ -870,7 +1157,10 @@ void mtl_cubic_emit(thread mtl_segment_setup_context* context, float2 p[4], mtl_ float2 v2; matrix_float3x3 K; - if(any(p[0] != p[1])) + float sqrNorm0 = length_squared(p[1]-p[0]); + float sqrNorm1 = length_squared(p[2]-p[3]); + + if(sqrNorm0 >= sqrNorm1) { v2 = p[1]; K = {info.K[0].xyz, info.K[3].xyz, info.K[1].xyz}; @@ -880,31 +1170,42 @@ void mtl_cubic_emit(thread mtl_segment_setup_context* context, float2 p[4], mtl_ v2 = p[2]; K = {info.K[0].xyz, info.K[3].xyz, info.K[2].xyz}; } - - //NOTE: compute barycentric matrix - float det = v0.x*(v1.y-v2.y) + v1.x*(v2.y-v0.y) + v2.x*(v0.y - v1.y); - matrix_float3x3 B = {{v1.y - v2.y, v2.y-v0.y, v0.y-v1.y}, - {v2.x - v1.x, v0.x-v2.x, v1.x-v0.x}, - {v1.x*v2.y-v2.x*v1.y, v2.x*v0.y-v0.x*v2.y, v0.x*v1.y-v1.x*v0.y}}; - B *= (1/det); - - //NOTE: set implicit matrix and bin segment + //NOTE: set matrices and bin segment + matrix_float3x3 B = mtl_barycentric_matrix(v0, v1, v2); seg->implicitMatrix = K*B; + seg->hullMatrix = mtl_hull_matrix(p[0], p[1], p[2], p[3], context->log); + mtl_segment_bin_to_tiles(context, seg); } void mtl_cubic_setup(thread mtl_segment_setup_context* context, float2 p[4]) { float splits[8]; - int splitCount = mtl_cubic_monotonize(p, splits); + int splitCount = mtl_cubic_monotonize(p, splits, context->log); + + mtl_log(context->log, "curve = "); + log_cubic_bezier(p, context->log); + + mtl_log(context->log, "split count = "); + mtl_log_i32(context->log, splitCount-1); + mtl_log(context->log, "\n"); //NOTE: produce bézier curve for each consecutive pair of roots for(int sliceIndex=0; sliceIndexlog, "slice = "); + log_cubic_bezier(sp, context->log); + mtl_cubic_info curve = mtl_cubic_classify(sp, context->log); switch(curve.kind) { @@ -926,6 +1227,10 @@ void mtl_cubic_setup(thread mtl_segment_setup_context* context, float2 p[4]) case MTL_CUBIC_LOOP_SPLIT: { + mtl_log(context->log, "loop split: \n"); + mtl_log_f32(context->log, curve.split); + mtl_log(context->log, "\n"); + //NOTE: split and reclassify, check that we have a valid loop and emit float2 ssp[8]; mtl_cubic_slice(sp, 0, curve.split, ssp); @@ -933,18 +1238,35 @@ void mtl_cubic_setup(thread mtl_segment_setup_context* context, float2 p[4]) for(int i=0; i<2; i++) { - curve = mtl_cubic_classify(ssp + 4*i); + mtl_cubic_info splitCurve = mtl_cubic_classify(ssp + 4*i, context->log); - if(curve.kind != MTL_CUBIC_LOOP_OK) + mtl_log(context->log, "loop slice \n"); + mtl_log_i32(context->log, i); + mtl_log(context->log, ": "); + log_cubic_bezier(ssp+i*4, context->log); + + mtl_log_i32(context->log, splitCurve.kind); + mtl_log(context->log, "\n"); + + //////////////////////////////////////////////////////////////////////////////////// + //TODO: here the result of mtl_cubic_classify seems to be changed if we print something + // inside it... + // Anyway, we shouldn't reclassify split curves, just find the new hull matrix? + //////////////////////////////////////////////////////////////////////////////////// + CONTINUE_HERE; + + if(splitCurve.kind == MTL_CUBIC_LOOP_SPLIT) { - mtl_log(context->log, "loop split left error\n"); + mtl_log(context->log, "loop split error ("); + mtl_log_f32(context->log, splitCurve.split); + mtl_log(context->log, ") ****************************************\n"); } else { - mtl_cubic_emit(context, ssp + 4*i, curve); + mtl_cubic_emit(context, ssp + 4*i, splitCurve); } - } + } break; case MTL_CUBIC_LOOP_OK: @@ -974,6 +1296,21 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]], { const device mg_mtl_path_elt* elt = &elementBuffer[eltIndex]; + + //28 + // 125 + // 112 + if(elt->pathIndex != 124) + { + return; + } + + + if(elt->localEltIndex != 4)// && elt->localEltIndex != 3) + { + return; + } + const device mg_mtl_path_queue* pathQueue = &pathQueueBuffer[elt->pathIndex]; device mg_mtl_tile_queue* tileQueues = &tileQueueBuffer[pathQueue->tileQueues]; @@ -987,19 +1324,27 @@ kernel void mtl_segment_setup(constant int* elementCount [[buffer(0)]], .tileSize = tileSize[0], .log.buffer = logBuffer, .log.offset = logOffsetBuffer, - .log.enabled = (eltIndex == 1)}; + .log.enabled = true}; switch(elt->kind) { case MG_MTL_LINE: { float2 p[2] = {elt->p[0]*scale[0], elt->p[1]*scale[0]}; + + mtl_log(setupCtx.log, "line: "); + log_line(p, setupCtx.log); + mtl_line_setup(&setupCtx, p); } break; case MG_MTL_QUADRATIC: { float2 p[3] = {elt->p[0]*scale[0], elt->p[1]*scale[0], elt->p[2]*scale[0]}; + + mtl_log(setupCtx.log, "quadratic: "); + log_quadratic_bezier(p, setupCtx.log); + mtl_quadratic_setup(&setupCtx, p); } break; @@ -1172,9 +1517,17 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]], pathIndex = op->index; winding = op->windingOffset; + + if(op->next != -1) + { + color = float4(0, 1, 0, 1); + } } else if(op->kind == MG_MTL_OP_SEGMENT) { + // outTexture.write(float4(1, 0, 0, 1), uint2(pixelCoord)); + // return; + const device mg_mtl_segment* seg = &segmentBuffer[op->index]; if( (pixelCoord.y > seg->box.y) @@ -1186,6 +1539,8 @@ kernel void mtl_raster(const device int* screenTilesBuffer [[buffer(0)]], if(op->crossRight) { + color = float4(0, 1, 1, 1); + if( (seg->config == MG_MTL_BR || seg->config == MG_MTL_TL) &&(pixelCoord.y > seg->box.w)) {