mb <-> c32 functions

This commit is contained in:
bumbread 2022-07-18 02:09:26 +11:00
parent 644364d2da
commit 3f7c3de288
1 changed file with 14 additions and 66 deletions

View File

@ -100,47 +100,19 @@ size_t mbrtoc32(
mbstate_t *restrict ps mbstate_t *restrict ps
) { ) {
if(s == NULL) { if(s == NULL) {
*ps = (mbstate_t) {0};
return 0; return 0;
} }
size_t nbytes; char32_t code_point;
int mblen = utf8_chdec((char8_t *)s, n, &code_point);
// Decode the first byte of UTF-8 sequence if(mblen == UNI_ESTRLN) return (size_t)(-2);
unsigned byte0 = *s; if(mblen <= 0) {
if (0x00 <= byte0 && byte0 < 0x80) nbytes = 1;
else if(0xc0 <= byte0 && byte0 < 0xe0) nbytes = 2;
else if(0xe0 <= byte0 && byte0 < 0xf0) nbytes = 3;
else if(0xf0 <= byte0 && byte0 < 0xf8) nbytes = 4;
else goto encoding_error;
unsigned nbytesreq = nbytes;
if(n < nbytesreq) {
return (size_t)(-2);
}
char32_t cp = byte0;
switch(nbytesreq) {
case 2: cp &= 0x1f; break;
case 3: cp &= 0x0f; break;
case 4: cp &= 0x07; break;
}
while(--nbytesreq)
cp |= (cp << 6) | ((*++s) & 0x3f);
if(0xdc00 <= cp && cp <= 0xe000)
goto encoding_error;
// Overlong sequences
if(cp < 0x80 && nbytes > 1) goto encoding_error;
if(cp < 0x800 && nbytes > 2) goto encoding_error;
if(cp < 0x10000 && nbytes > 3) goto encoding_error;
if(cp > 0x10ffff) goto encoding_error;
if(pc32 != NULL) *pc32 = cp;
if(cp == 0)
return 0;
else
return nbytes;
encoding_error:
errno = EILSEQ; errno = EILSEQ;
return (size_t)(-1); return (size_t)(-1);
} }
*pc32 = code_point;
if(code_point == 0) return 0;
return (size_t)mblen;
}
size_t c32rtomb( size_t c32rtomb(
char *restrict s, char *restrict s,
@ -151,34 +123,10 @@ size_t c32rtomb(
*ps = (mbstate_t) {0}; *ps = (mbstate_t) {0};
return 0; return 0;
} }
unsigned cp = c32; int mblen = utf8_enc(s, 4, c32);
if(cp >= 0x10ffff) goto encoding_error; if(mblen <= 0) {
size_t nbytes = 4;
if(cp < 0x10000) nbytes = 3;
if(cp < 0x800) nbytes = 2;
if(cp < 0x80) nbytes = 1;
switch(nbytes) {
case 1: {
s[0] = cp;
} break;
case 2: {
s[0] = 0xc0 | (cp >> 6);
s[1] = 0x80 | ((cp >> 0) & 0x3f);
} break;
case 3: {
s[0] = 0xe0 | (cp >> 12);
s[1] = 0x80 | ((cp >> 6) & 0x3f);
s[2] = 0x80 | ((cp >> 0) & 0x3f);
} break;
case 4: {
s[0] = 0xf0 | (cp >> 18);
s[1] = 0x80 | ((cp >> 12) & 0x3f);
s[2] = 0x80 | ((cp >> 6) & 0x3f);
s[3] = 0x80 | ((cp >> 0) & 0x3f);
} break;
}
return nbytes;
encoding_error:
errno = EILSEQ; errno = EILSEQ;
return (size_t)(-1); return (size_t)(-1);
} }
return (size_t)mblen;
}