ciabatta/src/uchar.c

127 lines
3.5 KiB
C
Raw Normal View History

2022-06-22 12:36:26 +00:00
size_t mbrtoc16(
char16_t *restrict pc16,
char const *restrict s,
size_t n,
mbstate_t *restrict ps
) {
2022-07-15 17:33:55 +00:00
// Figure out the conversion state
static mbstate_t static_mbstate = {0};
if(ps == NULL) ps = &static_mbstate;
2022-06-22 12:36:26 +00:00
if(s == NULL) {
2022-07-15 17:33:55 +00:00
*ps = (mbstate_t) {0xd800};
2022-06-22 12:36:26 +00:00
return 0;
}
2022-07-15 17:33:55 +00:00
// Check leftovers, using 0xd800 as "no leftover" marker because it
// doesn't encode a valid character.
if(ps->leftover == 0xd800) {
// Decode the UTF-8 encoded codepoint
char32_t code_point;
int mblen = utf8_chdec((char8_t *)s, n, &code_point);
if(mblen == UNI_ESTRLN) return (size_t)(-2);
if(mblen <= 0) goto invalid_seq;
// Encode the codepoint into UTF-16 string
char16_t str[2];
int c16len = utf16_chenc(str, 2, code_point);
if(c16len <= 0) goto invalid_seq;
// Assign the decoded UTF-16 character, decide leftover
if(pc16 != NULL) *pc16 = str[0];
ps->leftover = (c16len == 2? str[1] : 0xd800);
return (size_t)mblen;
}
else {
// Otherwise use and reset the leftover
if(pc16 != NULL) *pc16 = ps->leftover;
ps->leftover = 0xd800;
return (size_t)(-3);
}
2022-07-15 17:33:55 +00:00
invalid_seq:
2022-06-22 12:36:26 +00:00
errno = EILSEQ;
return (size_t)(-1);
}
2022-07-15 17:33:55 +00:00
2022-06-22 12:36:26 +00:00
size_t c16rtomb(
char *restrict s,
char16_t c16,
mbstate_t *restrict ps
2022-06-24 01:24:04 +00:00
) {
2022-07-15 17:33:55 +00:00
// Figure out conversion state
static mbstate_t static_mbstate = {0};
if(ps == NULL) ps = &static_mbstate;
2022-06-24 02:43:47 +00:00
if(s == NULL) {
2022-07-15 17:33:55 +00:00
*ps = (mbstate_t) {0xd800};
2022-06-24 01:24:04 +00:00
return 0;
}
2022-07-15 17:33:55 +00:00
char32_t codepoint_to_write;
// Check whether a high surrogate was detected in a previous call to the
// function. If not, the high_surrogate value is 0xd800
if(ps->high_surrogate == 0xd800) {
// If c16 is a surrogate record it, or throw an error
if(uni_is_hsur(c16)) {
ps->high_surrogate = c16;
return 0;
}
else if(uni_is_lsur(c16)) {
goto invalid_char;
}
// We'll just write c16
codepoint_to_write = c16;
2022-06-24 01:24:04 +00:00
}
2022-07-15 17:33:55 +00:00
// If high surrogate exists, the next character must be a low surrogate
// so we'll write a codepoint made out of high and low surrogates
else if(uni_is_lsur(c16)) {
codepoint_to_write = uni_surtoc(ps->high_surrogate, c16);
2022-06-24 01:24:04 +00:00
}
2022-07-15 17:33:55 +00:00
else goto invalid_char;
// Write the codepoint that we decided to write to multibyte string
2022-07-15 17:41:38 +00:00
int written_len = utf8_chenc((char8_t *)s, 4, codepoint_to_write);
2022-07-15 17:33:55 +00:00
if(written_len < 0) {
goto invalid_char;
2022-06-24 01:24:04 +00:00
}
2022-07-15 17:41:38 +00:00
s[written_len] = 0;
2022-07-15 17:33:55 +00:00
return (size_t)written_len;
invalid_char:
2022-06-24 01:24:04 +00:00
errno = EILSEQ;
return (size_t)(-1);
}
2022-06-22 12:36:26 +00:00
size_t mbrtoc32(
char32_t *restrict pc32,
char const *restrict s,
size_t n,
mbstate_t *restrict ps
2022-06-24 01:24:04 +00:00
) {
if(s == NULL) {
return 0;
}
2022-07-17 15:09:26 +00:00
char32_t code_point;
int mblen = utf8_chdec((char8_t *)s, n, &code_point);
if(mblen == UNI_ESTRLN) return (size_t)(-2);
if(mblen <= 0) {
errno = EILSEQ;
return (size_t)(-1);
2022-06-24 01:24:04 +00:00
}
2022-07-17 15:09:26 +00:00
*pc32 = code_point;
if(code_point == 0) return 0;
return (size_t)mblen;
2022-06-24 01:24:04 +00:00
}
2022-06-22 12:36:26 +00:00
size_t c32rtomb(
char *restrict s,
char32_t c32,
mbstate_t *restrict ps
2022-06-24 01:24:04 +00:00
) {
2022-06-24 02:43:47 +00:00
if(s == NULL) {
2022-06-24 01:24:04 +00:00
*ps = (mbstate_t) {0};
return 0;
}
2022-07-17 19:43:41 +00:00
int mblen = utf8_chenc((char8_t *)s, 4, c32);
2022-07-17 15:09:26 +00:00
if(mblen <= 0) {
errno = EILSEQ;
return (size_t)(-1);
2022-06-24 01:24:04 +00:00
}
2022-07-17 15:09:26 +00:00
return (size_t)mblen;
2022-06-24 01:24:04 +00:00
}