diff --git a/inc/unicode.h b/inc/unicode.h
index e3880d7..c81350c 100644
--- a/inc/unicode.h
+++ b/inc/unicode.h
@@ -8,7 +8,8 @@ typedef uint_least16_t char16_t;
 typedef uint_least32_t char32_t;
 
 typedef int32_t uchar_t;
-typedef enum {
+
+enum {
     UCHAR_Invalid,
     UCHAR_Cc,
     UCHAR_Cf,
@@ -39,19 +40,57 @@ typedef enum {
     UCHAR_Zl,
     UCHAR_Zp,
     UCHAR_Zs,
-} uchar_class;
+};
+
+typedef struct uchar_props uchar_props; // record of properties for a single code point
+struct uchar_props {
+    int     bidi_class;
+    int     bidi_mirrored;
+    int     bidi_paired_bracket;
+    int     bidi_paired_bracket_type;
+    int     block;
+    int     canon_comb_class;
+    uchar_t ch_lower;   // presumably the lowercase mapping of the code point — TODO confirm
+    uchar_t ch_upper;   // presumably the uppercase mapping of the code point — TODO confirm
+    int     ndecomp;    // presumably the number of entries of decomp in use — TODO confirm
+    uchar_t const decomp[4]; // const member: a uchar_props object cannot be assigned to after initialization
+    uchar_t default_igncp;
+    int     deprecated;
+    int     east_asian_width;
+    int     gcat;       // presumably one of the UCHAR_* constants of the enum above — TODO confirm
+    int     hangul_syl_type;
+    int     join_type;
+    int     join_group;
+    int     line_brk;
+    char    const *name;
+    uchar_t nc_cp;
+    int     num_val;
+    int     ws;
+    int     dash;
+    int     letter_props;
+    int     math_props;
+    int     script;
+}; // NOTE(review): field names look like Unicode Character Database properties; verify against whatever generates the table
+
 
 int uni_classify(uchar_t ch);
 int uni_valid(uchar_t ch);
 uchar_t uni_to_lower(uchar_t u);
 uchar_t uni_to_upper(uchar_t u);
 
-int utf8_dec   (char const *restrict utf8_str,  uchar_t *restrict ch);
-int utf16_dec  (char const *restrict utf16_str, uchar_t *restrict ch);
-int utf8_dec_s (char const *restrict utf8_str,  size_t len, uchar_t *restrict ch);
-int utf16_dec_s(char const *restrict utf16_str, size_t len, uchar_t *restrict ch);
+int     uni_is_hsur(char16_t ch);                  // nonzero iff ch lies in 0xd800..0xdbff (high surrogate)
+int     uni_is_lsur(char16_t ch);                  // nonzero iff ch lies in 0xdc00..0xdfff (low surrogate)
+uchar_t uni_surtoc (char16_t hsur, char16_t lsur); // joins the low 10 bits of each unit and adds 0x10000; inputs are not checked
 
-int utf8_enc   (char *utf8_str,  uchar_t ch);
-int utf16_enc  (char *utf16_str, uchar_t ch);
-int utf8_enc_s (char *utf8_str,  size_t len, uchar_t ch);
-int utf16_enc_s(char *utf16_str, size_t len, uchar_t ch);
+int utf16_chlen(char16_t const *str); // 2 when *str is a high surrogate, 1 for any other valid unit, 0 otherwise
+int utf8_chlen (char     const *str); // sequence length implied by the first byte; 0 if that byte cannot start a sequence
+
+int utf16_dec_s(char16_t const *restrict str, size_t len, uchar_t *restrict ch); // _s variants: len is the number of units available at str
+int utf8_dec_s (char     const *restrict str, size_t len, uchar_t *restrict ch); // decoders store the code point through ch when ch is non-NULL
+int utf16_dec  (char16_t const *restrict str,             uchar_t *restrict ch); // return value is meant as the number of units consumed, 0 on failure
+int utf8_dec   (char     const *restrict str,             uchar_t *restrict ch); // malformed sequences are meant to store 0xfffd and return 0
+
+int utf16_enc_s(char16_t *str, size_t len, uchar_t ch); // NOTE(review): no encoder definition is visible next to these prototypes
+int utf8_enc_s (char     *str, size_t len, uchar_t ch); // presumably the encoders write ch to str and return the units written — TODO confirm
+int utf16_enc  (char16_t *str,             uchar_t ch); // presumably unbounded: assumes str has room for a full sequence — TODO confirm
+int utf8_enc   (char     *str,             uchar_t ch);
diff --git a/src/code/unicode.c b/src/code/unicode.c
index 93da509..9585a12 100644
--- a/src/code/unicode.c
+++ b/src/code/unicode.c
@@ -15,3 +15,150 @@ uchar_t uni_to_upper(uchar_t cp) {
     return uni_codepoints[cp].upper;
 }
 
+int uni_valid(uchar_t ch) { // nonzero iff ch is in 0..0x10ffff and outside the surrogate range 0xd800..0xdfff
+    return ch >= 0 && ch <= 0x10ffff && !(ch >= 0xd800 && ch <= 0xdfff);
+}
+
+int uni_is_hsur(char16_t ch) { // nonzero iff ch is a high (leading) surrogate, 0xd800..0xdbff
+    return !(ch < 0xd800 || ch > 0xdbff);
+}
+
+int uni_is_lsur(char16_t ch) { // nonzero iff ch is a low (trailing) surrogate, 0xdc00..0xdfff
+    return !(ch < 0xdc00 || ch > 0xdfff);
+}
+
+uchar_t uni_surtoc(char16_t hsur, char16_t lsur) { // combines a surrogate pair; the units are not validated here
+    uchar_t hi = hsur & 0x3ff, lo = lsur & 0x3ff; // 10 payload bits from each unit
+    return 0x10000 + ((hi << 10) | lo);
+}
+
+// Number of UTF-16 units in the sequence starting at *str: 2 after a high surrogate, 1 for a valid unit, else 0.
+int utf16_chlen(char16_t const *str) {
+    char16_t const unit = str[0];
+    if(uni_is_hsur(unit)) return 2;
+    // A lone low surrogate and any other invalid unit both report 0.
+    return (!uni_is_lsur(unit) && uni_valid(unit)) ? 1 : 0;
+}
+
+// Byte length of the UTF-8 sequence announced by the first byte at str; 0 if that byte cannot start one.
+int utf8_chlen(char const *str) {
+    unsigned const lead = (uint8_t)str[0];
+    if(lead <= 0x7f) return 1;
+    if(lead >= 0xc0 && lead <= 0xdf) return 2;
+    if(lead >= 0xe0 && lead <= 0xef) return 3;
+    if(lead >= 0xf0 && lead <= 0xf7) return 4;
+    return 0; // continuation bytes (0x80..0xbf) and 0xf8..0xff
+}
+
+// Decodes the UTF-16 sequence at str; writes the code point to *chp when chp is non-NULL.
+// Returns the code units consumed (1 or 2), or 0 after storing U+FFFD for a malformed sequence.
+int utf16_dec(char16_t const *restrict str, uchar_t *restrict chp) {
+    int chlen = 1;
+    uchar_t ch = str[0];
+    if(uni_is_hsur(str[0])) {
+        // Combine only a well-formed pair; otherwise ch stays a lone surrogate and is rejected below.
+        if(uni_is_lsur(str[1])) {
+            ch = uni_surtoc(str[0], str[1]);
+            chlen = 2;
+        }
+    }
+    if(!uni_valid(ch)) {
+        chlen = 0;
+        ch = 0xfffd;
+    }
+    if(chp != NULL) *chp = ch;
+    return chlen;
+}
+
+// Bounded UTF-16 decoder: reads at most len code units from str.
+// Returns 0 without touching *chp when len is 0 or the input ends right after a high surrogate;
+// otherwise stores the code point in *chp (if non-NULL) and returns units consumed, or U+FFFD and 0 if malformed.
+int utf16_dec_s(
+    char16_t const *restrict str,
+    size_t len,
+    uchar_t *restrict chp
+) {
+    if(len == 0) return 0;
+    int chlen = 1;
+    uchar_t ch = str[0];
+    if(uni_is_hsur(str[0])) {
+        if(len < 2) return 0;
+        // A high surrogate must be followed by a low one; if not, ch stays a lone surrogate.
+        if(uni_is_lsur(str[1])) {
+            ch = uni_surtoc(str[0], str[1]);
+            chlen = 2;
+        }
+    }
+    if(!uni_valid(ch)) {
+        ch = 0xfffd;
+        chlen = 0;
+    }
+    if(chp != NULL) *chp = ch;
+    return chlen;
+}
+
+// Decodes the UTF-8 sequence at str into *chp (if non-NULL). Returns bytes consumed, or 0 and U+FFFD when ill-formed.
+int utf8_dec(char const *restrict str, uchar_t *restrict chp) {
+    static uchar_t const min_cp[5] = {0, 0, 0x80, 0x800, 0x10000}; // smallest code point per sequence length
+    uint8_t const *ustr = (uint8_t const *)str;
+    int chlen;
+    uchar_t ch = 0xfffd; // kept for lead bytes that cannot start a sequence
+    if(ustr[0] < 0x80)      chlen = 1, ch = ustr[0];
+    else if(ustr[0] < 0xc0) chlen = 0; // stray continuation byte
+    else if(ustr[0] < 0xe0) chlen = 2, ch = ustr[0] & 0x1f;
+    else if(ustr[0] < 0xf0) chlen = 3, ch = ustr[0] & 0x0f;
+    else if(ustr[0] < 0xf8) chlen = 4, ch = ustr[0] & 0x07;
+    else chlen = 0; // 0xf8..0xff never start a sequence
+    for(int i = 1; i < chlen; ++i) {
+        if((ustr[i] & 0xc0) != 0x80) {
+            chlen = 0;
+            ch = 0xfffd;
+            break;
+        }
+        ch = (ch << 6) | (ustr[i] & 0x3f);
+    }
+    if(!uni_valid(ch) || ch < min_cp[chlen]) { // surrogates, values above 0x10ffff, overlong forms
+        chlen = 0;
+        ch = 0xfffd;
+    }
+    if(chp != NULL) *chp = ch;
+    return chlen;
+}
+
+// Bounded UTF-8 decoder: reads at most len bytes from str.
+// Returns 0 without touching *chp when len is 0 or the sequence announced by the lead byte
+// does not fit in len bytes; otherwise stores the code point (U+FFFD when ill-formed) in *chp
+// if chp is non-NULL and returns the bytes consumed, which is 0 for an ill-formed sequence.
+int utf8_dec_s(
+    char const *restrict str,
+    size_t len,
+    uchar_t *restrict chp
+) {
+    static uchar_t const min_cp[5] = {0, 0, 0x80, 0x800, 0x10000}; // smallest code point per sequence length
+    if(len == 0) return 0;
+    uint8_t const *restrict ustr = (uint8_t const *restrict)str;
+    int chlen;
+    uchar_t ch = 0xfffd; // kept for lead bytes that cannot start a sequence
+    if(ustr[0] < 0x80)      chlen = 1, ch = ustr[0];
+    else if(ustr[0] < 0xc0) chlen = 0; // stray continuation byte
+    else if(ustr[0] < 0xe0) chlen = 2, ch = ustr[0] & 0x1f;
+    else if(ustr[0] < 0xf0) chlen = 3, ch = ustr[0] & 0x0f;
+    else if(ustr[0] < 0xf8) chlen = 4, ch = ustr[0] & 0x07;
+    else chlen = 0; // 0xf8..0xff never start a sequence
+    if(len < (size_t)chlen) return 0; // truncated input: the trail bytes are not all available
+    for(int i = 1; i < chlen; ++i) {
+        if((ustr[i] & 0xc0) != 0x80) {
+            chlen = 0;
+            ch = 0xfffd;
+            break;
+        }
+        ch = (ch << 6) | (ustr[i] & 0x3f);
+    }
+    if(!uni_valid(ch) || ch < min_cp[chlen]) { // surrogates, values above 0x10ffff, overlong forms
+        chlen = 0;
+        ch = 0xfffd;
+    }
+    if(chp != NULL) *chp = ch;
+    return chlen;
+}
+
diff --git a/src/linux/syscalls.asm b/src/linux/syscalls.asm
new file mode 100644
index 0000000..21fa358
--- /dev/null
+++ b/src/linux/syscalls.asm
@@ -0,0 +1,9 @@
+
+bits 64
+segment .text
+
+global _exit
+_exit:                  ; thin wrapper that issues one raw syscall
+    mov rax, 60         ; syscall number 60 — presumably exit on x86-64 Linux; confirm against the kernel syscall table
+    syscall             ; only rax is set here; assumes the caller's argument is already in rdi — TODO confirm
+    ret                 ; reached only if the kernel returns control after the syscall
diff --git a/test/test_uchar.c b/test/test_uchar.c
index 9490d3d..1e187dc 100644
--- a/test/test_uchar.c
+++ b/test/test_uchar.c
@@ -1,28 +1,19 @@
 
-#include <uchar.h>
+#include <unicode.h>
+#include <stdio.h>
 
-mbstate_t state;
 int main() {
-    char in[] = u8"zß水🍌"; // or "z\u00df\u6c34\U0001F34C"
-    size_t in_sz = sizeof in / sizeof *in;
-
-    char16_t out[in_sz];
-    char *p_in = in, *end = in + in_sz;
-    char16_t *p_out = out;
-    size_t rc;
-    while((rc = mbrtoc16(p_out, p_in, end - p_in, &state)))
+    char const *mbstr = u8"улыбок тебе дед макар"; // const: the pointer refers to a string literal
     {
-        if(rc == (size_t)-1)      // invalid input
-            break;
-        else if(rc == (size_t)-2) // truncated input
-            break;
-        else if(rc == (size_t)-3) // UTF-16 high surrogate
-            p_out += 1;
-        else {
-            p_in += rc;
-            p_out += 1;
-        };
+        char const *str = mbstr;
+        uchar_t ch;
+        int len;
+        while((len = utf8_dec(str, &ch)) > 0 && ch != 0) { // stop on a decode failure or at the terminator
+            printf("char: %d\n", (int)ch); // cast so the argument type matches %d
+            str += len; // advance by the bytes the decoder consumed
+        }
+        if(len <= 0) { // the loop ended on a failed decode, not on the terminator
+            printf("This string is not utf8\n");
+        }
     }
-
-    size_t out_sz = p_out - out + 1;
 }