Use unicope as submodule like a true chad

2022-07-06 13:31:15 +11:00 · 2022-07-06 13:31:15 +11:00 · 704141d550
parent 097d7cf300
commit 704141d550
13 changed files with 44 additions and 693122 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "unicope"]
+	path = unicope
+	url = https://github.com/bumbread/unicope.git
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 CC=clang
 SRC_DIR := src
 OBJ_DIR := bin
-IFLAGS := -Iinc -Isrc/win
+IFLAGS := -Iinc -Isrc/win -Iunicope/inc

 # Detect target operating system
 ifeq ($(OS),Windows_NT) 
@@ -47,9 +47,13 @@ $(OBJ_DIR)/%.obj: $(SRC_DIR)/%.c
 	@-mkdir $(MKDIR_P_FLAG) "$(dir $@)" 2> $(NUL_FILE)
 	$(CC) $(CFLAGS) -c -o $@ $<

-ciabatta.lib: $(OBJ_FILES)
+ciabatta.lib: $(OBJ_FILES) unicope/unicope.lib
 	llvm-ar rc $@ $^

+unicope/unicope.lib:
+	clang -I unicope/inc -c unicope/src/unicode.c -o unicope/unicope.lib
+
+
 test: ciabatta.lib
 	clang -g test/test_$(test).c ciabatta.lib -std=c11 $(LIBS) -nostdlib -Iinc

--- a/bake.cmd
+++ b/bake.cmd
@@ -13,17 +13,13 @@ if "%~1" neq "_start_" (
 )
 shift /1

-set CIABATTA_OPTIONS=-Iinc -Wall -g -gcodeview -nodefaultlibs -D_CRT_SECURE_NO_WARNINGS -mfma
+set CIABATTA_OPTIONS=-Iinc -I unicope\inc -Wall -g -gcodeview -nodefaultlibs -D_CRT_SECURE_NO_WARNINGS -mfma
 set PLATFORM=win

 if "%1"=="test" (
    goto :skip_crt_compilation
 )

-if not exist src\code\unicode\data.h (
-    py src\code\unicode\compile.py
-)
-
 if exist bin rd/s/q bin
 mkdir bin
 mkdir bin\%PLATFORM%
--- a/bake_cc.cmd
+++ b/bake_cc.cmd
@@ -1,8 +1,8 @@
 for /R src\%PLATFORM% %%F in (*.c) do (
    echo %%F
-    start /B clang -Isrc/win -c -o bin\%PLATFORM%\%%~nF.obj %%F %CIABATTA_OPTIONS%
+    start /B clang -I unicope\inc -Isrc/win -c -o bin\%PLATFORM%\%%~nF.obj %%F %CIABATTA_OPTIONS%
 )
 for /R src\code %%F in (*.c) do (
    echo %%F
-    start /B clang -c -o bin\%%~nF.obj %%F %CIABATTA_OPTIONS%
+    start /B clang -I unicope\inc -c -o bin\%%~nF.obj %%F %CIABATTA_OPTIONS%
 )
--- a/build_cuik.cmd
+++ b/build_cuik.cmd
@@ -1,18 +1,16 @@
 @echo off
 setlocal enabledelayedexpansion

-if not exist src\code\unicode\data.h (
-    py src\code\unicode\compile.py
-)
-
 set PLATFORM=win
-set CIABATTA_OPTIONS=--crt none -I %% -I inc
+set CIABATTA_OPTIONS=--crt none -I %% -I inc -I unicope/inc

 del ciabatta.lib
+del unicope\unicope.lib
+cuik unicope\src\unicode.c -I unicope\inc -c -o unicope\unicope.lib
 cuik %CIABATTA_OPTIONS% src\code\*.c src\%PLATFORM%\*.c -c -o ciabatta.obj
 lib /out:ciabatta.lib ciabatta.obj

 if "%TEST%"=="" set TEST=assert

-cuik test\test_%TEST%.c --lib ciabatta.lib,kernel32.lib,user32.lib,shell32.lib %CIABATTA_OPTIONS%
+cuik test\test_%TEST%.c --lib ciabatta.lib,unicope/unicope.lib,kernel32.lib,user32.lib,shell32.lib %CIABATTA_OPTIONS%
 del ciabatta.obj
--- a/inc/unicode.h
+++ b/inc/unicode.h
@@ -1,141 +0,0 @@
-
-#pragma once
-
-#include <stdint.h>
-#include <stddef.h>
-
-typedef uint_least16_t char16_t;
-typedef uint_least32_t char32_t;
-
-typedef int32_t uchar_t;
-
-#define UNI_EBADCP (-1)
-#define UNI_EULSUR (-2)
-#define UNI_EIBYTE (-3)
-#define UNI_ETBYTE (-4)
-#define UNI_ESTRLN (-5)
-#define UNI_EOLONG (-6)
-
-enum {
-    UCHAR_BAD,
-    UCHAR_Cc,
-    UCHAR_Cf,
-    UCHAR_Co,
-    UCHAR_Cs,
-    UCHAR_Ll,
-    UCHAR_Lm,
-    UCHAR_Lo,
-    UCHAR_Lt,
-    UCHAR_Lu,
-    UCHAR_Mc,
-    UCHAR_Me,
-    UCHAR_Mn,
-    UCHAR_Nd,
-    UCHAR_Nl,
-    UCHAR_No,
-    UCHAR_Pc,
-    UCHAR_Pd,
-    UCHAR_Pe,
-    UCHAR_Pf,
-    UCHAR_Pi,
-    UCHAR_Po,
-    UCHAR_Ps,
-    UCHAR_Sc,
-    UCHAR_Sk,
-    UCHAR_Sm,
-    UCHAR_So,
-    UCHAR_Zl,
-    UCHAR_Zp,
-    UCHAR_Zs,
-};
-
-enum {
-    UCHAR_BIDI_AL,
-    UCHAR_BIDI_AN,
-    UCHAR_BIDI_B,
-    UCHAR_BIDI_BN,
-    UCHAR_BIDI_CS,
-    UCHAR_BIDI_EN,
-    UCHAR_BIDI_ES,
-    UCHAR_BIDI_ET,
-    UCHAR_BIDI_FSI,
-    UCHAR_BIDI_L,
-    UCHAR_BIDI_LRE,
-    UCHAR_BIDI_LRI,
-    UCHAR_BIDI_LRO,
-    UCHAR_BIDI_NSM,
-    UCHAR_BIDI_ON,
-    UCHAR_BIDI_PDF,
-    UCHAR_BIDI_PDI,
-    UCHAR_BIDI_R,
-    UCHAR_BIDI_RLE,
-    UCHAR_BIDI_RLI,
-    UCHAR_BIDI_RLO,
-    UCHAR_BIDI_S,
-    UCHAR_BIDI_WS,
-};
-
-enum {
-    UCHAR_DECOMP_CANON,
-    UCHAR_DECOMP_FONT,
-    UCHAR_DECOMP_NOBREAK,
-    UCHAR_DECOMP_INITIAL,
-    UCHAR_DECOMP_MEDIAL,
-    UCHAR_DECOMP_FINAL,
-    UCHAR_DECOMP_ISOLATED,
-    UCHAR_DECOMP_CIRCLE,
-    UCHAR_DECOMP_SUPER,
-    UCHAR_DECOMP_SUB,
-    UCHAR_DECOMP_VERTICAL,
-    UCHAR_DECOMP_WIDE,
-    UCHAR_DECOMP_NARROW,
-    UCHAR_DECOMP_SMALL,
-    UCHAR_DECOMP_SQUARE,
-    UCHAR_DECOMP_FRACTION,
-    UCHAR_DECOMP_COMPAT,
-};
-
-typedef struct uchar_props uchar_props;
-struct uchar_props {
-    uchar_t       code;
-    char const   *name;
-    int           cat_gen;
-    int           cat_bidi;
-    int           comb_class;
-    int           dec_type;
-    int           dec_map_n;
-    uchar_t const dec_map[18]; // U+FDFA takes 18, everything else takes up <8
-    int           dec_value;
-    int           dig_value;
-    double        num_value;
-    int           bidi_mirrored;
-    char const   *old_name;
-    char const   *comment;
-    uchar_t       lower;
-    uchar_t       upper;
-    uchar_t       title;
-};
-
-uchar_props *uni_props   (uchar_t cp);
-int          uni_valid   (uchar_t cp);
-int          uni_classify(uchar_t cp);
-uchar_t      uni_tolower (uchar_t cp);
-uchar_t      uni_toupper (uchar_t cp);
-uchar_t      uni_totitle (uchar_t cp);
-
-int     uni_is_hsur(char16_t cp);
-int     uni_is_lsur(char16_t cp);
-uchar_t uni_surtoc (char16_t hsur, char16_t lsur);
-
-int utf16_chlen(char16_t const *str);
-int utf8_chlen (char     const *str);
-
-int utf16_dec_s(char16_t const *restrict str, size_t len, uchar_t *restrict ch);
-int utf8_dec_s (char     const *restrict str, size_t len, uchar_t *restrict ch);
-int utf16_dec  (char16_t const *restrict str,             uchar_t *restrict ch);
-int utf8_dec   (char     const *restrict str,             uchar_t *restrict ch);
-
-int utf16_enc_s(char16_t *str, size_t len, uchar_t ch);
-int utf8_enc_s (char     *str, size_t len, uchar_t ch);
-int utf16_enc  (char16_t *str,             uchar_t ch);
-int utf8_enc   (char     *str,             uchar_t ch);
--- a/src/code/uchar.c
+++ b/src/code/uchar.c
@@ -2,6 +2,8 @@
 #include <uchar.h>
 #include <errno.h>

+#include <unicode.h>
+
 size_t mbrtoc16(
    char16_t   *restrict pc16,
    char const *restrict s,
@@ -12,59 +14,34 @@ size_t mbrtoc16(
        *ps = (mbstate_t) {0};
        return 0;
    }
-    size_t nbytes;
-    char16_t parsed_char;
-    char16_t next_char;
-    // First check leftovers
-    if(ps->leftover == 0) {
-        // Decode the first byte of UTF-8 sequence
-        unsigned byte0 = *s;
-        if     (0x00 <= byte0 && byte0 < 0x80) nbytes = 1;
-        else if(0xc0 <= byte0 && byte0 < 0xe0) nbytes = 2;
-        else if(0xe0 <= byte0 && byte0 < 0xf0) nbytes = 3;
-        else if(0xf0 <= byte0 && byte0 < 0xf8) nbytes = 4;
-        else goto encoding_error;
-        unsigned nbytesreq = nbytes;
-        if(n < nbytesreq) {
-            return (size_t)(-2);
-        }
-        char32_t cp = byte0;
-        switch(nbytesreq) {
-            case 2: cp &= 0x1f; break;
-            case 3: cp &= 0x0f; break;
-            case 4: cp &= 0x07; break;
-        }
-        while(--nbytesreq)
-            cp |= (cp << 6) | ((*++s) & 0x3f);
-        if(0xdc00 <= cp && cp <= 0xe000)
-            goto encoding_error;
-        // Overloing seqs
-        if(cp < 0x80    && nbytes > 1) goto encoding_error;
-        if(cp < 0x800   && nbytes > 2) goto encoding_error;
-        if(cp < 0x10000 && nbytes > 3) goto encoding_error;
-        if(cp > 0x10ffff) goto encoding_error;
-        // Now convert this char shit to UTF-16
-        if(cp < 0x10000) {
-            parsed_char = cp;
-            next_char   = 0; // no next
-        }
-        else {
-            cp -= 0x10000;
-            parsed_char = 0xd800 | (cp >> 10);
-            next_char   = 0xdc00 | (cp & 0x3ff);
-        }
-    }
-    else {
+    // First check leftovers, using 0xd800 as marker because it doesn't
+    // encode a valid character.
+    if(ps->leftover != 0xd800) {
        if(pc16 != NULL) *pc16 = ps->leftover;
-        ps->leftover = 0;
+        ps->leftover = 0xd800;
        return (size_t)(-3);
    }
-    if(pc16 != NULL) *pc16 = parsed_char;
-    ps->leftover = next_char;
-    if(parsed_char == 0)
-        return 0;
-    else
-        return nbytes;
+    else {
+        // Decode one code point from the UTF-8 input, then re-encode it as
+        // UTF-16 (a single unit, or a high/low surrogate pair).
+        // NOTE(review): n is not consulted here, so utf8_dec may read past the
+        // caller's buffer; consider utf8_dec_s(s, n, &ch) -- confirm its API.
+        uchar_t ch;
+        char16_t str[3];
+        int chlen = utf8_dec(s, &ch);
+        if(chlen <= 0) goto encoding_error;
+        int wrlen = utf16_enc(str, ch);
+        if(wrlen <= 0) goto encoding_error;
+        // Only a surrogate pair (wrlen == 2) leaves a unit for the next call;
+        // otherwise store the 0xd800 "no leftover" marker. For a single unit
+        // str[1] is not guaranteed to have been written, so it is not read.
+        ps->leftover = (wrlen == 2) ? str[1] : 0xd800;
+        if(pc16 != NULL) *pc16 = str[0];
+        // Report the number of input bytes consumed, or 0 when the decoded
+        // character is the null character. (size_t)-2 is reserved for an
+        // incomplete multibyte sequence and must not be returned on success.
+        return ch == 0 ? 0 : (size_t)chlen;
+    }
 encoding_error:
    errno = EILSEQ;
    return (size_t)(-1);
--- a/src/code/unicode.c
+++ b/src/code/unicode.c
@@ -1,279 +0,0 @@
-
-#include <unicode.h>
-
-#include "unicode/data.h"
-
-uchar_props *uni_props(uchar_t cp) {
-    if(!uni_valid(cp))              return NULL;
-    if(unicode_data[cp].code != cp) return NULL;
-    return &unicode_data[cp];
-}
-
-int uni_cat_gen(uchar_t cp) {
-    uchar_props *props = uni_props(cp);
-    if(props != NULL)
-        return unicode_data[cp].cat_gen;
-    else
-        return UCHAR_BAD;
-}
-
-uchar_t uni_tolower(uchar_t cp) {
-    return unicode_data[cp].lower;
-}
-
-uchar_t uni_toupper(uchar_t cp) {
-    return unicode_data[cp].upper;
-}
-
-uchar_t uni_totitle(uchar_t cp) {
-    return unicode_data[cp].title;
-}
-
-int uni_valid(uchar_t ch) {
-    return (0x0000 <= ch && ch <= 0xd7ff) || (0xe000 <= ch && ch <= 0x10ffff);
-}
-
-int uni_is_hsur(char16_t ch) {
-    return 0xd800 <= ch && ch <= 0xdbff;
-}
-
-int uni_is_lsur(char16_t ch) {
-    return 0xdc00 <= ch && ch <= 0xdfff;
-}
-
-uchar_t uni_surtoc(char16_t hsur, char16_t lsur) {
-    uchar_t u = ((0x3ff & hsur) << 10) | (lsur & 0x3ff);
-    return u + 0x10000;
-}
-
-int utf16_chlen(char16_t const *str) {
-    char16_t cp = *str;
-    if(uni_is_hsur(cp))      return 2;
-    else if(uni_is_lsur(cp)) return UNI_EULSUR;
-    else return 1;
-}
-
-int utf8_chlen(char const *str) {
-    uint8_t byte0 = (uint8_t)*str;
-    if(byte0 < 0x80)      return 1;
-    else if(byte0 < 0xc0) return UNI_EIBYTE;
-    else if(byte0 < 0xe0) return 2;
-    else if(byte0 < 0xf0) return 3;
-    else if(byte0 < 0xf8) return 4;
-    return UNI_EIBYTE;
-}
-
-int utf16_dec(char16_t const *restrict str, uchar_t *restrict chp) {
-    int chlen = 0;
-    uchar_t ch;
-    if(uni_is_hsur(str[0])) {
-        char16_t hsur = str[0];
-        char16_t lsur = str[1];
-        ch = uni_surtoc(hsur, lsur);
-        chlen = 2;
-        if(ch > 0x10ffff) {
-            chlen = UNI_EBADCP;
-            ch = 0xfffd;
-        }
-    }
-    else(!uni_is_lsur(str[0])) {
-        ch = str[0];
-    }
-    else {
-        chlen = UNI_EULSUR;
-        ch = 0xfffd;
-    }
-    if(chp != NULL) *chp = ch;
-    return chlen;
-}
-
-int utf16_dec_s(
-    char16_t const *restrict str,
-    int len,
-    uchar_t *restrict chp
-) {
-    if(len == 0) return 0;
-    int chlen;
-    uchar_t ch;
-    if(uni_is_hsur(str[0])) {
-        if(len < 2) return 0;
-        char16_t hsur = str[0];
-        char16_t lsur = str[1];
-        ch = uni_surtoc(hsur, lsur);
-        chlen = 2;
-        if(ch > 0x10ffff) {
-            chlen = UNI_EBADCP;
-            ch = 0xfffd;
-        }
-    }
-    else if(!uni_is_lsur(str[0])) {
-        ch = str[0];
-        chlen = 1;
-    }
-    else {
-        chlen = UNI_EULSUR;
-        ch = 0xfffd;
-    }
-    if(chp != NULL) *chp = ch;
-    return chlen;
-}
-
-int utf8_dec(char const *restrict str, uchar_t *restrict chp) {
-    uint8_t const *ustr = (uint8_t const *)str;
-
-    int chlen;
-    uchar_t ch;
-    if(ustr[0] < 0x80)      ch = ustr[0],        chlen = 1;
-    else if(ustr[0] < 0xc0) ch = 0xfffd,         chlen = UNI_EIBYTE;
-    else if(ustr[0] < 0xe0) ch = ustr[0] & 0x1f, chlen = 2;
-    else if(ustr[0] < 0xf0) ch = ustr[0] & 0x0f, chlen = 3;
-    else if(ustr[0] < 0xf8) ch = ustr[0] & 0x07, chlen = 4;
-    else                    ch = 0xfffd,         chlen = UNI_EIBYTE;
-
-    if(chlen > 0) for(int i = 1; i < chlen; ++i) {
-        uint8_t trail = ustr[i];
-        if((trail & 0xc0) != 0x80) {
-            chlen = UNI_ETBYTE;
-            ch = 0xfffd;
-            break;
-        }
-        ch <<= 6;
-        ch |= (trail & 0x3f); 
-    }
-    if(!uni_valid(ch)) {
-        chlen = UNI_EBADCP;
-        ch = 0xfffd;
-    }
-    if(chp != NULL) *chp = ch;
-    return chlen;
-}
-
-int utf8_dec_s(
-    char const *restrict str,
-    int len,
-    uchar_t *restrict chp
-) {
-    if(len == 0) return 0;
-    uint8_t const *restrict ustr = (uint8_t const *restrict)str;
-    int chlen;
-    uchar_t ch;
-    if(ustr[0] < 0x80)      ch = ustr[0],        chlen = 1;
-    else if(ustr[0] < 0xc0) ch = 0xfffd,         chlen = UNI_EIBYTE;
-    else if(ustr[0] < 0xe0) ch = ustr[0] & 0x1f, chlen = 2;
-    else if(ustr[0] < 0xf0) ch = ustr[0] & 0x0f, chlen = 3;
-    else if(ustr[0] < 0xf8) ch = ustr[0] & 0x07, chlen = 4;
-    else                    ch = 0xfffd,         chlen = UNI_EIBYTE;
-    if(chlen > len) {
-        return UNI_ESTRLN;
-    }
-    if(chlen > 0) for(int i = 1; i < chlen; ++i) {
-        uint8_t trail = ustr[i];
-        if((trail & 0xc0) != 0x80) {
-            chlen = UNI_ETBYTE;
-            ch = 0xfffd;
-            break;
-        }
-        ch <<= 6;
-        ch |= (trail & 0x3f); 
-    }
-    if(!uni_valid(ch)) {
-        chlen = UNI_EBADCP;
-        ch = 0xfffd;
-    }
-    if(chp != NULL) *chp = ch;
-    return chlen;
-}
-
-int utf16_enc(char16_t *str, uchar_t cp) {
-    if(!is_valid(cp)) {
-        return UNI_EBADCP;
-    }
-    if(cp < 0x10000) {
-        str[0] = cp;
-        return 1;
-    }
-    else {
-        cp -= 0x10000;
-        str[0] = 0xD800 + (cp >> 10);
-        str[1] = 0xDC00 + (cp & 0x3ff);
-        return 2;
-    }
-}
-
-int utf8_enc(char *str, uchar_t ch) {
-    if(!is_valid(cp)) {
-        return UNI_EBADCP;
-    }
-    if(cp < 0x80) {
-        str[0] = ch;
-        return 1;
-    }
-    else if(cp < 0x800) {
-        str[0] = 0xc0 | (ch >> 6);
-        str[1] = 0x80 | ((ch >> 0) & 0x3f);
-        return 2;
-    }
-    else if(cp < 0x10000) {
-        str[0] = 0xe0 | (ch >> 18);
-        str[1] = 0x80 | ((ch >> 6) & 0x3f);
-        str[2] = 0x80 | ((ch >> 0) & 0x3f);
-        return 3;
-    }
-    else {
-        str[0] = 0xe0 | (ch >> 24);
-        str[1] = 0x80 | ((ch >> 18) & 0x3f);
-        str[2] = 0x80 | ((ch >> 6)  & 0x3f);
-        str[3] = 0x80 | ((ch >> 0)  & 0x3f);
-        return 4;
-    }
-}
-
-int utf16_enc_s(char16_t *str, size_t len, uchar_t ch) {
-    if(!is_valid(cp)) {
-        return UNI_EBADCP;
-    }
-    if(len == 0) return 0;
-    if(cp < 0x10000) {
-        str[0] = cp;
-        return 1;
-    }
-    else {
-        if(len < 2) return UNI_ESTRLN;
-        cp -= 0x10000;
-        str[0] = 0xD800 + (cp >> 10);
-        str[1] = 0xDC00 + (cp & 0x3ff);
-        return 2;
-    }
-}
-
-int utf8_enc_s(char *str, size_t len, uchar_t ch) {
-    if(!is_valid(cp)) {
-        return UNI_EBADCP;
-    }
-    if(len == 0) return 0;
-    if(cp < 0x80) {
-        str[0] = ch;
-        return 1;
-    }
-    else if(cp < 0x800) {
-        if(len < 2) return UNI_ESTRLN;
-        str[0] = 0xc0 | (ch >> 6);
-        str[1] = 0x80 | ((ch >> 0) & 0x3f);
-        return 2;
-    }
-    else if(cp < 0x10000) {
-        if(len < 3) return UNI_ESTRLN;
-        str[0] = 0xe0 | (ch >> 18);
-        str[1] = 0x80 | ((ch >> 6) & 0x3f);
-        str[2] = 0x80 | ((ch >> 0) & 0x3f);
-        return 3;
-    }
-    else {
-        if(len < 4) return UNI_ESTRLN;
-        str[0] = 0xe0 | (ch >> 24);
-        str[1] = 0x80 | ((ch >> 18) & 0x3f);
-        str[2] = 0x80 | ((ch >> 6)  & 0x3f);
-        str[3] = 0x80 | ((ch >> 0)  & 0x3f);
-        return 4;
-    }
-}
--- a/src/code/unicode/compile.py
+++ b/src/code/unicode/compile.py
@@ -1,98 +0,0 @@
-
-import os;
-import sys;
-
-abspath = os.path.abspath(sys.argv[0])
-dname = os.path.dirname(abspath)
-os.chdir(dname)
-
-with open('data.h', 'w') as header:
-    header.write('\n');
-    header.write('#pragma once\n\n');
-    header.write('#include <unicode.h>\n');
-    header.write(
-'''
-uchar_props unicode_data[] = {
-''');
-
-    with open('data.txt') as file:
-        for line in file:
-            row = line.split(';')
-            code       = row[0].strip()
-            name       = row[1].strip()
-            cat_gen    = row[2].strip()
-            cat_bidi   = row[4].strip()
-            comb_class = row[3].strip()
-            dec_map    = row[5].strip()
-            dec_value  = row[6].strip()
-            dig_value  = row[7].strip()
-            num_value  = row[8].strip()
-            mirrored   = row[9].strip()
-            old_name   = row[10].strip()
-            comment    = row[11].strip()
-            upper      = row[12].strip()
-            lower      = row[13].strip()
-            title      = row[14].strip()
-            # Process decompositional mapping
-            dec_map_n  = 0
-            dec_type   = 'CANON'
-            if dec_map != '':
-                dec_map = dec_map.split(' ')
-                if dec_map[0][0] == '<':
-                    dec_type = dec_map[0][1:-1].upper()
-                    dec_map = dec_map[1:]
-                dec_map_n = len(dec_map)
-            if dec_map_n != 0:
-                dec_map = ', '.join(list(map(lambda x: '0x' + x, dec_map)))
-            else:
-                dec_map = '0'
-            # Make sure lowercase and uppercase mappings are defined
-            if lower == '':
-                lower = code
-            if upper == '':
-                upper = code
-            if title == '' or title == '\n':
-                title = code
-            header.write(
-            '''
-    [0x%s] = {
-        .code = 0x%s,
-        .name = "%s",
-        .cat_gen = UCHAR_%s,
-        .cat_bidi = UCHAR_BIDI_%s,
-        .comb_class = %s,
-        .dec_type = UCHAR_DECOMP_%s,
-        .dec_map_n = %s,
-        .dec_map = {%s},
-        .dec_value = %s,
-        .dig_value = %s,
-        .num_value = %s,
-        .bidi_mirrored = %s,
-        .old_name = "%s",
-        .comment  = "%s",
-        .lower = 0x%s,
-        .upper = 0x%s,
-        .title = 0x%s,
-    },''' % (
-                code,
-                code,
-                name,
-                cat_gen,
-                cat_bidi,
-                comb_class,
-                dec_type,
-                dec_map_n,
-                dec_map,
-                dec_value if dec_value != '' else '-1',
-                dig_value if dig_value != '' else '-1',
-                num_value if num_value != '' else '-1',
-                '1' if mirrored == 'Y' else '0',
-                old_name,
-                comment,
-                lower,
-                upper,
-                title
-            ));
-
-    header.write('};\n\n');
-    header.close();
--- a/src/code/unicode/data.h
+++ b/src/code/unicode/data.h
--- a/src/code/unicode/data.txt
+++ b/src/code/unicode/data.txt
--- a/src/code/unicode/readme
+++ b/src/code/unicode/readme
@@ -1,11 +0,0 @@
-
-The unicode-based functions work based on official unicode data. You can find
-the file with Unicode data at:
-
-    https://unicode.org/Public/UNIDATA/UnicodeData.txt
-
-This file is placed into this directory by the name data.txt. To update the
-unicode standard a new file is put under that name, then unicode_compile.py is
-ran with python interpreter. It will generate a new unicode.h header file.
-
-DO NOT MODIFY data.h DIRECTLY BRUH.
--- a/unicope
+++ b/unicope
@@ -0,0 +1 @@
+Subproject commit 5402d9b6987795856dd870c17075839453238503