Unicode shit (wctype.h)

2022-06-16 18:49:46 +11:00 · 2022-06-16 18:49:46 +11:00 · 0ef1894917
parent 293b812a03
commit 0ef1894917
9 changed files with 69534 additions and 3 deletions
--- a/bake.cmd
+++ b/bake.cmd
@ -51,5 +51,5 @@ del build\*.obj
 :skip_crt_compilation
 echo Compiling test..
-clang -fno-builtin test\test_printf.c ciabatta.lib -std=c11 -lkernel32 -luser32 -lshell32 -nostdlib %CIABATTA_OPTIONS%
+clang -fno-builtin test\test_wctype.c ciabatta.lib -std=c11 -lkernel32 -luser32 -lshell32 -nostdlib %CIABATTA_OPTIONS%
 ::cl test\test_math.c /Iinc -D_CRT_SECURE_NO_WARNINGS /Z7 /link ciabatta.lib kernel32.lib user32.lib shell32.lib -nostdlib -nodefaultlibs
--- a/code/unicode/readme
+++ b/code/unicode/readme
@ -0,0 +1,11 @@
 The unicode-based functions work based on official unicode data. You can find
 the file with Unicode data at:
    https://unicode.org/Public/UNIDATA/UnicodeData.txt
 This file is placed in this directory under the name unicode_data.txt. To update
 to a newer Unicode standard, put the new file under that name, then run
 unicode_compile.py with the Python interpreter. It will generate a new unicode.h header file.
 DO NOT MODIFY unicode.h DIRECTLY BRUH.
--- a/code/unicode/unicode.h
+++ b/code/unicode/unicode.h
--- a/code/unicode/unicode_compile.py
+++ b/code/unicode/unicode_compile.py
@ -0,0 +1,62 @@
 # Generates unicode.h from unicode_data.txt; both files live next to this script.
 #
 # The header contains one #define per general category plus a UNI_TAB X-macro
 # list with one X(code, category, lower, upper) entry per input record.
 import os
 import sys

 # General category names. A name's position in this list is the numeric value
 # of its #define in the generated header.
 CATEGORIES = [
    'Cc', 'Cf', 'Co', 'Cs',
    'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn',
    'Nd', 'Nl', 'No',
    'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
    'Sc', 'Sk', 'Sm', 'So',
    'Zl', 'Zp', 'Zs',
 ]

 # Field positions inside one semicolon-separated record of UnicodeData.txt.
 FIELD_CODE     = 0
 FIELD_CATEGORY = 2
 FIELD_UPPER    = 12  # Simple_Uppercase_Mapping
 FIELD_LOWER    = 13  # Simple_Lowercase_Mapping
 FIELD_COUNT    = 15  # field 14 is Simple_Titlecase_Mapping, which is NOT the upper case

 # Work relative to the script's own directory so that the input and output
 # file names resolve no matter where the script is launched from.
 abspath = os.path.abspath(sys.argv[0])
 dname = os.path.dirname(abspath)
 os.chdir(dname)

 with open('unicode.h', 'w') as header:
    header.write('\n')
    header.write('#pragma once\n\n')
    for value, name in enumerate(CATEGORIES):
        # %2d right-aligns the value, e.g. "#define Cc  0" / "#define Me 10".
        header.write('#define %s %2d\n' % (name, value))
    header.write('\n')
    header.write('#define UNI_TAB \\\n')
    with open('unicode_data.txt') as file:
        for line in file:
            row = line.split(';')
            if len(row) < FIELD_COUNT:
                # Blank or malformed line: there is nothing to emit for it.
                continue
            code  = row[FIELD_CODE].strip()
            cat   = row[FIELD_CATEGORY].strip()
            lower = row[FIELD_LOWER].strip()
            upper = row[FIELD_UPPER].strip()
            # An empty mapping field means the character maps to itself.
            if lower == '':
                lower = code
            if upper == '':
                upper = code
            header.write('    X(0x%s, %s, 0x%s, 0x%s)\\\n'
                         % (code, cat, lower, upper))
    # Every X(...) line ends in a backslash; this bare newline ends the macro.
    header.write('\n')
--- a/code/unicode/unicode_data.txt
+++ b/code/unicode/unicode_data.txt
--- a/code/unicode/wctype.c
+++ b/code/unicode/wctype.c
@ -0,0 +1,127 @@
 #include <wctype.h>
 #include <string.h>
 #include "unicode.h"
 static inline int char_cat(wint_t wc) {
    #define X(code, cat, l, u) case code: return cat;
    switch(wc) {
        UNI_TAB
    }
    #undef X
    return -1;
 }
 // Tests whether wc has the property described by desc.
 //
 // desc is a classification callback such as the ones wctype() hands out.
 // wctype() returns a null descriptor for an unknown property name; such a
 // descriptor matches no character, so 0 is returned instead of calling
 // through a null function pointer.
 int iswctype(wint_t wc, wctype_t desc) {
    if(!desc) {
        return 0;
    }
    return desc(wc);
 }
 // Maps a property name ("alnum", "alpha", ..., "xdigit") to the matching
 // isw* classification function. Returns NULL when the name is not one of
 // the twelve listed below.
 wctype_t wctype(const char *property) {
    static const struct {
        const char *name;
        wctype_t    classify;
    } classes[] = {
        {"alnum",  iswalnum},
        {"alpha",  iswalpha},
        {"blank",  iswblank},
        {"cntrl",  iswcntrl},
        {"digit",  iswdigit},
        {"graph",  iswgraph},
        {"lower",  iswlower},
        {"print",  iswprint},
        {"punct",  iswpunct},
        {"space",  iswspace},
        {"upper",  iswupper},
        {"xdigit", iswxdigit},
    };
    enum { CLASS_COUNT = sizeof classes / sizeof classes[0] };

    for(int i = 0; i < CLASS_COUNT; ++i) {
        if(strcmp(property, classes[i].name) == 0) {
            return classes[i].classify;
        }
    }
    return NULL;
 }
 // Applies the mapping described by desc to wc and returns the result.
 //
 // desc is a mapping callback such as the ones wctrans() hands out.
 // wctrans() returns a null descriptor for an unknown mapping name; in that
 // case wc is returned unchanged instead of calling through a null function
 // pointer.
 wint_t towctrans(wint_t wc, wctrans_t desc) {
    if(!desc) {
        return wc;
    }
    return desc(wc);
 }
 // Maps a mapping name to its conversion function: "tolower" selects
 // towlower and "toupper" selects towupper. Any other name yields NULL.
 wctrans_t wctrans(const char *property) {
    wctrans_t mapping = NULL;
    if(strcmp(property, "tolower") == 0) {
        mapping = towlower;
    }
    else if(strcmp(property, "toupper") == 0) {
        mapping = towupper;
    }
    return mapping;
 }
 // Returns 1 when wc is alphabetic (iswalpha) or a decimal digit (iswdigit),
 // 0 otherwise.
 int iswalnum(wint_t wc) {
    if(iswalpha(wc)) {
        return 1;
    }
    return iswdigit(wc) != 0;
 }
 int iswalpha(wint_t wc) {
    return iswupper(wc) || iswlower(wc);
 }
 // Returns 1 when wc is a space or a horizontal tab, 0 otherwise.
 int iswblank(wint_t wc) {
    switch(wc) {
        case ' ':
        case '\t':
            return 1;
        default:
            return 0;
    }
 }
 // Returns 1 when the category of wc is Cc (control), 0 otherwise.
 int iswcntrl(wint_t wc) {
    const int category = char_cat(wc);
    return category == Cc;
 }
 // Returns 1 when wc is one of the ASCII decimal digits '0'..'9', 0 otherwise.
 int iswdigit(wint_t wc) {
    if(wc < '0') {
        return 0;
    }
    return wc <= '9';
 }
 // Returns 1 when wc is printable (iswprint) and is not white space
 // (iswspace), 0 otherwise.
 int iswgraph(wint_t wc) {
    if(!iswprint(wc)) {
        return 0;
    }
    return !iswspace(wc);
 }
 // Returns 1 when the category of wc is Ll (lower-case letter), 0 otherwise.
 int iswlower(wint_t wc) {
    const int category = char_cat(wc);
    return category == Ll;
 }
 // Returns 0 when wc belongs to one of the "other" categories Cc (control),
 // Cf (format), Co (private use) or Cs (surrogate), and 1 for everything
 // else. Code points that char_cat() cannot find (-1) therefore also count
 // as printable.
 int iswprint(wint_t wc) {
    const int category = char_cat(wc);
    const int is_other = category == Cc
                      || category == Cf
                      || category == Co
                      || category == Cs;
    return !is_other;
 }
 int iswpunct(wint_t wc) {
    switch(char_cat(wc)) {
        case Pc:
        case Pd:
        case Pe:
        case Pf:
        case Pi:
        case Po:
        case Ps:
            return 1;
    }
    return 0;
 }
 int iswspace(wint_t wc) {
    return char_cat(wc) == Zs;
 }
 // Returns 1 when the category of wc is Lu (upper-case letter), 0 otherwise.
 int iswupper(wint_t wc) {
    const int category = char_cat(wc);
    return category == Lu;
 }
 // Returns 1 when wc is a hexadecimal digit: a decimal digit (iswdigit),
 // 'a'..'f' or 'A'..'F'. Returns 0 otherwise.
 int iswxdigit(wint_t wc) {
    if(iswdigit(wc)) {
        return 1;
    }
    if('a' <= wc && wc <= 'f') {
        return 1;
    }
    return 'A' <= wc && wc <= 'F';
 }
 wint_t towlower(wint_t wc) {
    #define X(code, cat, l, u) case code: return l;
    switch(wc) {
        UNI_TAB
    }
    #undef X
    return wc;
 }
 wint_t towupper(wint_t wc) {
    #define X(code, cat, l, u) case code: return u;
    switch(wc) {
        UNI_TAB
    }
    #undef X
    return wc;
 }
--- a/inc/wctype.h
+++ b/inc/wctype.h
@ -2,8 +2,8 @@
 #pragma once
 typedef int wint_t;
-wctrans_t;
+typedef int (*wctrans_t)(wint_t wc);
-wctype_t;
+typedef int (*wctype_t)(wint_t wc);
 #ifndef WEOF
 	#define WEOF 0
--- a/test/test_wctype.c
+++ b/test/test_wctype.c
@ -0,0 +1,10 @@
 #include <wctype.h>
 #include <stdio.h>
 // Smoke test: prints a confirmation line when iswalpha() recognises the
 // Cyrillic letter 'я' as alphabetic. Always exits with status 0.
 int main() {
    const int is_letter = iswalpha(L'я');
    if(is_letter != 0) {
        printf("Symbol 'я' is indeed a letter\n");
    }
    return 0;
 }
--- a/unicode.h
+++ b/unicode.h
@ -0,0 +1,34 @@
 #pragma once
 #define Cc  0
 #define Cf  1
 #define Co  2
 #define Cs  3
 #define Ll  4
 #define Lm  5
 #define Lo  6
 #define Lt  7
 #define Lu  8
 #define Mc  9
 #define Me 10
 #define Mn 11
 #define Nd 12
 #define Nl 13
 #define No 14
 #define Pc 15
 #define Pd 16
 #define Pe 17
 #define Pf 18
 #define Pi 19
 #define Po 20
 #define Ps 21
 #define Sc 22
 #define Sk 23
 #define Sm 24
 #define So 25
 #define Zl 26
 #define Zp 27
 #define Zs 28
 #define UNI_TAB \