Unicode shit (wctype.h)

This commit is contained in:
bumbread 2022-06-16 18:49:46 +11:00
parent 293b812a03
commit 0ef1894917
9 changed files with 69534 additions and 3 deletions

View File

@ -51,5 +51,5 @@ del build\*.obj
:skip_crt_compilation
echo Compiling test..
clang -fno-builtin test\test_printf.c ciabatta.lib -std=c11 -lkernel32 -luser32 -lshell32 -nostdlib %CIABATTA_OPTIONS%
clang -fno-builtin test\test_wctype.c ciabatta.lib -std=c11 -lkernel32 -luser32 -lshell32 -nostdlib %CIABATTA_OPTIONS%
::cl test\test_math.c /Iinc -D_CRT_SECURE_NO_WARNINGS /Z7 /link ciabatta.lib kernel32.lib user32.lib shell32.lib -nostdlib -nodefaultlibs

11
code/unicode/readme Normal file
View File

@ -0,0 +1,11 @@
The Unicode-based functions are built from the official Unicode data. You can
find the file with the Unicode data at:
https://unicode.org/Public/UNIDATA/UnicodeData.txt
This file is placed into this directory under the name unicode_data.txt. To
update the Unicode standard, put a new file under that name, then run
unicode_compile.py with the Python interpreter. It will generate a new unicode.h header file.
DO NOT MODIFY unicode.h DIRECTLY BRUH.

34661
code/unicode/unicode.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,62 @@
"""Generates unicode.h from unicode_data.txt.

The header receives one numeric #define per general-category name, followed
by the UNI_TAB X-macro holding one X(code, category, lower, upper) row for
every line of unicode_data.txt. Both files live next to this script.
"""
import os
import sys

# Category names in the order of their numeric codes: the list index of a
# name is the value written into its #define.
CATEGORIES = [
    'Cc', 'Cf', 'Co', 'Cs',
    'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn',
    'Nd', 'Nl', 'No',
    'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
    'Sc', 'Sk', 'Sm', 'So',
    'Zl', 'Zp', 'Zs',
]


def uni_tab_entry(line):
    """Formats one unicode_data.txt row as a line of the UNI_TAB macro.

    line is a single ';'-separated record. The result is the text
    ' X(0x<code>, <category>, 0x<lower>, 0x<upper>)' followed by a backslash
    line continuation and a newline. An empty case mapping falls back to the
    code point itself.
    """
    row = line.split(';')
    code = row[0].strip()
    cat = row[2].strip()
    # Field 12 is the simple uppercase mapping and field 13 the simple
    # lowercase mapping. Field 14 is the titlecase mapping, which is not the
    # same thing (e.g. U+01C6 uppercases to U+01C4 but titlecases to U+01C5).
    upper = row[12].strip() or code
    lower = row[13].strip() or code
    return ' X(0x' + code + ', ' + cat + ', 0x' + lower + ', 0x' + upper + ')\\\n'


def main():
    # Work relative to the script's own directory so that the input and the
    # output are found no matter where the script is launched from.
    abspath = os.path.abspath(sys.argv[0])
    dname = os.path.dirname(abspath)
    os.chdir(dname)

    with open('unicode.h', 'w') as header:
        header.write('\n')
        header.write('#pragma once\n\n')
        for value, name in enumerate(CATEGORIES):
            header.write('#define ' + name + ' ' + str(value) + '\n')
        header.write('\n')
        header.write('#define UNI_TAB \\\n')
        # NOTE(review): rows named "<..., First>" / "<..., Last>" appear to
        # mark the two ends of a code point range; only those two end points
        # get a table row here -- confirm whether range interiors need rows.
        with open('unicode_data.txt') as file:
            for line in file:
                if not line.strip():
                    continue  # tolerate blank lines in the data file
                header.write(uni_tab_entry(line))
        # The last row ends with a continuation, so terminate the macro with
        # an empty line.
        header.write('\n')


if __name__ == '__main__':
    main()

34626
code/unicode/unicode_data.txt Normal file

File diff suppressed because it is too large Load Diff

127
code/unicode/wctype.c Normal file
View File

@ -0,0 +1,127 @@
#include <wctype.h>
#include <string.h>
#include "unicode.h"
static inline int char_cat(wint_t wc) {
#define X(code, cat, l, u) case code: return cat;
switch(wc) {
UNI_TAB
}
#undef X
return -1;
}
/*
 * Tests wc against the classification selected by desc.
 *
 * desc is a classification callback as handed out by wctype(). wctype()
 * reports an unknown property name with a null descriptor, so a null desc
 * is answered with 0 ("not in the class") instead of being called.
 *
 * Returns the callback's result, or 0 when desc is null.
 */
int iswctype(wint_t wc, wctype_t desc) {
    if(!desc) {
        return 0;
    }
    return desc(wc);
}
/*
 * Translates a classification name into its descriptor.
 *
 * property is compared against the twelve supported class names; the
 * matching isw* function is returned as the descriptor. Returns NULL when
 * the name is not one of them.
 */
wctype_t wctype(const char *property) {
    static const struct {
        const char *name;
        wctype_t    test;
    } classes[] = {
        {"alnum",  iswalnum},
        {"alpha",  iswalpha},
        {"blank",  iswblank},
        {"cntrl",  iswcntrl},
        {"digit",  iswdigit},
        {"graph",  iswgraph},
        {"lower",  iswlower},
        {"print",  iswprint},
        {"punct",  iswpunct},
        {"space",  iswspace},
        {"upper",  iswupper},
        {"xdigit", iswxdigit},
    };
    for(size_t i = 0; i < sizeof classes / sizeof classes[0]; ++i) {
        if(strcmp(property, classes[i].name) == 0) {
            return classes[i].test;
        }
    }
    return NULL;
}
/*
 * Maps wc through the transformation selected by desc.
 *
 * desc is a mapping callback as handed out by wctrans(). wctrans() reports
 * an unknown property name with a null descriptor, so a null desc leaves
 * the character untouched instead of being called.
 *
 * Returns the callback's result, or wc itself when desc is null.
 */
wint_t towctrans(wint_t wc, wctrans_t desc) {
    if(!desc) {
        return wc;
    }
    return desc(wc);
}
/*
 * Translates a mapping name into its descriptor.
 *
 * "tolower" selects towlower and "toupper" selects towupper; any other
 * name yields NULL.
 */
wctrans_t wctrans(const char *property) {
    wctrans_t mapping = NULL;
    if(strcmp(property, "tolower") == 0) {
        mapping = towlower;
    }
    else if(strcmp(property, "toupper") == 0) {
        mapping = towupper;
    }
    return mapping;
}
/*
 * Returns 1 when wc is accepted by iswalpha() or by iswdigit(), else 0.
 */
int iswalnum(wint_t wc) {
    if(iswalpha(wc)) {
        return 1;
    }
    return iswdigit(wc) ? 1 : 0;
}
int iswalpha(wint_t wc) {
return iswupper(wc) || iswlower(wc);
}
/*
 * Returns 1 when wc is a space or a horizontal tab, else 0.
 */
int iswblank(wint_t wc) {
    switch(wc) {
        case ' ':
        case '\t':
            return 1;
        default:
            return 0;
    }
}
/*
 * Returns 1 when the category of wc is Cc, else 0.
 */
int iswcntrl(wint_t wc) {
    int category = char_cat(wc);
    return category == Cc;
}
/*
 * Returns 1 when wc lies in the range '0'..'9', else 0.
 */
int iswdigit(wint_t wc) {
    if(wc < '0') {
        return 0;
    }
    return wc <= '9';
}
/*
 * Returns 1 when wc is accepted by iswprint() and rejected by iswspace(),
 * else 0.
 */
int iswgraph(wint_t wc) {
    if(!iswprint(wc)) {
        return 0;
    }
    return !iswspace(wc);
}
/*
 * Returns 1 when the category of wc is Ll, else 0.
 */
int iswlower(wint_t wc) {
    if(char_cat(wc) != Ll) {
        return 0;
    }
    return 1;
}
int iswprint(wint_t wc) {
switch(char_cat(wc)) {
case Cc:
case Cf:
case Co:
case Cs:
return 0;
}
return 1;
}
int iswpunct(wint_t wc) {
switch(char_cat(wc)) {
case Pc:
case Pd:
case Pe:
case Pf:
case Pi:
case Po:
case Ps:
return 1;
}
return 0;
}
int iswspace(wint_t wc) {
return char_cat(wc) == Zs;
}
/*
 * Returns 1 when the category of wc is Lu, else 0.
 */
int iswupper(wint_t wc) {
    if(char_cat(wc) != Lu) {
        return 0;
    }
    return 1;
}
/*
 * Returns 1 when wc is a hexadecimal digit, else 0: anything accepted by
 * iswdigit(), or a letter in 'a'..'f' or 'A'..'F'.
 */
int iswxdigit(wint_t wc) {
    if(iswdigit(wc)) {
        return 1;
    }
    if(wc >= 'a' && wc <= 'f') {
        return 1;
    }
    return wc >= 'A' && wc <= 'F';
}
wint_t towlower(wint_t wc) {
#define X(code, cat, l, u) case code: return l;
switch(wc) {
UNI_TAB
}
#undef X
return wc;
}
wint_t towupper(wint_t wc) {
#define X(code, cat, l, u) case code: return u;
switch(wc) {
UNI_TAB
}
#undef X
return wc;
}

View File

@ -2,8 +2,8 @@
#pragma once
typedef int wint_t;
wctrans_t;
wctype_t;
typedef int (*wctrans_t)(wint_t wc);
typedef int (*wctype_t)(wint_t wc);
#ifndef WEOF
#define WEOF 0

10
test/test_wctype.c Normal file
View File

@ -0,0 +1,10 @@
#include <wctype.h>
#include <stdio.h>
/*
 * Smoke test for iswalpha(): prints a confirmation line when the Cyrillic
 * letter is classified as alphabetic and prints nothing otherwise. The exit
 * status is 0 in both cases.
 */
int main() {
    if(!iswalpha(L'я')) {
        return 0;
    }
    printf("Symbol 'я' is indeed a letter\n");
    return 0;
}

34
unicode.h Normal file
View File

@ -0,0 +1,34 @@
#pragma once
/*
 * Numeric codes for the two-letter category names, numbered 0..28 in
 * alphabetical order of the name. The names are used as the category
 * argument of the X(...) rows that make up UNI_TAB.
 * NOTE(review): this looks like a copy of the header that
 * code/unicode/unicode_compile.py generates -- confirm it is meant to live
 * here, and regenerate it instead of editing it by hand.
 */
#define Cc 0
#define Cf 1
#define Co 2
#define Cs 3
#define Ll 4
#define Lm 5
#define Lo 6
#define Lt 7
#define Lu 8
#define Mc 9
#define Me 10
#define Mn 11
#define Nd 12
#define Nl 13
#define No 14
#define Pc 15
#define Pd 16
#define Pe 17
#define Pf 18
#define Pi 19
#define Po 20
#define Ps 21
#define Sc 22
#define Sk 23
#define Sm 24
#define So 25
#define Zl 26
#define Zp 27
#define Zs 28
#define UNI_TAB \