mirror of https://github.com/flysand7/ciabatta.git
Unicode shit (wctype.h)
This commit is contained in:
parent
293b812a03
commit
0ef1894917
2
bake.cmd
2
bake.cmd
|
@ -51,5 +51,5 @@ del build\*.obj
|
|||
|
||||
:skip_crt_compilation
|
||||
echo Compiling test..
|
||||
clang -fno-builtin test\test_printf.c ciabatta.lib -std=c11 -lkernel32 -luser32 -lshell32 -nostdlib %CIABATTA_OPTIONS%
|
||||
clang -fno-builtin test\test_wctype.c ciabatta.lib -std=c11 -lkernel32 -luser32 -lshell32 -nostdlib %CIABATTA_OPTIONS%
|
||||
::cl test\test_math.c /Iinc -D_CRT_SECURE_NO_WARNINGS /Z7 /link ciabatta.lib kernel32.lib user32.lib shell32.lib -nostdlib -nodefaultlibs
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
|
||||
The Unicode-aware functions are driven by the official Unicode character data.
You can find the Unicode data file at:
|
||||
|
||||
https://unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||
|
||||
This file is placed in this directory under the name unicode_data.txt. To update
to a newer Unicode standard, put the new file under that name, then run
unicode_compile.py with the Python interpreter. It will generate a new unicode.h
header file.
|
||||
|
||||
DO NOT MODIFY unicode.h DIRECTLY BRUH.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,62 @@
|
|||
|
||||
import os;
|
||||
import sys;
|
||||
|
||||
# Work from the directory that contains this script, so the relative paths
# 'unicode_data.txt' and 'unicode.h' below resolve next to it.
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
os.chdir(script_dir)

# General categories. A category's position in this tuple is the numeric value
# of its #define in the generated unicode.h.
categories = (
    'Cc', 'Cf', 'Co', 'Cs',
    'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn',
    'Nd', 'Nl', 'No',
    'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
    'Sc', 'Sk', 'Sm', 'So',
    'Zl', 'Zp', 'Zs',
)

# Semicolon-separated field positions in UnicodeData.txt.
FIELD_CODE = 0
FIELD_CATEGORY = 2
FIELD_UPPER = 12  # Simple_Uppercase_Mapping (field 14 is the *titlecase* mapping)
FIELD_LOWER = 13  # Simple_Lowercase_Mapping

# The data file is opened first so that a missing unicode_data.txt fails
# before the existing unicode.h gets truncated.
with open('unicode_data.txt') as data, open('unicode.h', 'w') as header:
    header.write('\n')
    header.write('#pragma once\n\n')
    for value, name in enumerate(categories):
        header.write('#define ' + name + ' ' + str(value) + '\n')
    header.write('\n')

    # UNI_TAB is an X-macro: one X(code, category, lower, upper) row per data
    # line, each ending in a line continuation.
    # NOTE: UnicodeData.txt lists large ranges only as a "<..., First>" and
    # "<..., Last>" pair; such ranges end up as just their two endpoint rows.
    header.write('#define UNI_TAB \\\n')
    for line in data:
        if line.strip() == '':
            continue
        row = line.split(';')
        code = row[FIELD_CODE].strip()
        cat = row[FIELD_CATEGORY].strip()
        # Characters without a case mapping map to themselves.
        lower = row[FIELD_LOWER].strip() or code
        upper = row[FIELD_UPPER].strip() or code
        header.write(' X(0x' + code + ', ' + cat + ', 0x' + lower + ', 0x' + upper + ')\\\n')

    # Blank line that terminates the trailing line continuation of UNI_TAB.
    header.write('\n')
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,127 @@
|
|||
|
||||
#include <wctype.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "unicode.h"
|
||||
|
||||
static inline int char_cat(wint_t wc) {
|
||||
#define X(code, cat, l, u) case code: return cat;
|
||||
switch(wc) {
|
||||
UNI_TAB
|
||||
}
|
||||
#undef X
|
||||
return -1;
|
||||
}
|
||||
|
||||
int iswctype(wint_t wc, wctype_t desc) {
|
||||
return desc(wc);
|
||||
}
|
||||
|
||||
// Maps a character-class name ("alnum", "alpha", ..., "xdigit") to the
// classification function implementing it.
// Returns NULL when `property` matches none of the known names.
wctype_t wctype(const char *property) {
    static const struct {
        const char *name;
        wctype_t    test;
    } classes[] = {
        {"alnum",  iswalnum},
        {"alpha",  iswalpha},
        {"blank",  iswblank},
        {"cntrl",  iswcntrl},
        {"digit",  iswdigit},
        {"graph",  iswgraph},
        {"lower",  iswlower},
        {"print",  iswprint},
        {"punct",  iswpunct},
        {"space",  iswspace},
        {"upper",  iswupper},
        {"xdigit", iswxdigit},
    };
    for(size_t i = 0; i < sizeof classes / sizeof classes[0]; ++i) {
        if(strcmp(property, classes[i].name) == 0) {
            return classes[i].test;
        }
    }
    return NULL;
}
|
||||
|
||||
wint_t towctrans(wint_t wc, wctrans_t desc) {
|
||||
return desc(wc);
|
||||
}
|
||||
|
||||
// Maps a mapping name to the function implementing it:
// "tolower" -> towlower, "toupper" -> towupper.
// Returns NULL for any other name.
wctrans_t wctrans(const char *property) {
    wctrans_t mapping = NULL;
    if(strcmp(property, "tolower") == 0) {
        mapping = towlower;
    }
    else if(strcmp(property, "toupper") == 0) {
        mapping = towupper;
    }
    return mapping;
}
|
||||
|
||||
// Returns 1 when `wc` is alphabetic (iswalpha) or a decimal digit (iswdigit),
// 0 otherwise.
int iswalnum(wint_t wc) {
    if(iswalpha(wc)) {
        return 1;
    }
    return iswdigit(wc) != 0;
}
|
||||
|
||||
int iswalpha(wint_t wc) {
|
||||
return iswupper(wc) || iswlower(wc);
|
||||
}
|
||||
|
||||
// Returns 1 when `wc` is a space or a horizontal tab, 0 otherwise.
int iswblank(wint_t wc) {
    switch(wc) {
        case ' ':
        case '\t':
            return 1;
        default:
            return 0;
    }
}
|
||||
|
||||
int iswcntrl(wint_t wc) {
|
||||
return char_cat(wc) == Cc;
|
||||
}
|
||||
|
||||
// Returns 1 when `wc` is one of the decimal digits '0'..'9', 0 otherwise.
int iswdigit(wint_t wc) {
    if(wc < '0') {
        return 0;
    }
    if(wc > '9') {
        return 0;
    }
    return 1;
}
|
||||
|
||||
// Returns 1 when `wc` is printable (iswprint) and not whitespace (iswspace),
// 0 otherwise.
int iswgraph(wint_t wc) {
    if(!iswprint(wc)) {
        return 0;
    }
    return !iswspace(wc);
}
|
||||
|
||||
int iswlower(wint_t wc) {
|
||||
return char_cat(wc) == Ll;
|
||||
}
|
||||
|
||||
int iswprint(wint_t wc) {
|
||||
switch(char_cat(wc)) {
|
||||
case Cc:
|
||||
case Cf:
|
||||
case Co:
|
||||
case Cs:
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
int iswpunct(wint_t wc) {
|
||||
switch(char_cat(wc)) {
|
||||
case Pc:
|
||||
case Pd:
|
||||
case Pe:
|
||||
case Pf:
|
||||
case Pi:
|
||||
case Po:
|
||||
case Ps:
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int iswspace(wint_t wc) {
|
||||
return char_cat(wc) == Zs;
|
||||
}
|
||||
|
||||
int iswupper(wint_t wc) {
|
||||
return char_cat(wc) == Lu;
|
||||
}
|
||||
|
||||
// Returns 1 when `wc` is a hexadecimal digit ('0'..'9', 'a'..'f', 'A'..'F'),
// 0 otherwise.
int iswxdigit(wint_t wc) {
    if('0' <= wc && wc <= '9') {
        return 1;
    }
    if('a' <= wc && wc <= 'f') {
        return 1;
    }
    return 'A' <= wc && wc <= 'F';
}
|
||||
|
||||
wint_t towlower(wint_t wc) {
|
||||
#define X(code, cat, l, u) case code: return l;
|
||||
switch(wc) {
|
||||
UNI_TAB
|
||||
}
|
||||
#undef X
|
||||
return wc;
|
||||
}
|
||||
|
||||
wint_t towupper(wint_t wc) {
|
||||
#define X(code, cat, l, u) case code: return u;
|
||||
switch(wc) {
|
||||
UNI_TAB
|
||||
}
|
||||
#undef X
|
||||
return wc;
|
||||
}
|
|
@ -2,8 +2,8 @@
|
|||
#pragma once
|
||||
|
||||
typedef int wint_t;
|
||||
wctrans_t;
|
||||
wctype_t;
|
||||
typedef int (*wctrans_t)(wint_t wc);
|
||||
typedef int (*wctype_t)(wint_t wc);
|
||||
|
||||
#ifndef WEOF
|
||||
#define WEOF 0
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
|
||||
#include <wctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Smoke test for iswalpha(): the Cyrillic letter 'я' must be classified as a
// letter. Exits with 0 on success and 1 on failure, so a build script can
// detect a regression from the exit code.
int main(void) {
    if(!iswalpha(L'я')) {
        printf("Symbol 'я' was NOT recognized as a letter\n");
        return 1;
    }
    printf("Symbol 'я' is indeed a letter\n");
    return 0;
}
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#define Cc 0
|
||||
#define Cf 1
|
||||
#define Co 2
|
||||
#define Cs 3
|
||||
#define Ll 4
|
||||
#define Lm 5
|
||||
#define Lo 6
|
||||
#define Lt 7
|
||||
#define Lu 8
|
||||
#define Mc 9
|
||||
#define Me 10
|
||||
#define Mn 11
|
||||
#define Nd 12
|
||||
#define Nl 13
|
||||
#define No 14
|
||||
#define Pc 15
|
||||
#define Pd 16
|
||||
#define Pe 17
|
||||
#define Pf 18
|
||||
#define Pi 19
|
||||
#define Po 20
|
||||
#define Ps 21
|
||||
#define Sc 22
|
||||
#define Sk 23
|
||||
#define Sm 24
|
||||
#define So 25
|
||||
#define Zl 26
|
||||
#define Zp 27
|
||||
#define Zs 28
|
||||
|
||||
#define UNI_TAB \
|
Loading…
Reference in New Issue