Factor out some Unicode stuff; it also compiles on Linux now

This commit is contained in:
bumbread 2022-06-28 22:49:30 +11:00
parent 0d58124c26
commit 328e9f6c35
9 changed files with 34865 additions and 34744 deletions

View File

@ -1,20 +1,53 @@
GNUFLAGS=-Werror -Wall -Iinc -Isrc/win
CLFLAGS=/I:inc /I:src/win /link /incremental:no /subsystem:windows /nodefaultlib kernel32.lib
CC=clang
CFLAGS=$(GNUFLAGS)
LDFLAGS=/nologo /nodefaultlib /entry:mainCRTStartup
SRC_DIR := src
OBJ_DIR := bin
SRC_FILES := $(wildcard $(SRC_DIR)/code/*.c) $(wildcard $(SRC_DIR)/win/*.c)
OBJ_FILES := $(patsubst $(SRC_DIR)/%.c,$(OBJ_DIR)/%.obj,$(SRC_FILES))
IFLAGS := -Iinc -Isrc/win
ciabatta.lib: $(OBJ_FILES)
lib $(LDFLAGS) /out:$@ $^
# Detect target operating system
ifeq ($(OS),Windows_NT)
PLATFORM := win
else
PLATFORM := $(shell sh -c 'uname 2>/dev/null || echo Unknown')
PLATFORM := $(shell sh -c 'echo $(PLATFORM) | tr A-Z a-z')
endif
# Abort when the platform could not be detected. PLATFORM has been lowercased
# above, so the uname fallback value "Unknown" arrives here as "unknown".
# $(error ...) stops make while the makefile is being read; shell commands
# such as echo/exit are only meaningful inside a recipe.
ifeq ($(PLATFORM),unknown)
$(error Unknown platform)
endif
# If we're compiling under windows we'll link to these libraries
ifeq ($(PLATFORM),win)
LIBS := -lDbghelp -lkernel32 -luser32 -lshell32
endif
# Compiler flags
# Only clang is supported. Any other CC aborts the build via $(error ...),
# because shell commands (echo/exit) are not valid outside of a recipe.
ifeq ($(CC), clang)
CFLAGS=$(GNUFLAGS) -Werror -Wall -msse2 $(IFLAGS)
else
$(error BAD CC)
endif
# Figure out what we want to compile at the end
SRC_FILES := $(wildcard $(SRC_DIR)/code/*.c) $(wildcard $(SRC_DIR)/$(PLATFORM)/*.c)
OBJ_FILES := $(patsubst $(SRC_DIR)/%.c,$(OBJ_DIR)/%.obj,$(SRC_FILES))
$(OBJ_DIR)/%.obj: $(SRC_DIR)/%.c
$(CC) $(CFLAGS) -c -o $@ $<
.PHONY: ciabatta.lib
ciabatta.lib: $(OBJ_FILES)
llvm-ar rc $@ $^
test: ciabatta.lib
clang test/test_$(test).c ciabatta.lib -std=c11 $(LIBS) -nostdlib -Iinc
clean:
rd/s/q bin || true
rm -Rf bin || true
mkdir bin
mkdir bin/code
mkdir bin/win
mkdir bin/linux
.PHONY: ciabatta.lib test

View File

@ -4,10 +4,10 @@
#include <stddef.h>
#include <stdint.h>
typedef struct mbstate_t mbstate_t;
typedef uint_least16_t char16_t;
typedef uint_least32_t char32_t;
typedef struct mbstate_t mbstate_t;
struct mbstate_t {
char16_t leftover;
};

57
inc/unicode.h Normal file
View File

@ -0,0 +1,57 @@
#pragma once
#include <stdint.h>
#include <stddef.h>
typedef uint_least16_t char16_t;
typedef uint_least32_t char32_t;
typedef int32_t uchar_t;
// Unicode general categories as stored in the generated code point table.
// The numeric values are spelled out so that the mapping stays stable and is
// easy to read in a debugger; value 0 is reserved for "no valid category".
typedef enum {
    UCHAR_Invalid = 0,  // not a valid category
    // C: other
    UCHAR_Cc = 1,       // control
    UCHAR_Cf = 2,       // format
    UCHAR_Co = 3,       // private use
    UCHAR_Cs = 4,       // surrogate
    // L: letters
    UCHAR_Ll = 5,       // lowercase letter
    UCHAR_Lm = 6,       // modifier letter
    UCHAR_Lo = 7,       // other letter
    UCHAR_Lt = 8,       // titlecase letter
    UCHAR_Lu = 9,       // uppercase letter
    // M: marks
    UCHAR_Mc = 10,      // spacing mark
    UCHAR_Me = 11,      // enclosing mark
    UCHAR_Mn = 12,      // nonspacing mark
    // N: numbers
    UCHAR_Nd = 13,      // decimal number
    UCHAR_Nl = 14,      // letter number
    UCHAR_No = 15,      // other number
    // P: punctuation
    UCHAR_Pc = 16,      // connector punctuation
    UCHAR_Pd = 17,      // dash punctuation
    UCHAR_Pe = 18,      // close punctuation
    UCHAR_Pf = 19,      // final punctuation
    UCHAR_Pi = 20,      // initial punctuation
    UCHAR_Po = 21,      // other punctuation
    UCHAR_Ps = 22,      // open punctuation
    // S: symbols
    UCHAR_Sc = 23,      // currency symbol
    UCHAR_Sk = 24,      // modifier symbol
    UCHAR_Sm = 25,      // math symbol
    UCHAR_So = 26,      // other symbol
    // Z: separators
    UCHAR_Zl = 27,      // line separator
    UCHAR_Zp = 28,      // paragraph separator
    UCHAR_Zs = 29,      // space separator
} uchar_class;
// Code point queries. uni_classify() yields the category stored for `ch`
// (compared against the UCHAR_* constants by callers); uni_to_lower() and
// uni_to_upper() yield the stored lowercase / uppercase mapping.
// NOTE(review): no definition of uni_valid() is visible alongside the other
// three -- confirm where it is implemented and what it reports.
int uni_classify(uchar_t ch);
int uni_valid(uchar_t ch);
uchar_t uni_to_lower(uchar_t u);
uchar_t uni_to_upper(uchar_t u);
// Decoders: presumably read one code point from the string into *ch; the
// meaning of the int result is not documented here -- TODO confirm.
// The *_s variants additionally take the length of the input in `len`.
// NOTE(review): the UTF-16 entry points take `char` pointers rather than
// char16_t pointers -- confirm this is intentional.
int utf8_dec (char const *restrict utf8_str, uchar_t *restrict ch);
int utf16_dec (char const *restrict utf16_str, uchar_t *restrict ch);
int utf8_dec_s (char const *restrict utf8_str, size_t len, uchar_t *restrict ch);
int utf16_dec_s(char const *restrict utf16_str, size_t len, uchar_t *restrict ch);
// Encoders: presumably write the encoding of `ch` into the given buffer; the
// meaning of the int result is not documented here -- TODO confirm.
// The *_s variants additionally take the capacity of the buffer in `len`.
int utf8_enc (char *utf8_str, uchar_t ch);
int utf16_enc (char *utf16_str, uchar_t ch);
int utf8_enc_s (char *utf8_str, size_t len, uchar_t ch);
int utf16_enc_s(char *utf16_str, size_t len, uchar_t ch);

View File

@ -1,6 +1,11 @@
#include <fenv.h>
#include <intrin.h>
#if defined(_WIN32)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#define fe_masks(excepts) (((fexcept_t)(excepts)) << 7)
#define fe_flags(excepts) ((fexcept_t)(excepts))

17
src/code/unicode.c Normal file
View File

@ -0,0 +1,17 @@
#include <unicode.h>
#include "unicode/data.h"
// Returns the category (a uchar_class value) stored for code point `cp` in
// the generated uni_codepoints table, or UCHAR_Invalid when `cp` is negative
// or lies past the end of the table.
// NOTE(review): assumes the table is indexed directly by code point --
// confirm against the generator script.
int uni_classify(uchar_t cp) {
    size_t const table_len = sizeof uni_codepoints / sizeof uni_codepoints[0];
    // A negative cp converts to a huge size_t, so this single comparison
    // rejects both out-of-range directions.
    if ((size_t)cp >= table_len) {
        return UCHAR_Invalid;
    }
    return uni_codepoints[cp].cat;
}
// Returns the lowercase mapping stored for code point `cp` in the generated
// uni_codepoints table. Values outside the table (negative or too large) have
// no mapping and are returned unchanged.
// NOTE(review): assumes the table is indexed directly by code point --
// confirm against the generator script.
uchar_t uni_to_lower(uchar_t cp) {
    size_t const table_len = sizeof uni_codepoints / sizeof uni_codepoints[0];
    // A negative cp converts to a huge size_t, so this single comparison
    // rejects both out-of-range directions.
    if ((size_t)cp >= table_len) {
        return cp;
    }
    return uni_codepoints[cp].lower;
}
// Returns the uppercase mapping stored for code point `cp` in the generated
// uni_codepoints table. Values outside the table (negative or too large) have
// no mapping and are returned unchanged.
// NOTE(review): assumes the table is indexed directly by code point --
// confirm against the generator script.
uchar_t uni_to_upper(uchar_t cp) {
    size_t const table_len = sizeof uni_codepoints / sizeof uni_codepoints[0];
    // A negative cp converts to a huge size_t, so this single comparison
    // rejects both out-of-range directions.
    if ((size_t)cp >= table_len) {
        return cp;
    }
    return uni_codepoints[cp].upper;
}

View File

@ -9,42 +9,14 @@ os.chdir(dname)
with open('data.h', 'w') as header:
header.write('\n');
header.write('#pragma once\n\n');
header.write('#define Cc 0\n');
header.write('#define Cf 1\n');
header.write('#define Co 2\n');
header.write('#define Cs 3\n');
header.write('#define Ll 4\n');
header.write('#define Lm 5\n');
header.write('#define Lo 6\n');
header.write('#define Lt 7\n');
header.write('#define Lu 8\n');
header.write('#define Mc 9\n');
header.write('#define Me 10\n');
header.write('#define Mn 11\n');
header.write('#define Nd 12\n');
header.write('#define Nl 13\n');
header.write('#define No 14\n');
header.write('#define Pc 15\n');
header.write('#define Pd 16\n');
header.write('#define Pe 17\n');
header.write('#define Pf 18\n');
header.write('#define Pi 19\n');
header.write('#define Po 20\n');
header.write('#define Ps 21\n');
header.write('#define Sc 22\n');
header.write('#define Sk 23\n');
header.write('#define Sm 24\n');
header.write('#define So 25\n');
header.write('#define Zl 26\n');
header.write('#define Zp 27\n');
header.write('#define Zs 28\n');
header.write('#include<unicode.h>\n');
header.write(
'''
struct _uni_elm {
wint_t code;
wint_t cat;
wint_t lower;
wint_t upper;
uchar_t code;
int cat;
uchar_t lower;
uchar_t upper;
} uni_codepoints[] = {
''');
@ -60,10 +32,11 @@ struct _uni_elm {
if upper == '' or upper == '\n':
upper = code
header.write(' {' + \
'0x' + code + ', ' + \
cat + ', ' + \
'0x' + lower + ', ' + \
'0x' + upper + '},\n');
'0x' + code + ', ' + \
'UCHAR_' + cat + ', ' + \
'0x' + lower + ', ' + \
'0x' + upper + '},\n'
);
header.write('};\n\n');
header.close();

File diff suppressed because it is too large Load Diff

View File

@ -2,11 +2,7 @@
#include <wctype.h>
#include <string.h>
#include "unicode/data.h"
static inline int char_cat(wint_t wc) {
return uni_codepoints[wc].cat;
}
#include <unicode.h>
int iswctype(wint_t wc, wctype_t desc) {
return desc(wc);
@ -51,7 +47,7 @@ int iswblank(wint_t wc) {
}
int iswcntrl(wint_t wc) {
return char_cat(wc) == Cc;
return uni_classify(wc) == UCHAR_Cc;
}
int iswdigit(wint_t wc) {
@ -63,33 +59,33 @@ int iswgraph(wint_t wc) {
}
int iswlower(wint_t wc) {
return char_cat(wc) == Ll;
return uni_classify(wc) == UCHAR_Ll;
}
int iswprint(wint_t wc) {
switch(char_cat(wc)) {
case Cc:
case Cf:
case Co:
case Cs:
switch(uni_classify(wc)) {
case UCHAR_Cc:
case UCHAR_Cf:
case UCHAR_Co:
case UCHAR_Cs:
return 0;
}
return 1;
}
int iswpunct(wint_t wc) {
switch(char_cat(wc)) {
case Pc:
case Pd:
case Pe:
case Pf:
case Pi:
case Po:
case Ps:
case Sk:
case Sc:
case Sm:
case So:
switch(uni_classify(wc)) {
case UCHAR_Pc:
case UCHAR_Pd:
case UCHAR_Pe:
case UCHAR_Pf:
case UCHAR_Pi:
case UCHAR_Po:
case UCHAR_Ps:
case UCHAR_Sk:
case UCHAR_Sc:
case UCHAR_Sm:
case UCHAR_So:
return 1;
}
return 0;
@ -109,7 +105,7 @@ int iswspace(wint_t wc) {
}
int iswupper(wint_t wc) {
return char_cat(wc) == Lu;
return uni_classify(wc) == UCHAR_Lu;
}
int iswxdigit(wint_t wc) {
@ -117,9 +113,9 @@ int iswxdigit(wint_t wc) {
}
wint_t towlower(wint_t wc) {
return uni_codepoints[wc].lower;
return uni_to_lower(wc);
}
wint_t towupper(wint_t wc) {
return uni_codepoints[wc].upper;
return uni_to_upper(wc);
}

View File

@ -0,0 +1,68 @@
#include <unistd.h>
#include <stddef.h>
#include <stdlib.h>
#include <signal.h>
#include <locale.h>
// Exit routines
// Capacity of the handler tables filled by atexit() and at_quick_exit().
#define ATEXIT_FUNC_COUNT 64
#define ATQEXIT_FUNC_COUNT 64
// Registered handlers, stored in registration order.
static void (*atexit_funcs [ATEXIT_FUNC_COUNT])(void);
static void (*atqexit_funcs[ATQEXIT_FUNC_COUNT])(void);
// Number of occupied slots in the corresponding table above.
static int atexit_func_count;
static int atqexit_func_count;
extern int main(int argc, char** argv);
// Process entry point for the Linux build: seeds rand(), selects the "C"
// locale, runs main() and terminates with main's return value.
// Command-line arguments are not forwarded yet: main() always receives
// argc == 0 and an argv that holds only the terminating NULL.
void _start() {
    srand(0);
    setlocale(LC_ALL, "C");
    int argc = 0;
    char *argv[1] = {NULL};
    int code = main(argc, argv);
    // Returning from main() is equivalent to calling exit(), so terminate
    // through exit() to let the handlers registered with atexit() run.
    exit(code);
}
// Runs the handlers registered with at_quick_exit(), most recently
// registered first, then terminates the process through _exit(status).
_Noreturn void quick_exit(int status) {
    while (atqexit_func_count > 0) {
        // Release the slot before invoking the handler stored in it.
        atqexit_func_count -= 1;
        atqexit_funcs[atqexit_func_count]();
    }
    _exit(status);
}
// Runs the handlers registered with atexit(), most recently registered
// first, then terminates the process through _exit(status).
_Noreturn void exit(int status) {
    while(atexit_func_count--) {
        // Index with the atexit counter; the quick-exit counter belongs to a
        // different table and would select the wrong handler.
        atexit_funcs[atexit_func_count]();
    }
    // Stream cleanup is not wired up for this platform yet:
    // _close_io();
    _exit(status);
}
// Terminates the process immediately with `status` by forwarding to _exit();
// none of the handlers registered with atexit() or at_quick_exit() are run.
_Noreturn void _Exit(int status) {
_exit(status);
}
// Terminates the process immediately by calling _exit(-69) without running
// any registered exit handlers.
// SIGABRT is currently not raised: the raise() call below is commented out.
_Noreturn void abort(void) {
// raise(SIGABRT);
_exit(-69);
}
// Registers `func` to be called by exit(). Handlers run in reverse order of
// registration.
// Returns 0 when the handler was stored and a nonzero value when the table
// already holds ATEXIT_FUNC_COUNT handlers, as the C standard requires.
int atexit(void (*func)(void)) {
    if (atexit_func_count >= ATEXIT_FUNC_COUNT) {
        return 1;
    }
    atexit_funcs[atexit_func_count++] = func;
    return 0;
}
// Registers `func` to be called by quick_exit(). Handlers run in reverse
// order of registration.
// Returns 0 when the handler was stored and a nonzero value when the table
// already holds ATQEXIT_FUNC_COUNT handlers, as the C standard requires.
int at_quick_exit(void (*func)(void)) {
    if (atqexit_func_count >= ATQEXIT_FUNC_COUNT) {
        return 1;
    }
    atqexit_funcs[atqexit_func_count++] = func;
    return 0;
}