From 67c4942b60b441f4231a6467d2ea72d87b0328d9 Mon Sep 17 00:00:00 2001
From: Alex Baines
Date: Sun, 7 Mar 2021 21:59:43 +0000
Subject: [PATCH] add unfinished hmmlib rewrite "hmmlib2"

---
 hmmlib2/Makefile       |   5 +
 hmmlib2/hmmlib.c       | 727 +++++++++++++++++++++++++++++++++++++++++
 hmmlib2/hmmlib.h       | 121 +++++++
 hmmlib2/obbg032_1.hmml |  25 ++
 4 files changed, 878 insertions(+)
 create mode 100644 hmmlib2/Makefile
 create mode 100644 hmmlib2/hmmlib.c
 create mode 100644 hmmlib2/hmmlib.h
 create mode 100644 hmmlib2/obbg032_1.hmml

diff --git a/hmmlib2/Makefile b/hmmlib2/Makefile
new file mode 100644
index 0000000..e72a78a
--- /dev/null
+++ b/hmmlib2/Makefile
@@ -0,0 +1,5 @@
+# this is just for the test program (WON build should also work)
+# when the library is finished, it should be #include-able
+
+hmmlib: hmmlib.c
+	gcc -g $< -o $@
diff --git a/hmmlib2/hmmlib.c b/hmmlib2/hmmlib.c
new file mode 100644
index 0000000..62e2471
--- /dev/null
+++ b/hmmlib2/hmmlib.c
@@ -0,0 +1,727 @@
+#if 0
+gcc -g "$0" -o "${0%%.c}"
+exit 0
+#endif
+#include "hmmlib.h"
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+//
+// TODO:
+//   parse quotes
+//   marker parameter + episode number
+//   track line numbers somehow
+//     seems to be only needed for the error-func, so...
+//     could probably do it after-the-fact, counting \n until we hit p->cursor
+//   port over the hmml_dump function
+//   test various files
+//   cleanup
+//
+
+#define HSTX(x) x, sizeof(x)-1
+#define HSTR(x) (struct _hmml_str){ HSTX(x) }
+
+#ifndef MALLOC
+    #define MALLOC malloc
+#endif
+
+#ifndef REALLOC
+    #define REALLOC realloc
+#endif
+
+#ifndef countof
+    #define countof(x) (sizeof(x)/sizeof(*x))
+#endif
+
+#define _hmml_debug(...)
+//#define _hmml_debug printf
+
+// Parser state threaded through every parsing routine.
+struct _hmml_parser {
+    HMML_Output out;       // result being built; hmml_parse returns it by value
+    char* cursor;          // current read position in the NUL-terminated input
+    jmp_buf err_buf;       // longjmp target for _hmml_err_fn and _hmml_oom
+    uintptr_t* free_list;  // [0] capacity, [1] next free slot, [2..] owned pointers
+};
+
+// A pointer + length string slice; it does not own the bytes it points at.
+struct _hmml_str {
+    char* ptr;
+    size_t len;
+};
+
+// memory management boilerplate stuff
+
+// Aborts the parse after a failed allocation. The message is a static string
+// rather than a tracked allocation, so reporting it cannot itself fail.
+__attribute__((noreturn))
+static void _hmml_oom(struct _hmml_parser* p)
+{
+    static char message[] = "Out of memory\n";
+    p->out.error.message = message;
+    longjmp(p->err_buf, 1);
+}
+
+// Records `input` in the parser's free list so that hmml_free can release it,
+// and returns `input` unchanged. If the list cannot be created or grown,
+// `input` is freed and the parse is aborted through _hmml_oom.
+static void* _hmml_store_ptr(struct _hmml_parser* p, void* input)
+{
+    uintptr_t* list = p->free_list;
+
+    if(!list) {
+        list = MALLOC(8 * sizeof(uintptr_t));
+        if(!list) {
+            free(input);
+            _hmml_oom(p);
+        }
+        list[0] = 8;
+        list[1] = 2;
+    } else if(list[1] + 1 == list[0]) {
+        // grow through a temporary so the old list stays reachable on failure
+        size_t n = list[0] << 1;
+        uintptr_t* grown = REALLOC(list, n * sizeof(uintptr_t));
+        if(!grown) {
+            free(input);
+            _hmml_oom(p);
+        }
+        list = grown;
+        list[0] = n;
+    }
+
+    list[list[1]] = (uintptr_t)input;
+    list[1]++;
+    p->free_list = list;
+    return input;
+}
+
+// Copies `str` into a new NUL-terminated buffer that is tracked by the free
+// list, and returns that buffer.
+static char* _hmml_persist_str(struct _hmml_parser* p, const struct _hmml_str str)
+{
+    char* mem = MALLOC(str.len+1);
+    if(!mem) {
+        _hmml_oom(p);
+    }
+    memcpy(mem, str.ptr, str.len);
+    mem[str.len] = '\0';
+    return _hmml_store_ptr(p, mem);
+}
+
+// Appends the `in_size` bytes at `in` as a new element of the array `*out`,
+// which currently holds `*out_count` elements, then increments `*out_count`.
+// Each array allocation starts with a hidden size_t holding the index of the
+// free-list slot that tracks it; that slot is rewritten after every REALLOC
+// because the block may have moved. `*out` points just past the hidden header.
+static void _hmml_persist_array_fn(struct _hmml_parser* p, void** out, size_t* out_count, void* in, size_t in_size)
+{
+    void* base;
+    if(!*out) {
+        base = MALLOC(in_size + sizeof(size_t));
+        if(!base) {
+            _hmml_oom(p);
+        }
+        _hmml_store_ptr(p, base);
+        *(size_t*)base = p->free_list[1]-1;
+    } else {
+        void* old = (char*)(*out) - sizeof(size_t);
+        base = REALLOC(old, (*out_count + 1) * in_size + sizeof(size_t));
+        if(!base) {
+            // `old` is still valid and still tracked by the free list
+            _hmml_oom(p);
+        }
+        size_t free_list_off = *(size_t*)base;
+        p->free_list[free_list_off] = (uintptr_t)base;
+    }
+
+    *out = (char*)base + sizeof(size_t);
+    memcpy((char*)*out + (*out_count * in_size), in, in_size);
+    ++(*out_count);
+}
+
+#define _hmml_persist_array(p, out, out_count, in) \
+    _hmml_persist_array_fn((p), (void**)(out), (out_count), &(in), sizeof(in))
+
+// error handling
+
+#define _hmml_err(p, fmt, ...) \
+    _hmml_err_fn((p), fmt "\n", ##__VA_ARGS__)
+
+// Formats an error message, stores a persistent copy of it in
+// p->out.error.message and jumps back to the setjmp in _hmml_run. Never returns.
+__attribute__((noreturn, format(printf, 2, 3)))
+static void _hmml_err_fn(struct _hmml_parser* p, const char* fmt, ...)
+{
+    static char error_buf[4096];
+
+    va_list va;
+    va_start(va, fmt);
+    int n = vsnprintf(error_buf, sizeof(error_buf), fmt, va);
+    va_end(va);
+
+    // vsnprintf returns the length the full message would have had (or a
+    // negative value on failure), which can exceed what error_buf holds
+    size_t len = 0;
+    if(n > 0) {
+        len = (size_t)n < sizeof(error_buf) ? (size_t)n : sizeof(error_buf) - 1;
+    }
+
+    // TODO: figure out / keep track of the line number
+
+    p->out.error.message = _hmml_persist_str(p, (struct _hmml_str){ error_buf, len });
+    longjmp(p->err_buf, 1);
+}
+
+// actual parsing stuff
+
+// Moves the cursor past any run of spaces, tabs and line breaks.
+static void _hmml_skip_ws(struct _hmml_parser* p)
+{
+    p->cursor += strspn(p->cursor, " \t\r\n");
+}
+
+// Returns 1 when both slices have the same length and the same bytes.
+static _Bool _hmml_str_eq(struct _hmml_str a, struct _hmml_str b)
+{
+    return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0;
+}
+
+// If `in` can be backslash-escaped ('[', ']', ':', '@', '~', backslash or
+// double quote), stores it in `*out` and returns 1. Otherwise returns 0.
+static _Bool _hmml_unesc(char in, char* out)
+{
+    // strchr also matches the terminating NUL of its first argument, so '\0'
+    // must be rejected explicitly; otherwise a backslash at the very end of
+    // the input counts as an escape and callers step past the terminator
+    if(in != '\0' && strchr("[]:@~\\\"", in)) {
+        *out = in;
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+// Reads one attribute value at the cursor into `mem` (capacity `mem_size`),
+// NUL-terminates it and moves the cursor past it. A value is either wrapped in
+// double quotes, in which case escapes accepted by _hmml_unesc are decoded, or
+// a bare run of characters ending at whitespace, ']' or the end of the input.
+// Returns a pointer to the NUL terminator written into `mem`.
+static char* _hmml_read_attr(struct _hmml_parser* p, char* mem, size_t mem_size)
+{
+    char* src = p->cursor;
+    char* dst = mem;
+
+    if(*src == '"') {
+        ++src;
+        while(*src && *src != '"' && (size_t)(src - p->cursor) < mem_size) {
+            char converted;
+            if(*src == '\\' && _hmml_unesc(src[1], &converted)) {
+                *dst++ = converted;
+                src += 2;
+            } else {
+                *dst++ = *src++;
+            }
+        }
+
+        if(*src != '"') {
+            _hmml_err(p, "Partially quoted attribute");
+        }
+
+        *dst = '\0';
+        p->cursor = src+1;
+    } else {
+        size_t n = strcspn(src, " ]\r\n\t");
+        if(n >= mem_size) {
+            _hmml_err(p, "Attribute [%.10s...] too long", p->cursor);
+        }
+        memcpy(dst, src, n);
+        dst += n;
+        *dst = '\0';
+        p->cursor += n;
+    }
+
+    return dst;
+}
+
+// Reads one `key=value` pair at the cursor. On return `key` and `val` point
+// into static buffers that the next call overwrites, so callers must copy
+// whatever they need to keep (see _hmml_persist_str).
+static void _hmml_read_kv(struct _hmml_parser* p, struct _hmml_str* key, struct _hmml_str* val)
+{
+    static char key_memory[64];
+    static char val_memory[1024];
+
+    size_t key_len = strcspn(p->cursor, " \r\n\t=");
+    if(key_len >= sizeof(key_memory)) {
+        _hmml_err(p, "Attribute key [%.10s...] too long", p->cursor);
+    }
+
+    memcpy(key_memory, p->cursor, key_len);
+    key_memory[key_len] = '\0';
+    p->cursor += key_len;
+
+    _hmml_skip_ws(p);
+
+    if(*p->cursor != '=') {
+        _hmml_err(p, "Expected '=', got [%.3s]", p->cursor);
+    }
+
+    ++p->cursor;
+
+    char* end = _hmml_read_attr(p, val_memory, sizeof(val_memory));
+
+    _hmml_debug("read kv [%s] = [%s]\n", key_memory, val_memory);
+
+    key->ptr = key_memory;
+    key->len = key_len;
+
+    val->ptr = val_memory;
+    val->len = (size_t)(end - val_memory);
+}
+
+// Parses a marker at the cursor: '~' (project), '@' (member) or ':' (category)
+// followed by its name, optionally wrapped in [ ]. The name is copied into
+// memory tracked by the free list.
+static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p)
+{
+    static char marker_mem[4096];
+
+    // the extended markers are inside [ ] and can contain parameters
+    _Bool extended = *p->cursor == '[';
+    if(extended) {
+        ++p->cursor;
+    }
+
+    HMML_Marker marker = {0};
+
+    char c = *p->cursor;
+    if(c == '~') {
+        marker.type = HMML_PROJECT;
+    } else if(c == '@') {
+        marker.type = HMML_MEMBER;
+    } else if(c == ':') {
+        marker.type = HMML_CATEGORY;
+    } else {
+        _hmml_err(p, "Unknown marker type");
+    }
+
+    ++p->cursor;
+
+    char* end = _hmml_read_attr(p, marker_mem, sizeof(marker_mem));
+    marker.marker = _hmml_persist_str(p, (struct _hmml_str){ marker_mem, (size_t)(end - marker_mem) });
+
+    _hmml_skip_ws(p);
+
+    if(*p->cursor == '#') {
+        // TODO: marker.episode = smth
+    } else {
+        // TODO: marker.parameter = _hmml_persist_str(p, (struct _hmml_str){ });
+    }
+
+    if(extended) {
+        if(*p->cursor != ']') {
+            _hmml_err(p, "Expected ']'");
+        }
+        ++p->cursor;
+    }
+
+    return marker;
+}
+
+// Parses the attributes of a [ref ...] tag up to and including the closing
+// ']'. The cursor must be just past "[ref". Unknown keys are an error.
+static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p)
+{
+    HMML_Reference ref = {0};
+
+    struct str_attr {
+        struct _hmml_str str;
+        char** dest;
+    } str_attrs[] = {
+        { HSTR("site")     , &ref.site      },
+        { HSTR("page")     , &ref.page      },
+        { HSTR("url")      , &ref.url       },
+        { HSTR("title")    , &ref.title     },
+        { HSTR("article")  , &ref.article   },
+        { HSTR("author")   , &ref.author    },
+        { HSTR("editor")   , &ref.editor    },
+        { HSTR("publisher"), &ref.publisher },
+        { HSTR("isbn")     , &ref.isbn      },
+    };
+
+    for(;;) {
+next_attr:
+        _hmml_skip_ws(p);
+
+        if(*p->cursor == ']') {
+            ++p->cursor;
+            break;
+        }
+
+        struct _hmml_str key, value;
+        _hmml_read_kv(p, &key, &value);
+
+        for(size_t i = 0; i < countof(str_attrs); ++i) {
+            struct str_attr* s = str_attrs + i;
+            if(_hmml_str_eq(key, s->str)) {
+                *s->dest = _hmml_persist_str(p, value);
+                goto next_attr;
+            }
+        }
+
+        _hmml_err(p, "Unknown reference attribute");
+    }
+
+    return ref;
+}
+
+// Parses a "[m:s]" or "[h:m:s]" timecode at the cursor into anno->h/m/s and
+// moves the cursor past the closing ']'.
+static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Annotation* anno)
+{
+    // %u requires unsigned int arguments; %n requires int
+    unsigned int h = 0, m = 0, s = 0;
+    int offset = 0;
+    int count = sscanf(p->cursor, "[%u:%u%n", &m, &s, &offset);
+
+    if(count < 2) {
+        _hmml_err(p, "Unable to parse timecode");
+    }
+
+    p->cursor += offset;
+    char c = *p->cursor;
+
+    if(c == ':') {
+        unsigned int tmp;
+        offset = 0;
+        if(sscanf(p->cursor, ":%u]%n", &tmp, &offset) != 1 || offset == 0) {
+            _hmml_err(p, "Unable to parse 3-part timecode");
+        }
+
+        h = m;
+        m = s;
+        s = tmp;
+
+        p->cursor += offset;
+
+    } else if(c != ']') {
+        _hmml_err(p, "Unable to parse timecode");
+    } else {
+        ++p->cursor;
+    }
+
+    if(s >= 60) {
+        _hmml_err(p, "Seconds cannot exceed 59");
+    }
+
+    if(m >= 60) {
+        _hmml_err(p, "Minutes cannot exceed 59");
+    }
+
+    anno->h = (int)h;
+    anno->m = (int)m;
+    anno->s = (int)s;
+}
+
+// Parses annotation text up to and including its closing ']'. The cursor must
+// be just past the opening '['. Backslash escapes are decoded into the text;
+// embedded markers and [ref ...] tags are appended to the annotation's arrays.
+static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno)
+{
+    static char text_mem[4096];
+    char* const text_end = text_mem + sizeof(text_mem);
+    char* out = text_mem;
+
+    memset(text_mem, 0, sizeof(text_mem));
+
+    for(;;) {
+        size_t n = strcspn(p->cursor, "\\[]:@~");
+        char c = p->cursor[n];
+
+        if(n > (size_t)(text_end - out)) {
+            _hmml_err(p, "Not enough text memory");
+        }
+
+        memcpy(out, p->cursor, n);
+        p->cursor += n;
+        out += n;
+
+        if(c == '\0') {
+            _hmml_err(p, "Unexpected EOF");
+        }
+
+        else if(c == ']') {
+            ++p->cursor;
+            break;
+        }
+
+        else if(c == '\\') {
+            // a backslash at the very end of the input escapes nothing;
+            // skipping two bytes here would move the cursor past the NUL
+            if(p->cursor[1] == '\0') {
+                _hmml_err(p, "Unexpected EOF");
+            }
+
+            // the memcpy above is allowed to fill text_mem completely
+            if(out == text_end) {
+                _hmml_err(p, "Not enough text memory");
+            }
+
+            *out++ = p->cursor[1];
+            p->cursor += 2;
+        }
+
+        else if(c == '[') {
+            if(strncmp(p->cursor + 1, "ref", 3) == 0) {
+                p->cursor += 4;
+                HMML_Reference ref = _hmml_parse_ref(p);
+                _hmml_persist_array(p, &anno->references, &anno->reference_count, ref);
+            } else {
+                HMML_Marker m = _hmml_parse_marker(p);
+                _hmml_persist_array(p, &anno->markers, &anno->marker_count, m);
+            }
+        }
+
+        else {
+            // it is a @ ~ or : marker without parameters
+            HMML_Marker m = _hmml_parse_marker(p);
+            _hmml_persist_array(p, &anno->markers, &anno->marker_count, m);
+        }
+
+        if(out >= text_end) {
+            _hmml_err(p, "Not enough text memory");
+        }
+    }
+
+    anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, (size_t)(out - text_mem) });
+}
+
+// Parses timestamped annotations until the closing [/video] tag and appends
+// each one to p->out.annotations.
+static void _hmml_parse_annotations(struct _hmml_parser* p)
+{
+    for(;;) {
+        _hmml_skip_ws(p);
+
+        if(*p->cursor == '\0') {
+            _hmml_err(p, "Unexpected EOF");
+        }
+
+        if(strncmp(p->cursor, "[/video]", 8) == 0) {
+            break;
+        }
+
+        HMML_Annotation anno = {0};
+
+        _hmml_parse_timecode(p, &anno);
+
+        if(*p->cursor != '[') {
+            _hmml_err(p, "Expected '['");
+        }
+
+        if(p->cursor[1] == '@') {
+            HMML_Marker m = _hmml_parse_marker(p);
+            anno.author = m.marker;
+
+            // the text block has to follow the author marker
+            if(*p->cursor != '[') {
+                _hmml_err(p, "Expected '['");
+            }
+        }
+
+        ++p->cursor;
+
+        _hmml_parse_text(p, &anno);
+
+        if(p->cursor[0] == '[' && p->cursor[1] == ':') {
+            HMML_Marker m = _hmml_parse_marker(p);
+            _hmml_persist_array(p, &anno.markers, &anno.marker_count, m);
+        }
+
+        if(p->cursor[0] == '[' && p->cursor[1] == 'q') {
+            //_hmml_parse_quote();
+        }
+
+        _hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno);
+    }
+}
+
+// Parses the attributes of the opening [video ...] tag into p->out.metadata
+// and then parses the annotations that follow it. The cursor must be just
+// past "[video". Keys that are not recognised are stored as custom metadata.
+static void _hmml_parse_video(struct _hmml_parser* p)
+{
+    struct str_attr {
+        struct _hmml_str str;
+        char** dest;
+    } str_attrs[] = {
+        { HSTR("member")         , &p->out.metadata.member          },
+        { HSTR("stream_platform"), &p->out.metadata.stream_platform },
+        { HSTR("stream_username"), &p->out.metadata.stream_username },
+        { HSTR("project")        , &p->out.metadata.project         },
+        { HSTR("title")          , &p->out.metadata.title           },
+        { HSTR("vod_platform")   , &p->out.metadata.vod_platform    },
+        { HSTR("id")             , &p->out.metadata.id              },
+        { HSTR("template")       , &p->out.metadata.template        },
+        { HSTR("medium")         , &p->out.metadata.medium          },
+        { HSTR("output")         , &p->out.metadata.output          },
+    };
+
+    for(;;) {
+next_attr:
+        _hmml_skip_ws(p);
+
+        if(*p->cursor == ']') {
+            ++p->cursor;
+            // ISO C does not allow `return expr;` in a void function
+            _hmml_parse_annotations(p);
+            return;
+        }
+
+        struct _hmml_str key, value;
+        _hmml_read_kv(p, &key, &value);
+
+        for(size_t i = 0; i < countof(str_attrs); ++i) {
+            struct str_attr* s = str_attrs + i;
+            if(_hmml_str_eq(key, s->str)) {
+                *s->dest = _hmml_persist_str(p, value);
+                goto next_attr;
+            }
+        }
+
+        if(_hmml_str_eq(key, HSTR("credit"))) {
+            HMML_Credit credit = {0};
+
+            char* colon = strchr(value.ptr, ':');
+            if(colon) {
+                *colon = '\0';
+                credit.name = _hmml_persist_str(p, (struct _hmml_str){ value.ptr, (size_t)(colon - value.ptr) });
+                credit.role = _hmml_persist_str(p, (struct _hmml_str){ colon+1, value.len - (size_t)((colon+1) - value.ptr) });
+            } else {
+                credit.name = _hmml_persist_str(p, value);
+            }
+
+            _hmml_persist_array(p, &p->out.metadata.credits, &p->out.metadata.credit_count, credit);
+
+            goto next_attr;
+        }
+
+        HMML_VideoCustomMetaData custom = {
+            .key   = _hmml_persist_str(p, key),
+            .value = _hmml_persist_str(p, value),
+        };
+
+        _hmml_persist_array(p, &p->out.metadata.custom, &p->out.metadata.custom_count, custom);
+    }
+}
+
+// Runs the parse with the error jump target armed. Returns 1 on success and 0
+// if parsing was aborted through _hmml_err_fn or _hmml_oom. The parser state
+// lives in the caller's frame rather than in the function that calls setjmp,
+// because automatic variables of the setjmp caller that change before a
+// longjmp have indeterminate values afterwards unless they are volatile.
+static _Bool _hmml_run(struct _hmml_parser* p)
+{
+    static const char prefix[] = "[video";
+
+    if(setjmp(p->err_buf) != 0) {
+        return 0;
+    }
+
+    if(!p->cursor) {
+        _hmml_err(p, "No input.");
+    }
+
+    if(strncasecmp(p->cursor, prefix, sizeof(prefix)-1) != 0) {
+        _hmml_err(p, "Missing initial video tag.");
+    }
+
+    p->cursor += sizeof(prefix)-1;
+    _hmml_parse_video(p);
+    return 1;
+}
+
+// Parses the NUL-terminated HMML document in `mem`. On success well_formed is
+// 1; on failure it is 0 and error.message describes the problem. Every string
+// in the result is a copy, so `mem` may be freed right after the call. The
+// result must be released with hmml_free in both cases.
+HMML_Output hmml_parse(char* mem)
+{
+    struct _hmml_parser p = {0};
+    p.cursor = mem;
+
+    p.out.well_formed = _hmml_run(&p);
+
+    // hand over the free list on failure too: it owns the error message and
+    // everything that was parsed before the error
+    p.out.free_list = p.free_list;
+    return p.out;
+}
+
+// Releases everything owned by `out`. The strings and arrays inside `out`
+// must not be used afterwards. Safe to call more than once.
+void hmml_free(HMML_Output* out)
+{
+    if(!out || !out->free_list) {
+        return;
+    }
+
+    uintptr_t* list = out->free_list;
+    for(size_t i = 2; i < list[1]; ++i) {
+        free((void*)list[i]);
+    }
+
+    free(list);
+    out->free_list = NULL;
+}
+
+// for testing, should be removed or #if 0'd for the library
+int main(int argc, char** argv)
+{
+    if(argc < 2) {
+        fprintf(stderr, "Usage: %s [file]\n", argv[0]);
+        return 1;
+    }
+
+    FILE* f = fopen(argv[1], "r");
+    if(!f) {
+        perror(argv[1]);
+        return 1;
+    }
+
+    fseek(f, 0, SEEK_END);
+    long size = ftell(f);
+    if(size < 0) {
+        perror(argv[1]);
+        fclose(f);
+        return 1;
+    }
+    rewind(f);
+
+    char* mem = malloc((size_t)size+1);
+    if(!mem) {
+        perror("malloc");
+        fclose(f);
+        return 1;
+    }
+
+    // terminate at the number of bytes actually read, which may be smaller
+    // than the size reported by ftell
+    size_t got = fread(mem, 1, (size_t)size, f);
+    mem[got] = 0;
+    fclose(f);
+
+    HMML_Output out = hmml_parse(mem);
+    free(mem);
+
+    // to look at "out" in gdb
+    asm("int3");
+
+    hmml_free(&out);
+    return 0;
+}
diff --git a/hmmlib2/hmmlib.h b/hmmlib2/hmmlib.h
new file mode 100644
index 0000000..9588905
--- /dev/null
+++ b/hmmlib2/hmmlib.h
@@ -0,0 +1,121 @@
+#ifndef HMML_H_
+#define HMML_H_
+#include <stddef.h>
+#include <stdio.h>
+
+// Data structures
+
+typedef struct {
+    char* name;
+    char* role;
+} HMML_Credit;
+
+typedef struct {
+    char* key;
+    char* value;
+} HMML_VideoCustomMetaData;
+
+typedef struct {
+    char* member;
+    char* stream_platform;
+    char* stream_username;
+    char* project;
+    char* title;
+    char* vod_platform;
+    char* id;
+    char* output;
+    char* template;
+    char* medium;
+
+    HMML_Credit* credits;
+    size_t credit_count;
+
+    HMML_VideoCustomMetaData* custom;
+    size_t custom_count;
+
+} HMML_VideoMetaData;
+
+typedef struct {
+    char* site;
+    char* page;
+    char* url;
+    char* title;
+    char* article;
+    char* author;
+    char* editor;
+    char* publisher;
+    char* isbn;
+} HMML_Reference;
+
+typedef enum {
+    HMML_CATEGORY,
+    HMML_MEMBER,
+    HMML_PROJECT,
+
+    HMML_MARKER_COUNT,
+} HMML_MarkerType;
+
+typedef struct {
+    HMML_MarkerType type;
+    char* marker;
+    char* parameter;  // not filled in by the parser yet
+    char* episode;    // not filled in by the parser yet
+} HMML_Marker;
+
+typedef struct {
+    int id;
+    char* author;
+} HMML_Quote;
+
+typedef struct {
+    int line;         // not filled in by the parser yet
+
+    int h, m, s;
+
+    char* text;
+    char* author;
+
+    HMML_Reference* references;
+    size_t reference_count;
+
+    HMML_Marker* markers;
+    size_t marker_count;
+
+    HMML_Quote quote; // quotes are not parsed yet
+    _Bool has_quote;
+} HMML_Annotation;
+
+typedef struct {
+    int line;         // not filled in by the parser yet
+    char* message;
+} HMML_Error;
+
+typedef struct {
+    _Bool well_formed;            // 1 only when the whole document parsed
+    HMML_VideoMetaData metadata;
+    HMML_Annotation* annotations;
+    size_t annotation_count;
+    HMML_Error error;             // message is set when well_formed is 0
+    void* free_list;              // allocation bookkeeping for hmml_free
+} HMML_Output;
+
+// Functions
+
+// Parses a NUL-terminated HMML document held in memory. The result owns its
+// own copies of every string and must be released with hmml_free, whether or
+// not well_formed is set.
+HMML_Output hmml_parse      (char* mem);
+
+// Declared for the finished library; hmmlib.c does not define these two yet.
+HMML_Output hmml_parse_file (FILE* file);
+void        hmml_dump       (HMML_Output* output);
+
+void        hmml_free       (HMML_Output* output);
+
+// Version
+
+extern const struct HMML_Version {
+    int Major, Minor, Patch;
+} hmml_version; // not defined by hmmlib.c yet
+
+#endif
diff --git a/hmmlib2/obbg032_1.hmml b/hmmlib2/obbg032_1.hmml
new file mode 100644
index 0000000..fe1e27e
--- /dev/null
+++ b/hmmlib2/obbg032_1.hmml
@@ -0,0 +1,25 @@
+[video member=nothings stream_platform=twitch stream_username=nothings2 project=obbg title="Open Block Building Game Development #32 (1/2)" vod_platform=youtube id=Vm-0ZUVMHHc annotator=Miblo]
+[2:01][Recap and update the TODO list]
+[10:08][Consult the example YouTube description file and the longest .hmml file]
+[38:58][Continue implementing parse_tag()][:parsing]
+[41:40][@miblo][Ohhh, right. Yeah, the \[video\] and \[/video\] tags are the only ones that have that open-close format. All the other tags are "single" tags]
+[41:51][Continue writing parse_tag() anyway][:parsing]
+[43:10][@miblo][@nothings2: There isn't really, other than the rambling in: [ref
+    site="GitLab: Annotation-Pushers / Annotation-System / Issues"
+    page="Handmade Annotation Markup Language (previously MibloMarkup)"
+    url=http://git.handmadedev.org/Annotation-Pushers/Annotation-System/issues/2]]
+[45:17][Enable parse_tag() to tokenise the \[video\] node][:parsing]
+[1:51:32][Parse out the embedded \: and \@ tags][:parsing]
+[1:57:50][:Run it to see if those two cases work and tweak the username conversion]
+[3:15:14][@experior][Add a space after the pound? Or another character]
+[3:16:40][Delete the \@ in the text node][:parsing]
+[3:42:11][@miblo][@nothings2: That's fine, yeah! They are also in: [ref
+    site="GitLab: Annotation-Pushers / Annotation-Game"
+    page="projects/nothings/obbg"
+    url=http://git.handmadedev.org/Annotation-Pushers/Annotation-Game/tree/master/projects/nothings/obbg]]
+[3:47:14][@insofaras][[ref
+    site="GitHub: nothings/obbg"
+    page="Pull Request #8: get it building and running on linux by insofaras"
+    url=https://github.com/nothings/obbg/pull/8/files#diff-90c561ba68b3be193f3378639ef489a0R1831]]
+[3:50:44][Take a break]
+[/video]