diff --git a/hmmlib2/Makefile b/hmmlib2/Makefile deleted file mode 100644 index e72a78a..0000000 --- a/hmmlib2/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# this is just for the test program (WON build should also work) -# when the library is finished, it should be #include-able - -hmmlib: hmmlib.c - gcc -g $< -o $@ diff --git a/hmmlib2/README.TXT b/hmmlib2/README.TXT new file mode 100644 index 0000000..1544098 --- /dev/null +++ b/hmmlib2/README.TXT @@ -0,0 +1,6 @@ +This is a single-header library for parsing the HMML Format. +See https://git.handmade.network/Annotation-Pushers/Annotation-System/-/wikis/hmmlspec + +To use: + 1. #include "hmmlib.h" in any files that use the functions / data structures. + 2. in **one** of your .c files, #define HMMLIB_IMPLEMENTATION before including it. diff --git a/hmmlib2/hmmlib.c b/hmmlib2/hmmlib.c deleted file mode 100644 index 62e2471..0000000 --- a/hmmlib2/hmmlib.c +++ /dev/null @@ -1,589 +0,0 @@ -#if 0 -gcc -g "$0" -o "${0%%.c}" -exit 0 -#endif -#include "hmmlib.h" -#include -#include -#include -#include -#include - -// -// TODO: -// parse quotes -// marker parameter + episode number -// track line numbers somehow -// seems to be only needed for the error-func, so... -// could probably do it after-the-fact, counting \n until we hit p->cursor -// port over the hmml_dump function -// test various files -// cleanup -// - -#define HSTX(x) x, sizeof(x)-1 -#define HSTR(x) (struct _hmml_str){ HSTX(x) } - -#ifndef MALLOC - #define MALLOC malloc -#endif - -#ifndef REALLOC - #define REALLOC realloc -#endif - -#ifndef countof - #define countof(x) (sizeof(x)/sizeof(*x)) -#endif - -#define _hmml_debug(...) -//#define _hmml_debug printf - -struct _hmml_parser { - HMML_Output out; - char* cursor; - jmp_buf err_buf; - intptr_t* free_list; -}; - -struct _hmml_str { - char* ptr; - size_t len; -}; - -// memory management boilerplate stuff - -static void* _hmml_store_ptr(struct _hmml_parser* p, void* input) -{ - uintptr_t* ptr; - if(p->free_list) { - ptr = p->free_list; - if(ptr[1] + 1 == ptr[0]) { - size_t n = ptr[0] << 1; - ptr = REALLOC(ptr, n * sizeof(uintptr_t)); - ptr[0] = n; - } - ptr[ptr[1]] = (intptr_t)input; - ptr[1]++; - } else { - ptr = MALLOC(8 * sizeof(uintptr_t)); - ptr[0] = 8; - ptr[1] = 3; - ptr[2] = (intptr_t)input; - } - p->free_list = ptr; - return input; -} - -static char* _hmml_persist_str(struct _hmml_parser* p, const struct _hmml_str str) -{ - char* mem = MALLOC(str.len+1); - memcpy(mem, str.ptr, str.len); - mem[str.len] = '\0'; - return _hmml_store_ptr(p, mem); -} - -static void _hmml_persist_array_fn(struct _hmml_parser* p, void** out, size_t* out_count, void* in, size_t in_size) -{ - void* base; - if(!*out) { - base = MALLOC(in_size + sizeof(size_t)); - _hmml_store_ptr(p, base); - *(size_t*)base = p->free_list[1]-1; - } else { - base = (char*)(*out) - sizeof(size_t); - base = REALLOC(base, (*out_count + 1) * in_size + sizeof(size_t)); - size_t free_list_off = *(size_t*)base; - p->free_list[free_list_off] = (intptr_t)base; - } - - *out = (char*)base + sizeof(size_t); - memcpy((char*)*out + (*out_count * in_size), in, in_size); - ++(*out_count); -} - -#define _hmml_persist_array(p, out, out_count, in) \ - _hmml_persist_array_fn((p), (void**)(out), (out_count), &(in), sizeof(in)) - -// error handling - -#define _hmml_err(p, fmt, ...) \ - _hmml_err_fn((p), fmt "\n", ##__VA_ARGS__) - -__attribute__((noreturn)) -static void _hmml_err_fn(struct _hmml_parser* p, const char* fmt, ...) -{ - static char error_buf[4096]; - - va_list va; - va_start(va, fmt); - int n = vsnprintf(error_buf, sizeof(error_buf), fmt, va); - va_end(va); - - // TODO: figure out / keep track of the line number - - p->out.error.message = _hmml_persist_str(p, (struct _hmml_str){ error_buf, n }); - longjmp(p->err_buf, 1); -} - -// actual parsing stuff - -static void _hmml_skip_ws(struct _hmml_parser* p) -{ - p->cursor += strspn(p->cursor, " \t\r\n"); -} - -static _Bool _hmml_str_eq(struct _hmml_str a, struct _hmml_str b) -{ - return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0; -} - -static _Bool _hmml_unesc(char in, char* out) -{ - if(strchr("[]:@~\\\"", in)) { - *out = in; - return 1; - } else { - return 0; - } -} - -static char* _hmml_read_attr(struct _hmml_parser* p, char* mem, size_t mem_size) -{ - char* src = p->cursor; - char* dst = mem; - - if(*src == '"') { - ++src; - while(*src && *src != '"' && src - p->cursor < mem_size) { - char converted; - if(*src == '\\' && _hmml_unesc(src[1], &converted)) { - *dst++ = converted; - src += 2; - } else { - *dst++ = *src++; - } - } - - if(*src != '"') { - _hmml_err(p, "Partially quoted attribute"); - } - - *dst = '\0'; - p->cursor = src+1; - } else { - size_t n = strcspn(src, " ]\r\n\t"); - if(n >= mem_size) { - _hmml_err(p, "Attribute [%.10s...] too long", p->cursor); - } - memcpy(dst, src, n); - dst += n; - *dst = '\0'; - p->cursor += n; - } - - return dst; -} - -static void _hmml_read_kv(struct _hmml_parser* p, struct _hmml_str* key, struct _hmml_str* val) -{ - static char key_memory[64]; - static char val_memory[1024]; - - size_t key_len = strcspn(p->cursor, " \r\n\t="); - if(key_len >= sizeof(key_memory)) { - _hmml_err(p, "Attribute key [%.10s...] too long", p->cursor); - } - - memcpy(key_memory, p->cursor, key_len); - key_memory[key_len] = '\0'; - p->cursor += key_len; - - _hmml_skip_ws(p); - - if(*p->cursor != '=') { - _hmml_err(p, "Expected '=', got [%.3s]", p->cursor); - } - - ++p->cursor; - - char* src = p->cursor; - char* dst = _hmml_read_attr(p, val_memory, sizeof(val_memory)); - - _hmml_debug("read kv [%s] = [%s]\n", key_memory, val_memory); - - key->ptr = key_memory; - key->len = key_len; - - val->ptr = val_memory; - val->len = dst - val_memory; -} - -static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p) -{ - static char marker_mem[4096]; - - // the extended markers are inside [ ] and can contain parameters - _Bool extended = *p->cursor == '['; - if(extended) { - ++p->cursor; - } - - HMML_Marker marker = {}; - - char c = *p->cursor; - if(c == '~') { - marker.type = HMML_PROJECT; - } else if(c == '@') { - marker.type = HMML_MEMBER; - } else if(c == ':') { - marker.type = HMML_CATEGORY; - } else { - _hmml_err(p, "Unknown marker type"); - } - - ++p->cursor; - - char* end = _hmml_read_attr(p, marker_mem, sizeof(marker_mem)); - marker.marker = _hmml_persist_str(p, (struct _hmml_str){ marker_mem, end - marker_mem }); - - _hmml_skip_ws(p); - - if(*p->cursor == '#') { - // TODO: marker.episode = smth - } else { - // TODO: marker.parameter = _hmml_persist_str(p, (struct _hmml_str){ }); - } - - if(extended) { - if(*p->cursor != ']') { - _hmml_err(p, "Expected ']'"); - } - ++p->cursor; - } - - return marker; -} - -static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p) -{ - HMML_Reference ref = {}; - - struct str_attr { - struct _hmml_str str; - char** dest; - } str_attrs[] = { - { HSTR("site") , &ref.site }, - { HSTR("page") , &ref.page }, - { HSTR("url") , &ref.url }, - { HSTR("title") , &ref.title }, - { HSTR("article") , &ref.article }, - { HSTR("author") , &ref.author }, - { HSTR("editor") , &ref.editor }, - { HSTR("publisher"), &ref.publisher }, - { HSTR("isbn") , &ref.isbn }, - }; - - for(;;) { -next_attr: - _hmml_skip_ws(p); - - if(*p->cursor == ']') { - ++p->cursor; - break; - } - - struct _hmml_str key, value; - _hmml_read_kv(p, &key, &value); - - for(int i = 0; i < countof(str_attrs); ++i) { - struct str_attr* s = str_attrs + i; - if(_hmml_str_eq(key, s->str)) { - *s->dest = _hmml_persist_str(p, value); - goto next_attr; - } - } - - _hmml_err(p, "Unknown reference attribute"); - } - - return ref; -} - -static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Annotation* anno) -{ - int h = 0, m = 0, s = 0, offset = 0; - int count = sscanf(p->cursor, "[%u:%u%n", &m, &s, &offset); - - if(count < 2) { - _hmml_err(p, "Unable to parse timecode"); - } - - p->cursor += offset; - char c = *p->cursor; - - if(c == ':') { - int tmp; - offset = 0; - if(sscanf(p->cursor, ":%u]%n", &tmp, &offset) != 1 || offset == 0) { - _hmml_err(p, "Unable to parse 3-part timecode"); - } - - h = m; - m = s; - s = tmp; - - p->cursor += offset; - - } else if(c != ']') { - _hmml_err(p, "Unable to parse timecode"); - } else { - ++p->cursor; - } - - if(s >= 60) { - _hmml_err(p, "Seconds cannot exceed 59"); - } - - if(m >= 60) { - _hmml_err(p, "Minutes cannot exceed 59"); - } - - anno->h = h; - anno->m = m; - anno->s = s; -} - -static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno) -{ - static char text_mem[4096]; - char* out = text_mem; - - memset(text_mem, 0, sizeof(text_mem)); - - for(;;) { - size_t n = strcspn(p->cursor, "\\[]:@~"); - char c = p->cursor[n]; - - if(out + n > text_mem + sizeof(text_mem)) { - _hmml_err(p, "Not enough text memory"); - } - - memcpy(out, p->cursor, n); - p->cursor += n; - out += n; - - if(c == '\0') { - _hmml_err(p, "Unexpected EOF"); - } - - else if(c == ']') { - ++p->cursor; - break; - } - - else if(c == '\\') { - *out++ = p->cursor[1]; - p->cursor += 2; - } - - else if(c == '[') { - if(strncmp(p->cursor + 1, "ref", 3) == 0) { - p->cursor += 4; - HMML_Reference ref = _hmml_parse_ref(p); - _hmml_persist_array(p, &anno->references, &anno->reference_count, ref); - } else { - HMML_Marker m = _hmml_parse_marker(p); - _hmml_persist_array(p, &anno->markers, &anno->marker_count, m); - } - } - - else { - // it is a @ ~ or : marker without parameters - HMML_Marker m = _hmml_parse_marker(p); - _hmml_persist_array(p, &anno->markers, &anno->marker_count, m); - } - - if(out - text_mem >= sizeof(text_mem)) { - _hmml_err(p, "Not enough text memory"); - } - } - - anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, out - text_mem }); -} - -static void _hmml_parse_annotations(struct _hmml_parser* p) -{ - for(;;) { - _hmml_skip_ws(p); - - if(*p->cursor == '\0') { - _hmml_err(p, "Unexpected EOF"); - } - - if(strncmp(p->cursor, "[/video]", 8) == 0) { - break; - } - - HMML_Annotation anno = {}; - - _hmml_parse_timecode(p, &anno); - - if(*p->cursor != '[') { - _hmml_err(p, "Expected '['"); - } - - if(p->cursor[1] == '@') { - HMML_Marker m = _hmml_parse_marker(p); - anno.author = m.marker; - } - - ++p->cursor; - - _hmml_parse_text(p, &anno); - - if(p->cursor[0] == '[' && p->cursor[1] == ':') { - HMML_Marker m = _hmml_parse_marker(p); - _hmml_persist_array(p, &anno.markers, &anno.marker_count, m); - } - - if(p->cursor[0] == '[' && p->cursor[1] == 'q') { - //_hmml_parse_quote(); - } - - _hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno); - } -} - -static void _hmml_parse_video(struct _hmml_parser* p) -{ - struct str_attr { - struct _hmml_str str; - char** dest; - } str_attrs[] = { - { HSTR("member") , &p->out.metadata.member }, - { HSTR("stream_platform"), &p->out.metadata.stream_platform }, - { HSTR("stream_username"), &p->out.metadata.stream_username }, - { HSTR("project") , &p->out.metadata.project }, - { HSTR("title") , &p->out.metadata.title }, - { HSTR("vod_platform") , &p->out.metadata.vod_platform }, - { HSTR("id") , &p->out.metadata.id }, - { HSTR("template") , &p->out.metadata.template }, - { HSTR("medium") , &p->out.metadata.medium }, - { HSTR("output") , &p->out.metadata.output }, - }; - - for(;;) { -next_attr: - _hmml_skip_ws(p); - - if(*p->cursor == ']') { - ++p->cursor; - return _hmml_parse_annotations(p); - } - - struct _hmml_str key, value; - _hmml_read_kv(p, &key, &value); - - for(int i = 0; i < countof(str_attrs); ++i) { - struct str_attr* s = str_attrs + i; - if(_hmml_str_eq(key, s->str)) { - *s->dest = _hmml_persist_str(p, value); - goto next_attr; - } - } - - if(_hmml_str_eq(key, HSTR("credit"))) { - HMML_Credit credit = {}; - - char* colon = strchr(value.ptr, ':'); - if(colon) { - *colon = '\0'; - credit.name = _hmml_persist_str(p, (struct _hmml_str){ value.ptr, colon - value.ptr }); - credit.role = _hmml_persist_str(p, (struct _hmml_str){ colon+1, value.len - ((colon+1) - value.ptr) }); - } else { - credit.name = _hmml_persist_str(p, value); - } - - _hmml_persist_array(p, &p->out.metadata.credits, &p->out.metadata.credit_count, credit); - - goto next_attr; - } - - HMML_VideoCustomMetaData custom = { - .key = _hmml_persist_str(p, key), - .value = _hmml_persist_str(p, value), - }; - - _hmml_persist_array(p, &p->out.metadata.custom, &p->out.metadata.custom_count, custom); - } -} - -HMML_Output hmml_parse(char* mem) -{ - struct _hmml_parser p = {}; - p.cursor = mem; - - if(setjmp(p.err_buf) == 1) { - // if it returns 1, an error happened - return p.out; - } - - static const struct _hmml_str prefix = HSTR("[video"); - if(strncasecmp(p.cursor, prefix.ptr, prefix.len)) { - _hmml_err(&p, "Missing initial video tag."); - } else { - p.cursor += prefix.len; - _hmml_parse_video(&p); - } - - p.out.free_list = p.free_list; - p.out.well_formed = 1; - return p.out; -} - -void hmml_free(HMML_Output* out) -{ - if(!out->free_list) { - return; - } - - for(int i = 2; i < ((uintptr_t*)out->free_list)[1]; ++i) { - free(((void**)out->free_list)[i]); - } - - free(out->free_list); -} - -// for testing, should be removed or #if 0'd for the library -int main(int argc, char** argv) -{ - if(argc < 2) { - fprintf(stderr, "Usage: %s [file]\n", argv[0]); - return 1; - } - - FILE* f = fopen(argv[1], "r"); - if(!f) { - perror(argv[1]); - return 1; - } - - fseek(f, 0, SEEK_END); - long size = ftell(f); - rewind(f); - - char* mem = malloc(size+1); - mem[size] = 0; - - fread(mem, 1, size, f); - fclose(f); - - HMML_Output out = hmml_parse(mem); - free(mem); - - // to look at "out" in gdb - asm("int3"); - - hmml_free(&out); -} diff --git a/hmmlib2/hmmlib.h b/hmmlib2/hmmlib.h index 9588905..e65e169 100644 --- a/hmmlib2/hmmlib.h +++ b/hmmlib2/hmmlib.h @@ -1,5 +1,5 @@ -#ifndef HMML_H_ -#define HMML_H_ +#ifndef HMMLIB_H_ +#define HMMLIB_H_ #include #include @@ -63,6 +63,7 @@ typedef struct { } HMML_Marker; typedef struct { + _Bool present; int id; char* author; } HMML_Quote; @@ -82,11 +83,11 @@ typedef struct { size_t marker_count; HMML_Quote quote; - _Bool has_quote; } HMML_Annotation; typedef struct { int line; + int col; char* message; } HMML_Error; @@ -96,14 +97,13 @@ typedef struct { HMML_Annotation* annotations; size_t annotation_count; HMML_Error error; - void* free_list; + void* free_list; // implementation detail } HMML_Output; // Functions -HMML_Output hmml_parse_file (FILE* file); -void hmml_dump (HMML_Output* output); -void hmml_free (HMML_Output* output); +HMML_Output hmml_parse (const char* string); +void hmml_free (HMML_Output* output); // Version @@ -112,3 +112,675 @@ extern const struct HMML_Version { } hmml_version; #endif + +#ifdef HMMLIB_IMPLEMENTATION + +#include +#include +#include +#include +#include + +#define HSTX(x) x, sizeof(x)-1 +#define HSTR(x) (const struct _hmml_str){ HSTX(x) } + +#ifndef MALLOC + #define MALLOC malloc +#endif + +#ifndef REALLOC + #define REALLOC realloc +#endif + +#ifndef countof + #define countof(x) (sizeof(x)/sizeof(*x)) +#endif + +#define _hmml_debug(...) +//#define _hmml_debug printf + +struct _hmml_parser { + HMML_Output out; + const char* mem; + const char* cursor; + jmp_buf err_buf; + uintptr_t* free_list; + int line; +}; + +struct _hmml_str { + const char* ptr; + size_t len; +}; + +// memory management boilerplate stuff + +static void* _hmml_store_ptr(struct _hmml_parser* p, void* input) +{ + uintptr_t* ptr; + if(p->free_list) { + ptr = p->free_list; + if(ptr[1] + 1 == ptr[0]) { + size_t n = ptr[0] << 1; + ptr = REALLOC(ptr, n * sizeof(uintptr_t)); + ptr[0] = n; + } + ptr[ptr[1]] = (uintptr_t)input; + ptr[1]++; + } else { + ptr = MALLOC(8 * sizeof(uintptr_t)); + ptr[0] = 8; + ptr[1] = 3; + ptr[2] = (uintptr_t)input; + } + p->free_list = ptr; + return input; +} + +static char* _hmml_persist_str(struct _hmml_parser* p, const struct _hmml_str str) +{ + char* mem = MALLOC(str.len+1); + memcpy(mem, str.ptr, str.len); + mem[str.len] = '\0'; + return _hmml_store_ptr(p, mem); +} + +static void _hmml_persist_array_fn(struct _hmml_parser* p, void** out, size_t* out_count, void* in, size_t in_size) +{ + void* base; + if(!*out) { + base = MALLOC(in_size + sizeof(size_t)); + _hmml_store_ptr(p, base); + *(size_t*)base = p->free_list[1]-1; + } else { + base = (char*)(*out) - sizeof(size_t); + base = REALLOC(base, (*out_count + 1) * in_size + sizeof(size_t)); + size_t free_list_off = *(size_t*)base; + p->free_list[free_list_off] = (intptr_t)base; + } + + *out = (char*)base + sizeof(size_t); + memcpy((char*)*out + (*out_count * in_size), in, in_size); + ++(*out_count); +} + +#define _hmml_persist_array(p, out, out_count, in) \ + _hmml_persist_array_fn((p), (void**)(out), (out_count), &(in), sizeof(in)) + +// error handling + +#define _hmml_err(p, fmt, ...) \ + _hmml_err_fn((p), fmt "\n", ##__VA_ARGS__) + +__attribute__((noreturn)) +static void _hmml_err_fn(struct _hmml_parser* p, const char* fmt, ...) +{ + static char error_buf[4096]; + + va_list va; + va_start(va, fmt); + int n = vsnprintf(error_buf, sizeof(error_buf), fmt, va); + va_end(va); + + int line = 1, col = 1; + for(const char* ptr = p->mem; ptr != p->cursor; ++ptr) { + if(*ptr == '\n') { + ++line; + col = 1; + } else { + ++col; + } + } + + p->out.error.message = _hmml_persist_str(p, (struct _hmml_str){ error_buf, n }); + p->out.error.line = line; + p->out.error.col = col; + + longjmp(p->err_buf, 1); +} + +// actual parsing stuff + +static void _hmml_skip_ws(struct _hmml_parser* p) +{ + for(;;) { + uint8_t c = *p->cursor; + if(c && c <= ' ') { + if(c == '\n') { + ++p->line; + } + ++p->cursor; + } else { + break; + } + } +} + +static _Bool _hmml_str_eq(struct _hmml_str a, struct _hmml_str b) +{ + return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0; +} + +static _Bool _hmml_unesc(char in, char* out) +{ + if(strchr("[]:@~\\\"", in)) { + *out = in; + return 1; + } else { + return 0; + } +} + +static char* _hmml_read_attr(struct _hmml_parser* p, char* mem, size_t mem_size, _Bool break_on_punct) +{ + const char* src = p->cursor; + char* dst = mem; + + if(*src == '"') { + ++src; + while(*src && *src != '"' && (size_t)(src - p->cursor) < mem_size) { + char converted; + if(*src == '\\' && _hmml_unesc(src[1], &converted)) { + *dst++ = converted; + src += 2; + } else { + *dst++ = *src++; + } + } + + if(*src != '"') { + _hmml_err(p, "Partially quoted attribute"); + } + + *dst = '\0'; + p->cursor = src+1; + } else { + const char* breaks = break_on_punct + ? " ]\r\n\t:,'-./#=" + : " ]\r\n\t" + ; + + size_t n = strcspn(src, breaks); + if(n >= mem_size) { + _hmml_err(p, "Attribute [%.10s...] too long", p->cursor); + } + memcpy(dst, src, n); + dst += n; + *dst = '\0'; + p->cursor += n; + } + + return dst; +} + +static void _hmml_read_kv(struct _hmml_parser* p, struct _hmml_str* key, struct _hmml_str* val) +{ + static char key_memory[64]; + static char val_memory[1024]; + + size_t key_len = strcspn(p->cursor, " \r\n\t="); + if(key_len >= sizeof(key_memory)) { + _hmml_err(p, "Attribute key [%.10s...] too long", p->cursor); + } + + memcpy(key_memory, p->cursor, key_len); + key_memory[key_len] = '\0'; + p->cursor += key_len; + + _hmml_skip_ws(p); + + if(*p->cursor != '=') { + _hmml_err(p, "Expected '=', got [%.3s]", p->cursor); + } + + ++p->cursor; + + char* end = _hmml_read_attr(p, val_memory, sizeof(val_memory), 0); + + _hmml_debug("read kv [%s] = [%s]\n", key_memory, val_memory); + + key->ptr = key_memory; + key->len = key_len; + + val->ptr = val_memory; + val->len = end - val_memory; +} + +static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p) +{ + static char marker_mem[4096]; + + // the extended markers are inside [ ] and can contain parameters + _Bool extended = *p->cursor == '['; + if(extended) { + ++p->cursor; + } + + HMML_Marker marker = {}; + + char c = *p->cursor; + if(c == '~') { + marker.type = HMML_PROJECT; + } else if(c == '@') { + marker.type = HMML_MEMBER; + } else if(c == ':') { + marker.type = HMML_CATEGORY; + } else { + _hmml_err(p, "Unknown marker type"); + } + + ++p->cursor; + + char* end = _hmml_read_attr(p, marker_mem, sizeof(marker_mem), 1); + marker.marker = _hmml_persist_str(p, (struct _hmml_str){ marker_mem, end - marker_mem }); + + if(extended) { + _hmml_skip_ws(p); + + if(*p->cursor == '#') { + ++p->cursor; + size_t n = strcspn(p->cursor, " "); + marker.episode = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, n }); + p->cursor += n; + } else if(*p->cursor != ']') { + const char* end = p->cursor; + + for(;;) { + if(!*end) { + break; + } + + char converted; + if(*end == '\\' && _hmml_unesc(end[1], &converted)) { + end += 2; + } else if(*end == ']'){ + break; + } else { + ++end; + } + } + + marker.parameter = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, end - p->cursor }); + p->cursor = end; + } + + if(*p->cursor != ']') { + _hmml_err(p, "Expected ']'"); + } + ++p->cursor; + } + + return marker; +} + +static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p) +{ + HMML_Reference ref = {}; + + struct str_attr { + struct _hmml_str str; + char** dest; + } str_attrs[] = { + { HSTR("site") , &ref.site }, + { HSTR("page") , &ref.page }, + { HSTR("url") , &ref.url }, + { HSTR("title") , &ref.title }, + { HSTR("article") , &ref.article }, + { HSTR("author") , &ref.author }, + { HSTR("editor") , &ref.editor }, + { HSTR("publisher"), &ref.publisher }, + { HSTR("isbn") , &ref.isbn }, + }; + + for(;;) { +next_attr: + _hmml_skip_ws(p); + + if(*p->cursor == ']') { + ++p->cursor; + break; + } + + struct _hmml_str key, value; + _hmml_read_kv(p, &key, &value); + + for(size_t i = 0; i < countof(str_attrs); ++i) { + struct str_attr* s = str_attrs + i; + if(_hmml_str_eq(key, s->str)) { + *s->dest = _hmml_persist_str(p, value); + goto next_attr; + } + } + + _hmml_err(p, "Unknown reference attribute"); + } + + return ref; +} + +static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Annotation* anno) +{ + unsigned int h = 0, m = 0, s = 0; + int offset = 0; + int count = sscanf(p->cursor, "[%u:%u%n", &m, &s, &offset); + + if(count < 2) { + _hmml_err(p, "Unable to parse timecode"); + } + + p->cursor += offset; + char c = *p->cursor; + + if(c == ':') { + unsigned int tmp; + offset = 0; + if(sscanf(p->cursor, ":%u]%n", &tmp, &offset) != 1 || offset == 0) { + _hmml_err(p, "Unable to parse 3-part timecode"); + } + + h = m; + m = s; + s = tmp; + + p->cursor += offset; + + } else if(c != ']') { + _hmml_err(p, "Unable to parse timecode"); + } else { + ++p->cursor; + } + + if(s >= 60) { + _hmml_err(p, "Seconds cannot exceed 59"); + } + + if(m >= 60) { + _hmml_err(p, "Minutes cannot exceed 59"); + } + + anno->h = h; + anno->m = m; + anno->s = s; +} + +static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno) +{ + static char text_mem[4096]; + char* out = text_mem; + + memset(text_mem, 0, sizeof(text_mem)); + +#define CHECKSIZE(__n) \ + if(out + (__n) > text_mem + sizeof(text_mem)) {\ + _hmml_err(p, "Not enough text memory");\ + } + + for(;;) { + size_t n = strcspn(p->cursor, "\\\n[]:@~"); + char c = p->cursor[n]; + + CHECKSIZE(n); + memcpy(out, p->cursor, n); + + p->cursor += n; + out += n; + + if(c == '\0') { + _hmml_err(p, "Unexpected EOF"); + } + + else if(c == ']') { + ++p->cursor; + break; + } + + else if(c == '\\') { + char converted; + if(_hmml_unesc(p->cursor[1], &converted)) { + *out++ = converted; + p->cursor += 2; + } else { + *out++ = '\\'; + p->cursor++; + } + } + + else if(c == '\n') { + ++p->cursor; + } + + else if(c == '[') { + if(strncmp(p->cursor + 1, "ref", 3) == 0) { + p->cursor += 4; + HMML_Reference ref = _hmml_parse_ref(p); + _hmml_persist_array(p, &anno->references, &anno->reference_count, ref); + } else { + HMML_Marker m = _hmml_parse_marker(p); + _hmml_persist_array(p, &anno->markers, &anno->marker_count, m); + + size_t text_len = strlen(m.marker); + CHECKSIZE(text_len); + memcpy(out, m.marker, text_len); + out += text_len; + } + } + + // it is a @ ~ or : marker without parameters + else { + // if next char is a space, it can't be a marker + if(strchr(" \t\r\n", p->cursor[1])) { + *out++ = c; + ++p->cursor; + } else { + HMML_Marker m = _hmml_parse_marker(p); + _hmml_persist_array(p, &anno->markers, &anno->marker_count, m); + size_t text_len = strlen(m.marker); + CHECKSIZE(text_len); + memcpy(out, m.marker, text_len); + out += text_len; + } + } + + if((size_t)(out - text_mem) >= sizeof(text_mem)) { + _hmml_err(p, "Not enough text memory"); + } + } + + // trim trailing whitespace + while(out > text_mem && out[-1] && (uint8_t)(out[-1]) <= ' ') { + out[-1] = '\0'; + --out; + } + + anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, out - text_mem }); + +#undef CHECKSIZE +} + +static void _hmml_parse_quote(struct _hmml_parser* p, HMML_Annotation* anno) +{ + char member[256]; + int id; + int off = 0; + + if(sscanf(p->cursor, "[quote %255s %d]%n", member, &id, &off) == 2 && off) { + anno->quote.present = 1; + anno->quote.id = id; + anno->quote.author = _hmml_persist_str(p, (struct _hmml_str){ member, strlen(member) }); + } else if(sscanf(p->cursor, "[quote %d]%n", &id, &off) == 1 && off) { + anno->quote.present = 1; + anno->quote.id = id; + } else { + _hmml_err(p, "Unable to parse quote"); + } + + p->cursor += off; +} + +static void _hmml_parse_annotations(struct _hmml_parser* p) +{ + for(;;) { + _hmml_skip_ws(p); + + if(*p->cursor == '\0') { + _hmml_err(p, "Unexpected EOF"); + } + + if(strncmp(p->cursor, "[/video]", 8) == 0) { + break; + } + + HMML_Annotation anno = { + .line = p->line + }; + + _hmml_parse_timecode(p, &anno); + + if(*p->cursor != '[') { + _hmml_err(p, "Expected '['"); + } + + if(p->cursor[1] == '@') { + HMML_Marker m = _hmml_parse_marker(p); + anno.author = m.marker; + } + + ++p->cursor; + + _hmml_parse_text(p, &anno); + + if(p->cursor[0] == '[' && p->cursor[1] == ':') { + HMML_Marker m = _hmml_parse_marker(p); + _hmml_persist_array(p, &anno.markers, &anno.marker_count, m); + } + + if(p->cursor[0] == '[' && p->cursor[1] == 'q') { + _hmml_parse_quote(p, &anno); + } + + // convert all markers to lowercase + for(size_t i = 0; i < anno.marker_count; ++i) { + HMML_Marker* m = anno.markers + i; + for(char* c = m->marker; *c; ++c) { + if(*c >= 'A' && *c <= 'Z') { + *c = (*c - ('A' - 'a')); + } + } + } + + _hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno); + } +} + +static void _hmml_parse_video(struct _hmml_parser* p) +{ + struct str_attr { + struct _hmml_str str; + char** dest; + } str_attrs[] = { + { HSTR("member") , &p->out.metadata.member }, + { HSTR("stream_platform"), &p->out.metadata.stream_platform }, + { HSTR("stream_username"), &p->out.metadata.stream_username }, + { HSTR("project") , &p->out.metadata.project }, + { HSTR("title") , &p->out.metadata.title }, + { HSTR("vod_platform") , &p->out.metadata.vod_platform }, + { HSTR("id") , &p->out.metadata.id }, + { HSTR("template") , &p->out.metadata.template }, + { HSTR("medium") , &p->out.metadata.medium }, + { HSTR("output") , &p->out.metadata.output }, + }; + + for(;;) { +next_attr: + _hmml_skip_ws(p); + + if(*p->cursor == ']') { + ++p->cursor; + _hmml_parse_annotations(p); + return; + } + + struct _hmml_str key, value; + _hmml_read_kv(p, &key, &value); + + for(size_t i = 0; i < countof(str_attrs); ++i) { + struct str_attr* s = str_attrs + i; + if(_hmml_str_eq(key, s->str)) { + *s->dest = _hmml_persist_str(p, value); + goto next_attr; + } + } + + if(_hmml_str_eq(key, HSTR("credit"))) { + HMML_Credit credit = {}; + + char* colon = strchr(value.ptr, ':'); + if(colon) { + *colon = '\0'; + credit.name = _hmml_persist_str(p, (struct _hmml_str){ value.ptr, colon - value.ptr }); + credit.role = _hmml_persist_str(p, (struct _hmml_str){ colon+1, value.len - ((colon+1) - value.ptr) }); + } else { + credit.name = _hmml_persist_str(p, value); + } + + _hmml_persist_array(p, &p->out.metadata.credits, &p->out.metadata.credit_count, credit); + + goto next_attr; + } + + HMML_VideoCustomMetaData custom = { + .key = _hmml_persist_str(p, key), + .value = _hmml_persist_str(p, value), + }; + + _hmml_persist_array(p, &p->out.metadata.custom, &p->out.metadata.custom_count, custom); + } +} + +HMML_Output hmml_parse(const char* string) +{ + struct _hmml_parser p = { + .mem = string, + .cursor = string, + .line = 1, + }; + + if(setjmp(p.err_buf) == 1) { + // if it returns 1, an error happened + return p.out; + } + + static const struct _hmml_str prefix = HSTR("[video"); + if(strncasecmp(p.cursor, prefix.ptr, prefix.len)) { + _hmml_err(&p, "Missing initial video tag."); + } else { + p.cursor += prefix.len; + _hmml_parse_video(&p); + } + + p.out.free_list = p.free_list; + p.out.well_formed = 1; + return p.out; +} + +void hmml_free(HMML_Output* out) +{ + if(!out->free_list) { + return; + } + + for(uintptr_t i = 2; i < ((uintptr_t*)out->free_list)[1]; ++i) { + free(((void**)out->free_list)[i]); + } + + free(out->free_list); +} + +const struct HMML_Version hmml_version = { + 2, 0, 0 +}; + +#undef HSTX +#undef HSTR + +#endif diff --git a/hmmlib2/utils/Makefile b/hmmlib2/utils/Makefile new file mode 100644 index 0000000..4ae97d6 --- /dev/null +++ b/hmmlib2/utils/Makefile @@ -0,0 +1,2 @@ +hmmldump: dump.c ../hmmlib.h stb_sb.h + gcc -Wall -Wextra -I.. -g $< -o $@ diff --git a/hmmlib2/utils/dump.c b/hmmlib2/utils/dump.c new file mode 100644 index 0000000..2be6d39 --- /dev/null +++ b/hmmlib2/utils/dump.c @@ -0,0 +1,192 @@ +#define HMMLIB_IMPLEMENTATION +#include "hmmlib.h" +#include "stb_sb.h" + +typedef struct { + char* text; + int* lines; +} Index; + +static Index* index_find(Index* base, const char* text) +{ + for(size_t i = 0; i < sb_count(base); ++i){ + if(strcmp(base[i].text, text) == 0){ + return base + i; + } + } + return NULL; +} + +void hmml_dump(HMML_Output* hmml) +{ + if(!hmml){ + puts("(null)"); + return; + } + + if(!hmml->well_formed){ + printf("Error:%d:%d %s\n", hmml->error.line, hmml->error.col, hmml->error.message); + return; + } + + puts("Annotations:"); + for(size_t i = 0; i < hmml->annotation_count; ++i){ + HMML_Annotation* a = hmml->annotations + i; + + char time_buf[256]; + char* tp = time_buf; + + if(a->h) { + *tp++ = (a->h%10) + '0'; + sprintf(tp, ":%02d:%02d", a->m, a->s); + } else { + sprintf(tp, " %2d:%02d", a->m, a->s); + } + + printf("\t%3d [%s] [%s]\n", a->line, time_buf, a->text); + } + + Index* authors = NULL; + Index* markers[HMML_MARKER_COUNT] = {}; + int max_text_len = 0; + + for(size_t i = 0; i < hmml->annotation_count; ++i){ + HMML_Annotation* a = hmml->annotations + i; + + if(a->author){ + int len = strlen(a->author); + if(len > max_text_len){ + max_text_len = len; + } + + Index* idx; + if(!(idx = index_find(authors, a->author))){ + Index x = { .text = a->author }; + sb_push(authors, x); + idx = &sb_last(authors); + } + + sb_push(idx->lines, a->line); + } + } + + for(size_t i = 0; i < hmml->annotation_count; ++i){ + HMML_Annotation* a = hmml->annotations + i; + + for(size_t j = 0; j < a->marker_count; ++j){ + int type = a->markers[j].type; + char* text = a->markers[j].marker; + + int len = strlen(text); + if(len > max_text_len){ + max_text_len = len; + } + + Index* idx; + if(!(idx = index_find(markers[type], text))){ + Index x = { .text = text }; + sb_push(markers[type], x); + idx = &sb_last(markers[type]); + } + + sb_push(idx->lines, a->line); + } + } + + puts("Authors:"); + for(size_t i = 0; i < sb_count(authors); ++i){ + printf("\t %*s: ", max_text_len, authors[i].text); + for(size_t j = 0; j < sb_count(authors[i].lines); ++j){ + printf("%3d ", authors[i].lines[j]); + } + puts(""); + } + + + static const char* m_tags[HMML_MARKER_COUNT] = { "Categories", "Members", "Projects" }; + + for(size_t i = 0; i < HMML_MARKER_COUNT; ++i){ + printf("%s:\n", m_tags[i]); + for(size_t j = 0; j < sb_count(markers[i]); ++j){ + printf("\t %*s: ", max_text_len, markers[i][j].text); + for(size_t k = 0; k < sb_count(markers[i][j].lines); ++k){ + printf("%3d ", markers[i][j].lines[k]); + } + puts(""); + } + } + + static const char* r_tags[] = { "Site", "Page", "URL", "Title", "Article", "Author", "Editor", "Publisher", "ISBN" }; + puts("References:"); + for(size_t i = 0; i < hmml->annotation_count; ++i){ + HMML_Annotation* a = hmml->annotations + i; + for(size_t j = 0; j < a->reference_count; ++j){ + printf("\t%3d ", a->line); + HMML_Reference* r = a->references + j; + for(size_t k = 0; k < countof(r_tags); ++k){ + char* item = ((char**)r)[k]; + if(item){ + printf("[%s = %s] ", r_tags[k], item); + } + } + puts(""); + } + } + + puts("Quotes:"); + for(size_t i = 0; i < hmml->annotation_count; ++i){ + HMML_Annotation* a = hmml->annotations + i; + if(a->quote.present){ + if(a->quote.author){ + printf("\t%3d [Quote #%d, by %s]", a->line, a->quote.id, a->quote.author); + } else { + printf("\t%3d [Quote #%d]", a->line, a->quote.id); + } + puts(""); + } + } + + for(size_t i = 0; i < sb_count(authors); ++i){ + sb_free(authors[i].lines); + } + sb_free(authors); + + for(size_t i = 0; i < HMML_MARKER_COUNT; ++i){ + for(size_t j = 0; j < sb_count(markers[i]); ++j){ + sb_free(markers[i][j].lines); + } + sb_free(markers[i]); + } +} + +int main(int argc, char** argv) +{ + if(argc < 2) { + fprintf(stderr, "Usage: %s [file]\n", argv[0]); + return 1; + } + + FILE* f = fopen(argv[1], "r"); + if(!f) { + perror(argv[1]); + return 1; + } + + fseek(f, 0, SEEK_END); + long size = ftell(f); + rewind(f); + + char* mem = malloc(size+1); + mem[size] = 0; + + fread(mem, 1, size, f); + fclose(f); + + HMML_Output out = hmml_parse(mem); + free(mem); + + // asm("int3"); + hmml_dump(&out); + + hmml_free(&out); +} diff --git a/hmmlib2/obbg032_1.hmml b/hmmlib2/utils/obbg032_1.hmml similarity index 100% rename from hmmlib2/obbg032_1.hmml rename to hmmlib2/utils/obbg032_1.hmml diff --git a/hmmlib2/utils/stb_sb.h b/hmmlib2/utils/stb_sb.h new file mode 100644 index 0000000..6c1a306 --- /dev/null +++ b/hmmlib2/utils/stb_sb.h @@ -0,0 +1,59 @@ +// stb stretchy_buffer.h v1.02 nothings.org/stb +// with custom addtions sb_end, sb_pop, sb_erase + +#ifndef STB_STRETCHY_BUFFER_H_INCLUDED +#define STB_STRETCHY_BUFFER_H_INCLUDED + +#ifndef NO_STRETCHY_BUFFER_SHORT_NAMES +#define sb_free stb_sb_free +#define sb_push stb_sb_push +#define sb_count stb_sb_count +#define sb_add stb_sb_add +#define sb_last stb_sb_last +#define sb_end stb_sb_end +#define sb_pop stb_sb_pop +#define sb_erase stb_sb_erase +#define sb_each stb_sb_each +#endif + +#define stb_sb_free(a) ((a) ? free(stb__sbraw(a)),(a)=0,0 : 0) +#define stb_sb_push(a,v) (stb__sbmaybegrow(a,1), (a)[stb__sbn(a)++] = (v)) +#define stb_sb_count(a) ((a) ? stb__sbn(a) : 0) +#define stb_sb_add(a,n) (stb__sbmaybegrow(a,n), stb__sbn(a)+=(n), &(a)[stb__sbn(a)-(n)]) +#define stb_sb_last(a) ((a)[stb__sbn(a)-1]) +#define stb_sb_end(a) ((a) ? (a) + stb__sbn(a) : 0) +#define stb_sb_pop(a) (--stb__sbn(a)) +#define stb_sb_erase(a,i) ((a) ? memmove((a)+(i), (a)+(i)+1, sizeof(*(a))*((--stb__sbn(a))-(i))),0 : 0); + +#define stb__sbraw(a) ((size_t *) (a) - 2) +#define stb__sbm(a) stb__sbraw(a)[0] +#define stb__sbn(a) stb__sbraw(a)[1] + +#define stb__sbneedgrow(a,n) ((a)==0 || stb__sbn(a)+(n) >= stb__sbm(a)) +#define stb__sbmaybegrow(a,n) (stb__sbneedgrow(a,(n)) ? stb__sbgrow(a,n) : 0) +#define stb__sbgrow(a,n) ((a) = stb__sbgrowf((a), (n), sizeof(*(a)))) + +#define stb_sb_each(n,h) for(typeof(h) n = h; n < sb_end(h); ++n) + +#include + +static inline void * stb__sbgrowf(void *arr, int increment, int itemsize) +{ + size_t inc_cur = arr ? stb__sbm(arr) + (stb__sbm(arr) >> 1) : 0; + size_t min_needed = stb_sb_count(arr) + increment; + size_t m = inc_cur > min_needed ? inc_cur : min_needed; + size_t *p = (size_t *) realloc(arr ? stb__sbraw(arr) : 0, itemsize * m + sizeof(size_t)*2); + if (p) { + if (!arr) + p[1] = 0; + p[0] = m; + return p+2; + } else { + #ifdef STRETCHY_BUFFER_OUT_OF_MEMORY + STRETCHY_BUFFER_OUT_OF_MEMORY ; + #endif + return (void *) (2*sizeof(size_t)); // try to force a NULL pointer exception later + } +} + +#endif // STB_STRETCHY_BUFFER_H_INCLUDED