#if 0 gcc -g "$0" -o "${0%%.c}" exit 0 #endif #include "hmmlib.h" #include #include #include #include #include // // TODO: // parse quotes // marker parameter + episode number // track line numbers somehow // seems to be only needed for the error-func, so... // could probably do it after-the-fact, counting \n until we hit p->cursor // port over the hmml_dump function // test various files // cleanup // #define HSTX(x) x, sizeof(x)-1 #define HSTR(x) (struct _hmml_str){ HSTX(x) } #ifndef MALLOC #define MALLOC malloc #endif #ifndef REALLOC #define REALLOC realloc #endif #ifndef countof #define countof(x) (sizeof(x)/sizeof(*x)) #endif #define _hmml_debug(...) //#define _hmml_debug printf struct _hmml_parser { HMML_Output out; char* cursor; jmp_buf err_buf; intptr_t* free_list; }; struct _hmml_str { char* ptr; size_t len; }; // memory management boilerplate stuff static void* _hmml_store_ptr(struct _hmml_parser* p, void* input) { uintptr_t* ptr; if(p->free_list) { ptr = p->free_list; if(ptr[1] + 1 == ptr[0]) { size_t n = ptr[0] << 1; ptr = REALLOC(ptr, n * sizeof(uintptr_t)); ptr[0] = n; } ptr[ptr[1]] = (intptr_t)input; ptr[1]++; } else { ptr = MALLOC(8 * sizeof(uintptr_t)); ptr[0] = 8; ptr[1] = 3; ptr[2] = (intptr_t)input; } p->free_list = ptr; return input; } static char* _hmml_persist_str(struct _hmml_parser* p, const struct _hmml_str str) { char* mem = MALLOC(str.len+1); memcpy(mem, str.ptr, str.len); mem[str.len] = '\0'; return _hmml_store_ptr(p, mem); } static void _hmml_persist_array_fn(struct _hmml_parser* p, void** out, size_t* out_count, void* in, size_t in_size) { void* base; if(!*out) { base = MALLOC(in_size + sizeof(size_t)); _hmml_store_ptr(p, base); *(size_t*)base = p->free_list[1]-1; } else { base = (char*)(*out) - sizeof(size_t); base = REALLOC(base, (*out_count + 1) * in_size + sizeof(size_t)); size_t free_list_off = *(size_t*)base; p->free_list[free_list_off] = (intptr_t)base; } *out = (char*)base + sizeof(size_t); memcpy((char*)*out + (*out_count * in_size), in, in_size); ++(*out_count); } #define _hmml_persist_array(p, out, out_count, in) \ _hmml_persist_array_fn((p), (void**)(out), (out_count), &(in), sizeof(in)) // error handling #define _hmml_err(p, fmt, ...) \ _hmml_err_fn((p), fmt "\n", ##__VA_ARGS__) __attribute__((noreturn)) static void _hmml_err_fn(struct _hmml_parser* p, const char* fmt, ...) { static char error_buf[4096]; va_list va; va_start(va, fmt); int n = vsnprintf(error_buf, sizeof(error_buf), fmt, va); va_end(va); // TODO: figure out / keep track of the line number p->out.error.message = _hmml_persist_str(p, (struct _hmml_str){ error_buf, n }); longjmp(p->err_buf, 1); } // actual parsing stuff static void _hmml_skip_ws(struct _hmml_parser* p) { p->cursor += strspn(p->cursor, " \t\r\n"); } static _Bool _hmml_str_eq(struct _hmml_str a, struct _hmml_str b) { return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0; } static _Bool _hmml_unesc(char in, char* out) { if(strchr("[]:@~\\\"", in)) { *out = in; return 1; } else { return 0; } } static char* _hmml_read_attr(struct _hmml_parser* p, char* mem, size_t mem_size) { char* src = p->cursor; char* dst = mem; if(*src == '"') { ++src; while(*src && *src != '"' && src - p->cursor < mem_size) { char converted; if(*src == '\\' && _hmml_unesc(src[1], &converted)) { *dst++ = converted; src += 2; } else { *dst++ = *src++; } } if(*src != '"') { _hmml_err(p, "Partially quoted attribute"); } *dst = '\0'; p->cursor = src+1; } else { size_t n = strcspn(src, " ]\r\n\t"); if(n >= mem_size) { _hmml_err(p, "Attribute [%.10s...] too long", p->cursor); } memcpy(dst, src, n); dst += n; *dst = '\0'; p->cursor += n; } return dst; } static void _hmml_read_kv(struct _hmml_parser* p, struct _hmml_str* key, struct _hmml_str* val) { static char key_memory[64]; static char val_memory[1024]; size_t key_len = strcspn(p->cursor, " \r\n\t="); if(key_len >= sizeof(key_memory)) { _hmml_err(p, "Attribute key [%.10s...] too long", p->cursor); } memcpy(key_memory, p->cursor, key_len); key_memory[key_len] = '\0'; p->cursor += key_len; _hmml_skip_ws(p); if(*p->cursor != '=') { _hmml_err(p, "Expected '=', got [%.3s]", p->cursor); } ++p->cursor; char* src = p->cursor; char* dst = _hmml_read_attr(p, val_memory, sizeof(val_memory)); _hmml_debug("read kv [%s] = [%s]\n", key_memory, val_memory); key->ptr = key_memory; key->len = key_len; val->ptr = val_memory; val->len = dst - val_memory; } static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p) { static char marker_mem[4096]; // the extended markers are inside [ ] and can contain parameters _Bool extended = *p->cursor == '['; if(extended) { ++p->cursor; } HMML_Marker marker = {}; char c = *p->cursor; if(c == '~') { marker.type = HMML_PROJECT; } else if(c == '@') { marker.type = HMML_MEMBER; } else if(c == ':') { marker.type = HMML_CATEGORY; } else { _hmml_err(p, "Unknown marker type"); } ++p->cursor; char* end = _hmml_read_attr(p, marker_mem, sizeof(marker_mem)); marker.marker = _hmml_persist_str(p, (struct _hmml_str){ marker_mem, end - marker_mem }); _hmml_skip_ws(p); if(*p->cursor == '#') { // TODO: marker.episode = smth } else { // TODO: marker.parameter = _hmml_persist_str(p, (struct _hmml_str){ }); } if(extended) { if(*p->cursor != ']') { _hmml_err(p, "Expected ']'"); } ++p->cursor; } return marker; } static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p) { HMML_Reference ref = {}; struct str_attr { struct _hmml_str str; char** dest; } str_attrs[] = { { HSTR("site") , &ref.site }, { HSTR("page") , &ref.page }, { HSTR("url") , &ref.url }, { HSTR("title") , &ref.title }, { HSTR("article") , &ref.article }, { HSTR("author") , &ref.author }, { HSTR("editor") , &ref.editor }, { HSTR("publisher"), &ref.publisher }, { HSTR("isbn") , &ref.isbn }, }; for(;;) { next_attr: _hmml_skip_ws(p); if(*p->cursor == ']') { ++p->cursor; break; } struct _hmml_str key, value; _hmml_read_kv(p, &key, &value); for(int i = 0; i < countof(str_attrs); ++i) { struct str_attr* s = str_attrs + i; if(_hmml_str_eq(key, s->str)) { *s->dest = _hmml_persist_str(p, value); goto next_attr; } } _hmml_err(p, "Unknown reference attribute"); } return ref; } static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Annotation* anno) { int h = 0, m = 0, s = 0, offset = 0; int count = sscanf(p->cursor, "[%u:%u%n", &m, &s, &offset); if(count < 2) { _hmml_err(p, "Unable to parse timecode"); } p->cursor += offset; char c = *p->cursor; if(c == ':') { int tmp; offset = 0; if(sscanf(p->cursor, ":%u]%n", &tmp, &offset) != 1 || offset == 0) { _hmml_err(p, "Unable to parse 3-part timecode"); } h = m; m = s; s = tmp; p->cursor += offset; } else if(c != ']') { _hmml_err(p, "Unable to parse timecode"); } else { ++p->cursor; } if(s >= 60) { _hmml_err(p, "Seconds cannot exceed 59"); } if(m >= 60) { _hmml_err(p, "Minutes cannot exceed 59"); } anno->h = h; anno->m = m; anno->s = s; } static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno) { static char text_mem[4096]; char* out = text_mem; memset(text_mem, 0, sizeof(text_mem)); for(;;) { size_t n = strcspn(p->cursor, "\\[]:@~"); char c = p->cursor[n]; if(out + n > text_mem + sizeof(text_mem)) { _hmml_err(p, "Not enough text memory"); } memcpy(out, p->cursor, n); p->cursor += n; out += n; if(c == '\0') { _hmml_err(p, "Unexpected EOF"); } else if(c == ']') { ++p->cursor; break; } else if(c == '\\') { *out++ = p->cursor[1]; p->cursor += 2; } else if(c == '[') { if(strncmp(p->cursor + 1, "ref", 3) == 0) { p->cursor += 4; HMML_Reference ref = _hmml_parse_ref(p); _hmml_persist_array(p, &anno->references, &anno->reference_count, ref); } else { HMML_Marker m = _hmml_parse_marker(p); _hmml_persist_array(p, &anno->markers, &anno->marker_count, m); } } else { // it is a @ ~ or : marker without parameters HMML_Marker m = _hmml_parse_marker(p); _hmml_persist_array(p, &anno->markers, &anno->marker_count, m); } if(out - text_mem >= sizeof(text_mem)) { _hmml_err(p, "Not enough text memory"); } } anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, out - text_mem }); } static void _hmml_parse_annotations(struct _hmml_parser* p) { for(;;) { _hmml_skip_ws(p); if(*p->cursor == '\0') { _hmml_err(p, "Unexpected EOF"); } if(strncmp(p->cursor, "[/video]", 8) == 0) { break; } HMML_Annotation anno = {}; _hmml_parse_timecode(p, &anno); if(*p->cursor != '[') { _hmml_err(p, "Expected '['"); } if(p->cursor[1] == '@') { HMML_Marker m = _hmml_parse_marker(p); anno.author = m.marker; } ++p->cursor; _hmml_parse_text(p, &anno); if(p->cursor[0] == '[' && p->cursor[1] == ':') { HMML_Marker m = _hmml_parse_marker(p); _hmml_persist_array(p, &anno.markers, &anno.marker_count, m); } if(p->cursor[0] == '[' && p->cursor[1] == 'q') { //_hmml_parse_quote(); } _hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno); } } static void _hmml_parse_video(struct _hmml_parser* p) { struct str_attr { struct _hmml_str str; char** dest; } str_attrs[] = { { HSTR("member") , &p->out.metadata.member }, { HSTR("stream_platform"), &p->out.metadata.stream_platform }, { HSTR("stream_username"), &p->out.metadata.stream_username }, { HSTR("project") , &p->out.metadata.project }, { HSTR("title") , &p->out.metadata.title }, { HSTR("vod_platform") , &p->out.metadata.vod_platform }, { HSTR("id") , &p->out.metadata.id }, { HSTR("template") , &p->out.metadata.template }, { HSTR("medium") , &p->out.metadata.medium }, { HSTR("output") , &p->out.metadata.output }, }; for(;;) { next_attr: _hmml_skip_ws(p); if(*p->cursor == ']') { ++p->cursor; return _hmml_parse_annotations(p); } struct _hmml_str key, value; _hmml_read_kv(p, &key, &value); for(int i = 0; i < countof(str_attrs); ++i) { struct str_attr* s = str_attrs + i; if(_hmml_str_eq(key, s->str)) { *s->dest = _hmml_persist_str(p, value); goto next_attr; } } if(_hmml_str_eq(key, HSTR("credit"))) { HMML_Credit credit = {}; char* colon = strchr(value.ptr, ':'); if(colon) { *colon = '\0'; credit.name = _hmml_persist_str(p, (struct _hmml_str){ value.ptr, colon - value.ptr }); credit.role = _hmml_persist_str(p, (struct _hmml_str){ colon+1, value.len - ((colon+1) - value.ptr) }); } else { credit.name = _hmml_persist_str(p, value); } _hmml_persist_array(p, &p->out.metadata.credits, &p->out.metadata.credit_count, credit); goto next_attr; } HMML_VideoCustomMetaData custom = { .key = _hmml_persist_str(p, key), .value = _hmml_persist_str(p, value), }; _hmml_persist_array(p, &p->out.metadata.custom, &p->out.metadata.custom_count, custom); } } HMML_Output hmml_parse(char* mem) { struct _hmml_parser p = {}; p.cursor = mem; if(setjmp(p.err_buf) == 1) { // if it returns 1, an error happened return p.out; } static const struct _hmml_str prefix = HSTR("[video"); if(strncasecmp(p.cursor, prefix.ptr, prefix.len)) { _hmml_err(&p, "Missing initial video tag."); } else { p.cursor += prefix.len; _hmml_parse_video(&p); } p.out.free_list = p.free_list; p.out.well_formed = 1; return p.out; } void hmml_free(HMML_Output* out) { if(!out->free_list) { return; } for(int i = 2; i < ((uintptr_t*)out->free_list)[1]; ++i) { free(((void**)out->free_list)[i]); } free(out->free_list); } // for testing, should be removed or #if 0'd for the library int main(int argc, char** argv) { if(argc < 2) { fprintf(stderr, "Usage: %s [file]\n", argv[0]); return 1; } FILE* f = fopen(argv[1], "r"); if(!f) { perror(argv[1]); return 1; } fseek(f, 0, SEEK_END); long size = ftell(f); rewind(f); char* mem = malloc(size+1); mem[size] = 0; fread(mem, 1, size, f); fclose(f); HMML_Output out = hmml_parse(mem); free(mem); // to look at "out" in gdb asm("int3"); hmml_free(&out); }