#ifndef HMMLIB_H_
#define HMMLIB_H_
#include <stddef.h>
#include <stdint.h>

// Data structures

// One credited person. Parsed from a "name:role" attribute value;
// role is NULL when the value contains no ':'.
typedef struct {
    char* name;
    char* role;
} HMML_Credit;

// A [video] attribute whose key is not one of the recognised ones.
typedef struct {
    char* key;
    char* value;
} HMML_VideoCustomMetaData;

// Everything read from the attributes of the opening [video ...] tag.
// String fields stay NULL when the corresponding attribute is absent.
typedef struct {
    char* stream_platform;
    char* project;
    char* title;
    char* vod_platform;
    char* id;
    char* output;
    char* template;
    char* medium;

    HMML_Credit* credits;
    size_t       credit_count;

    HMML_Credit* uncredits;
    size_t       uncredit_count;

    HMML_VideoCustomMetaData* custom;
    size_t                    custom_count;
} HMML_VideoMetaData;

// A [ref ...] node found inside a timestamp's text.
// offset is the byte position in the timestamp text where the node appeared,
// clamped to the length of the (whitespace-trimmed) text.
typedef struct {
    char* site;
    char* page;
    char* url;
    char* title;
    char* article;
    char* author;
    char* editor;
    char* publisher;
    char* isbn;
    int   offset;
} HMML_Reference;

// Marker kinds, selected by the marker's leading character:
// ':' category, '@' member, '~' project.
typedef enum {
    HMML_CATEGORY,
    HMML_MEMBER,
    HMML_PROJECT,

    HMML_MARKER_COUNT,
} HMML_MarkerType;

// A marker attached to a timestamp.
// marker is lower-cased (ASCII) after parsing. parameter and episode are only
// set for the bracketed ("extended") form. offset is the byte position in the
// timestamp text, or -1 for category markers listed after the text.
typedef struct {
    HMML_MarkerType type;
    char* marker;
    char* parameter;
    char* episode;
    int   offset;
} HMML_Marker;

// Optional [quote ...] node; author is NULL for the "[quote <id>]" form.
typedef struct {
    _Bool present;
    int   id;
    char* author;
} HMML_Quote;

// One annotation: timecode, optional author, text and the nodes found in it.
// line is the parser's line counter at the start of the timestamp; that
// counter only advances while skipping whitespace between nodes.
typedef struct {
    int line;
    int h, m, s;

    char* text;
    char* author;

    HMML_Reference* references;
    size_t          reference_count;

    HMML_Marker* markers;
    size_t       marker_count;

    HMML_Quote quote;
} HMML_Timestamp;

// Location and description of the first parse error.
// message ends with a newline character.
typedef struct {
    int   line;
    int   col;
    char* message;
} HMML_Error;

// Result of hmml_parse(). When well_formed is 0, error describes the failure.
typedef struct {
    _Bool well_formed;

    HMML_VideoMetaData metadata;

    HMML_Timestamp* timestamps;
    size_t          timestamp_count;

    HMML_Error error;

    void* free_list; // implementation detail
} HMML_Output;

// Functions

// Parses a NUL-terminated HMML document. The input is not modified and is not
// referenced by the result. A NULL string is treated as an empty document.
// Release the result with hmml_free().
HMML_Output hmml_parse (const char* string);

// Frees every allocation owned by `output`. Pointers inside `output` are
// dangling afterwards. Calling it again on the same output is a no-op.
void hmml_free (HMML_Output* output);

// Version

extern const struct HMML_Version {
    int Major, Minor, Patch;
} hmml_version;

#endif

#ifdef HMMLIB_IMPLEMENTATION
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <setjmp.h>

#define HSTX(x) x, sizeof(x)-1
#define HSTR(x) (const struct _hmml_str){ HSTX(x) }

#ifndef MALLOC
#define MALLOC malloc
#endif

#ifndef REALLOC
#define REALLOC realloc
#endif

#ifndef countof
#define countof(x) (sizeof(x)/sizeof(*x))
#endif

#define _hmml_debug(...)
//#define _hmml_debug printf

struct _hmml_parser {
    HMML_Output out;
    const char* mem;       // start of the input
    const char* cursor;    // current read position
    jmp_buf     err_buf;   // target of _hmml_err_fn's longjmp
    uintptr_t*  free_list; // see _hmml_store_ptr
    int         line;
};

// Pointer + length string view; not necessarily NUL-terminated.
struct _hmml_str {
    const char* ptr;
    size_t len;
};

// memory management boilerplate stuff

// Records `input` in the parser's free list and returns it unchanged.
//
// Free list layout: [0] = capacity in slots, [1] = index of the next unused
// slot, [2..] = recorded pointers. The list doubles when only one slot is left.
//
// NOTE(review): results of MALLOC/REALLOC are not checked here or elsewhere in
// this file; an allocation failure leads to a NULL dereference.
static void* _hmml_store_ptr(struct _hmml_parser* p, void* input)
{
    uintptr_t* ptr;

    if(p->free_list) {
        ptr = p->free_list;
        if(ptr[1] + 1 == ptr[0]) {
            size_t n = ptr[0] << 1;
            ptr = REALLOC(ptr, n * sizeof(uintptr_t));
            ptr[0] = n;
        }
        ptr[ptr[1]] = (uintptr_t)input;
        ptr[1]++;
    } else {
        ptr = MALLOC(8 * sizeof(uintptr_t));
        ptr[0] = 8;
        ptr[1] = 3;
        ptr[2] = (uintptr_t)input;
    }

    p->free_list = ptr;
    return input;
}

// Copies `str` into a new NUL-terminated heap string owned by the free list.
static char* _hmml_persist_str(struct _hmml_parser* p, const struct _hmml_str str)
{
    char* mem = MALLOC(str.len+1);
    memcpy(mem, str.ptr, str.len);
    mem[str.len] = '\0';
    return _hmml_store_ptr(p, mem);
}

// Appends one element of `in_size` bytes to the growable array *out.
//
// Each array is preceded by a hidden size_t header holding the index of its
// slot in the free list, so that the slot can be updated when REALLOC moves
// the allocation.
static void _hmml_persist_array_fn(struct _hmml_parser* p, void** out, size_t* out_count, void* in, size_t in_size)
{
    void* base;

    if(!*out) {
        base = MALLOC(in_size + sizeof(size_t));
        _hmml_store_ptr(p, base);
        *(size_t*)base = p->free_list[1]-1;
    } else {
        base = (char*)(*out) - sizeof(size_t);
        base = REALLOC(base, (*out_count + 1) * in_size + sizeof(size_t));
        size_t free_list_off = *(size_t*)base;
        p->free_list[free_list_off] = (uintptr_t)base;
    }

    *out = (char*)base + sizeof(size_t);
    memcpy((char*)*out + (*out_count * in_size), in, in_size);
    ++(*out_count);
}

#define _hmml_persist_array(p, out, out_count, in) \
    _hmml_persist_array_fn((p), (void**)(out), (out_count), &(in), sizeof(in))

// error handling

#define _hmml_err(p, fmt, ...) \
    _hmml_err_fn((p), fmt "\n", ##__VA_ARGS__)

// Formats the error message, computes line/column of the cursor by scanning
// the input from the start, stores both in p->out.error and jumps back to the
// setjmp in hmml_parse. Never returns.
__attribute__((noreturn))
static void _hmml_err_fn(struct _hmml_parser* p, const char* fmt, ...)
{
    static char error_buf[4096];

    va_list va;
    va_start(va, fmt);
    int n = vsnprintf(error_buf, sizeof(error_buf), fmt, va);
    va_end(va);

    // vsnprintf reports the length it *wanted* to write (or a negative value
    // on failure); clamp to what is actually in the buffer.
    size_t msg_len;
    if(n < 0) {
        msg_len = 0;
    } else if((size_t)n >= sizeof(error_buf)) {
        msg_len = sizeof(error_buf) - 1;
    } else {
        msg_len = (size_t)n;
    }

    int line = 1, col = 1;
    for(const char* ptr = p->mem; ptr != p->cursor; ++ptr) {
        if(*ptr == '\n') {
            ++line;
            col = 1;
        } else {
            ++col;
        }
    }

    p->out.error.message = _hmml_persist_str(p, (struct _hmml_str){ error_buf, msg_len });
    p->out.error.line = line;
    p->out.error.col = col;

    longjmp(p->err_buf, 1);
}

// actual parsing stuff

// Advances the cursor over bytes in the range 1..' ', counting newlines.
static void _hmml_skip_ws(struct _hmml_parser* p)
{
    for(;;) {
        uint8_t c = *p->cursor;
        if(c && c <= ' ') {
            if(c == '\n') {
                ++p->line;
            }
            ++p->cursor;
        } else {
            break;
        }
    }
}

static _Bool _hmml_str_eq(struct _hmml_str a, struct _hmml_str b)
{
    return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0;
}

// If `in` is a character that may follow a backslash, stores it in *out and
// returns 1; otherwise returns 0 and leaves *out untouched.
static _Bool _hmml_unesc(char in, char* out)
{
    // strchr() also matches the terminating NUL, so it must be excluded
    // explicitly: otherwise a backslash at the very end of the input would be
    // treated as an escape and callers would step past the terminator.
    if(in != '\0' && strchr("[]:@~\\\"", in)) {
        *out = in;
        return 1;
    } else {
        return 0;
    }
}

// Reads one attribute value at the cursor into `mem` (NUL-terminated) and
// advances the cursor past it. Returns a pointer to the terminator in `mem`.
//
// A value starting with '"' runs to the closing quote, with backslash escapes
// resolved. Otherwise it runs up to the first break character; when
// break_on_punct is set the break set also contains punctuation.
static char* _hmml_read_attr(struct _hmml_parser* p, char* mem, size_t mem_size, _Bool break_on_punct)
{
    const char* src = p->cursor;
    char* dst = mem;

    if(*src == '"') {
        ++src;
        // dst never gets ahead of (src - cursor - 1), so bounding the source
        // distance also bounds the writes and leaves room for the terminator.
        while(*src && *src != '"' && (size_t)(src - p->cursor) < mem_size) {
            char converted;
            if(*src == '\\' && _hmml_unesc(src[1], &converted)) {
                *dst++ = converted;
                src += 2;
            } else {
                *dst++ = *src++;
            }
        }

        if(*src != '"') {
            _hmml_err(p, "Partially quoted attribute");
        }

        *dst = '\0';
        p->cursor = src+1;
    } else {
        const char* breaks = break_on_punct
            ? " ]\r\n\t:,'-.#=[\\?!…()\"%"
            : " ]\r\n\t"
            ;

        size_t n = strcspn(src, breaks);
        if(n >= mem_size) {
            _hmml_err(p, "Attribute [%.10s...] too long", p->cursor);
        }

        memcpy(dst, src, n);
        dst += n;
        *dst = '\0';
        p->cursor += n;
    }

    return dst;
}

// Reads "key = value" at the cursor. Both views point into static buffers
// that are overwritten by the next call.
static void _hmml_read_kv(struct _hmml_parser* p, struct _hmml_str* key, struct _hmml_str* val)
{
    static char key_memory[64];
    static char val_memory[1024];

    size_t key_len = strcspn(p->cursor, " \r\n\t=");
    if(key_len >= sizeof(key_memory)) {
        _hmml_err(p, "Attribute key [%.10s...] too long", p->cursor);
    }

    memcpy(key_memory, p->cursor, key_len);
    key_memory[key_len] = '\0';
    p->cursor += key_len;

    _hmml_skip_ws(p);

    if(*p->cursor != '=') {
        _hmml_err(p, "Expected '=', got [%.3s]", p->cursor);
    }
    ++p->cursor;

    char* end = _hmml_read_attr(p, val_memory, sizeof(val_memory), 0);

    _hmml_debug("read kv [%s] = [%s]\n", key_memory, val_memory);

    key->ptr = key_memory;
    key->len = key_len;

    val->ptr = val_memory;
    val->len = end - val_memory;
}

// Parses a marker at the cursor: either the bare form (":name", "@name",
// "~name") or the extended form "[:name #episode parameter]".
// The returned marker has offset == -1; callers fill it in.
static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p)
{
    static char marker_mem[4096];

    // the extended markers are inside [ ] and can contain parameters
    _Bool extended = *p->cursor == '[';
    if(extended) {
        ++p->cursor;
    }

    HMML_Marker marker = {
        .offset = -1,
    };

    char c = *p->cursor;
    if(c == '~') {
        marker.type = HMML_PROJECT;
    } else if(c == '@') {
        marker.type = HMML_MEMBER;
    } else if(c == ':') {
        marker.type = HMML_CATEGORY;
    } else {
        _hmml_err(p, "Unknown marker type");
    }

    ++p->cursor;

    char* end = _hmml_read_attr(p, marker_mem, sizeof(marker_mem), !extended);
    marker.marker = _hmml_persist_str(p, (struct _hmml_str){ marker_mem, end - marker_mem });

    if(extended) {
        _hmml_skip_ws(p);

        if(*p->cursor == '#') {
            ++p->cursor;
            // The episode ends at a space or at the closing bracket. Only a
            // separating space is consumed, so neither the ']' nor the
            // terminating NUL is ever skipped.
            size_t n = strcspn(p->cursor, " ]");
            marker.episode = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, n });
            p->cursor += n;
            if(*p->cursor == ' ') {
                ++p->cursor;
            }
        }

        if(*p->cursor != ']') {
            // The parameter runs to the first unescaped ']' and is stored
            // verbatim (escape sequences are not resolved here).
            const char* param_end = p->cursor;
            for(;;) {
                if(!*param_end) {
                    break;
                }

                char converted;
                if(*param_end == '\\' && _hmml_unesc(param_end[1], &converted)) {
                    param_end += 2;
                } else if(*param_end == ']') {
                    break;
                } else {
                    ++param_end;
                }
            }
            marker.parameter = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, param_end - p->cursor });
            p->cursor = param_end;
        }

        if(*p->cursor != ']') {
            _hmml_err(p, "Expected ']'");
        }

        ++p->cursor;
    }

    return marker;
}

// Parses the attributes of a [ref ...] node up to and including the closing
// ']'. The cursor must be just past "[ref". Unknown keys are an error.
static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p)
{
    HMML_Reference ref = {
        .offset = -1,
    };

    struct str_attr {
        struct _hmml_str str;
        char** dest;
    } str_attrs[] = {
        { HSTR("site")     , &ref.site },
        { HSTR("page")     , &ref.page },
        { HSTR("url")      , &ref.url },
        { HSTR("title")    , &ref.title },
        { HSTR("article")  , &ref.article },
        { HSTR("author")   , &ref.author },
        { HSTR("editor")   , &ref.editor },
        { HSTR("publisher"), &ref.publisher },
        { HSTR("isbn")     , &ref.isbn },
    };

    for(;;) {
        _hmml_skip_ws(p);

        if(*p->cursor == ']') {
            ++p->cursor;
            break;
        }

        struct _hmml_str key, value;
        _hmml_read_kv(p, &key, &value);

        _Bool matched = 0;
        for(size_t i = 0; i < countof(str_attrs); ++i) {
            struct str_attr* s = str_attrs + i;
            if(_hmml_str_eq(key, s->str)) {
                *s->dest = _hmml_persist_str(p, value);
                matched = 1;
                break;
            }
        }

        if(!matched) {
            _hmml_err(p, "Unknown reference attribute");
        }
    }

    return ref;
}

// Parses "[m:s]" or "[h:m:s]" at the cursor into ts->h/m/s.
static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Timestamp* ts)
{
    unsigned int h = 0, m = 0, s = 0;
    int offset = 0;

    int count = sscanf(p->cursor, "[%u:%u%n", &m, &s, &offset);
    if(count < 2) {
        _hmml_err(p, "Unable to parse timecode");
    }

    p->cursor += offset;

    char c = *p->cursor;
    if(c == ':') {
        unsigned int tmp;
        offset = 0;
        // offset stays 0 when the closing ']' is missing, because %n is only
        // reached after the literal ']' has matched.
        if(sscanf(p->cursor, ":%u]%n", &tmp, &offset) != 1 || offset == 0) {
            _hmml_err(p, "Unable to parse 3-part timecode");
        }
        h = m;
        m = s;
        s = tmp;
        p->cursor += offset;
    } else if(c != ']') {
        _hmml_err(p, "Unable to parse timecode");
    } else {
        ++p->cursor;
    }

    if(s >= 60) {
        _hmml_err(p, "Seconds cannot exceed 59");
    }

    if(m >= 60) {
        _hmml_err(p, "Minutes cannot exceed 59");
    }

    ts->h = h;
    ts->m = m;
    ts->s = s;
}

// Raises "Not enough text memory" unless `need` more bytes fit between `out`
// and the end of the text buffer.
static void _hmml_require_text_room(struct _hmml_parser* p, const char* out, const char* text_mem, size_t text_mem_size, size_t need)
{
    size_t used = (size_t)(out - text_mem);
    if(need > text_mem_size - used) {
        _hmml_err(p, "Not enough text memory");
    }
}

// Parses a marker inside timestamp text, appends it to ts->markers and writes
// its display text (the parameter if present, else the name) at *out.
static void _hmml_store_marker(struct _hmml_parser* p, HMML_Timestamp* ts, char** out, char* text_mem, size_t text_mem_size)
{
    HMML_Marker m = _hmml_parse_marker(p);
    m.offset = (*out) - text_mem;
    _hmml_persist_array(p, &ts->markers, &ts->marker_count, m);

    const char* marker_text = m.parameter
        ? m.parameter
        : m.marker
        ;

    size_t text_len = strlen(marker_text);
    _hmml_require_text_room(p, *out, text_mem, text_mem_size, text_len);

    memcpy(*out, marker_text, text_len);
    *out += text_len;
}

// Parses timestamp text up to and including the closing ']', collecting
// markers and references into `ts`. Newlines are dropped, backslash escapes
// are resolved and trailing whitespace is trimmed. Returns the length of the
// stored text. Uses a static scratch buffer shared by all calls.
static size_t _hmml_parse_text(struct _hmml_parser* p, HMML_Timestamp* ts)
{
    static char text_mem[4096];
    char* out = text_mem;

    memset(text_mem, 0, sizeof(text_mem));

    for(;;) {
        size_t n = strcspn(p->cursor, "\\\n\r[]:@~");
        char c = p->cursor[n];

        _hmml_require_text_room(p, out, text_mem, sizeof(text_mem), n);

        memcpy(out, p->cursor, n);
        p->cursor += n;
        out += n;

        if(c == '\0') {
            _hmml_err(p, "Unexpected EOF");
        } else if(c == ']') {
            ++p->cursor;
            break;
        } else if(c == '\\') {
            // The bulk copy above may have filled the buffer exactly, so the
            // single-byte write needs its own check.
            _hmml_require_text_room(p, out, text_mem, sizeof(text_mem), 1);

            char converted;
            if(_hmml_unesc(p->cursor[1], &converted)) {
                *out++ = converted;
                p->cursor += 2;
            } else {
                *out++ = '\\';
                p->cursor++;
            }
        } else if(c == '\n' || c == '\r') {
            ++p->cursor;
        } else if(c == '[') {
            if(strncmp(p->cursor + 1, "ref", 3) == 0) {
                p->cursor += 4;
                HMML_Reference ref = _hmml_parse_ref(p);
                ref.offset = out - text_mem;
                _hmml_persist_array(p, &ts->references, &ts->reference_count, ref);
            } else {
                _hmml_store_marker(p, ts, &out, text_mem, sizeof(text_mem));
            }
        }

        // it is a @ ~ or : marker without parameters
        else {
            // if next char is a space, or prev char is not a space*, then it can't be a marker
            // * unless it's the first char
            // (strchr also matches NUL, so end-of-input counts as "a space" here)
            if(strchr(" \t\r\n", p->cursor[1]) || !(out == text_mem || strchr(" \t\r\n", p->cursor[-1]))) {
                _hmml_require_text_room(p, out, text_mem, sizeof(text_mem), 1);
                *out++ = c;
                ++p->cursor;
            } else {
                _hmml_store_marker(p, ts, &out, text_mem, sizeof(text_mem));
            }
        }

        if((size_t)(out - text_mem) >= sizeof(text_mem)) {
            _hmml_err(p, "Not enough text memory");
        }
    }

    // trim trailing whitespace
    while(out > text_mem && (uint8_t)(out[-1]) <= ' ') {
        out[-1] = '\0';
        --out;
    }

    size_t text_size = out - text_mem;
    ts->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, text_size });

    return text_size;
}

// Parses "[quote <author> <id>]" or "[quote <id>]" at the cursor.
static void _hmml_parse_quote(struct _hmml_parser* p, HMML_Timestamp* ts)
{
    char member[256];
    int id = 0;
    int off = 0;

    // `off` is only written once the closing ']' has matched, so a non-zero
    // value means the whole node was consumed.
    if(sscanf(p->cursor, "[quote %255s %d]%n", member, &id, &off) == 2 && off) {
        ts->quote.present = 1;
        ts->quote.id = id;
        ts->quote.author = _hmml_persist_str(p, (struct _hmml_str){ member, strlen(member) });
    } else if(sscanf(p->cursor, "[quote %d]%n", &id, &off) == 1 && off) {
        ts->quote.present = 1;
        ts->quote.id = id;
    } else {
        _hmml_err(p, "Unable to parse quote");
    }

    p->cursor += off;
}

// Parses timestamps until "[/video]" is reached. Each timestamp has the form
//   [timecode][@author][text][:category ...][quote ...]
// where the author, category list and quote are optional.
static void _hmml_parse_timestamps(struct _hmml_parser* p)
{
    for(;;) {
        _hmml_skip_ws(p);

        if(*p->cursor == '\0') {
            _hmml_err(p, "Unexpected EOF");
        }

        if(strncmp(p->cursor, "[/video]", 8) == 0) {
            break;
        }

        HMML_Timestamp ts = { .line = p->line };

        _hmml_parse_timecode(p, &ts);

        if(*p->cursor != '[') {
            _hmml_err(p, "Expected '['");
        }

        if(p->cursor[1] == '@') {
            HMML_Marker m = _hmml_parse_marker(p);
            ts.author = m.marker;

            // The text node must follow; without this check the increment
            // below could step over the terminating NUL.
            if(*p->cursor != '[') {
                _hmml_err(p, "Expected '['");
            }
        }

        ++p->cursor;

        int text_len = (int)_hmml_parse_text(p, &ts);

        if(p->cursor[0] == '[' && p->cursor[1] == ':') {
            ++p->cursor;

            do {
                HMML_Marker m = _hmml_parse_marker(p);
                _hmml_persist_array(p, &ts.markers, &ts.marker_count, m);
                _hmml_skip_ws(p);
                if(*p->cursor != ':' && *p->cursor != ']') {
                    _hmml_err(p, "Unterminated post-text category node");
                }
            } while(*p->cursor == ':');

            ++p->cursor;
        }

        if(p->cursor[0] == '[' && p->cursor[1] == 'q') {
            _hmml_parse_quote(p, &ts);
        }

        // convert all markers to lowercase, fix any out of range offsets
        for(size_t i = 0; i < ts.marker_count; ++i) {
            HMML_Marker* m = ts.markers + i;
            for(char* c = m->marker; *c; ++c) {
                if(*c >= 'A' && *c <= 'Z') {
                    *c = (*c - ('A' - 'a'));
                }
            }

            if(m->offset > text_len) {
                m->offset = text_len;
            }
        }

        for(size_t i = 0; i < ts.reference_count; ++i) {
            HMML_Reference* ref = ts.references + i;
            if(ref->offset > text_len) {
                ref->offset = text_len;
            }
        }

        _hmml_persist_array(p, &p->out.timestamps, &p->out.timestamp_count, ts);
    }
}

// Splits a credit value at its first ':' into name and role. Without a ':'
// the whole value is the name and role stays NULL. `value` is not modified.
static HMML_Credit _hmml_parse_credit(struct _hmml_parser* p, struct _hmml_str value)
{
    HMML_Credit credit = {0};

    const char* colon = memchr(value.ptr, ':', value.len);
    if(colon) {
        size_t name_len = (size_t)(colon - value.ptr);
        credit.name = _hmml_persist_str(p, (struct _hmml_str){ value.ptr, name_len });
        credit.role = _hmml_persist_str(p, (struct _hmml_str){ colon+1, value.len - (name_len + 1) });
    } else {
        credit.name = _hmml_persist_str(p, value);
    }

    return credit;
}

// Parses the attributes of the [video ...] tag (cursor just past "[video"),
// then the timestamps that follow its closing ']'.
// Known keys fill HMML_VideoMetaData fields, "credit"/"uncredit" append to
// their arrays, and any other key is stored as custom metadata.
static void _hmml_parse_video(struct _hmml_parser* p)
{
    struct str_attr {
        struct _hmml_str str;
        char** dest;
    } str_attrs[] = {
        { HSTR("stream_platform"), &p->out.metadata.stream_platform },
        { HSTR("project")        , &p->out.metadata.project },
        { HSTR("title")          , &p->out.metadata.title },
        { HSTR("vod_platform")   , &p->out.metadata.vod_platform },
        { HSTR("id")             , &p->out.metadata.id },
        { HSTR("template")       , &p->out.metadata.template },
        { HSTR("medium")         , &p->out.metadata.medium },
        { HSTR("output")         , &p->out.metadata.output },
    };

    for(;;) {
        _hmml_skip_ws(p);

        if(*p->cursor == ']') {
            ++p->cursor;
            _hmml_parse_timestamps(p);
            return;
        }

        struct _hmml_str key, value;
        _hmml_read_kv(p, &key, &value);

        _Bool matched = 0;
        for(size_t i = 0; i < countof(str_attrs); ++i) {
            struct str_attr* s = str_attrs + i;
            if(_hmml_str_eq(key, s->str)) {
                *s->dest = _hmml_persist_str(p, value);
                matched = 1;
                break;
            }
        }
        if(matched) {
            continue;
        }

        if(_hmml_str_eq(key, HSTR("credit"))) {
            HMML_Credit credit = _hmml_parse_credit(p, value);
            _hmml_persist_array(p, &p->out.metadata.credits, &p->out.metadata.credit_count, credit);
            continue;
        }

        if(_hmml_str_eq(key, HSTR("uncredit"))) {
            HMML_Credit uncredit = _hmml_parse_credit(p, value);
            _hmml_persist_array(p, &p->out.metadata.uncredits, &p->out.metadata.uncredit_count, uncredit);
            continue;
        }

        HMML_VideoCustomMetaData custom = {
            .key   = _hmml_persist_str(p, key),
            .value = _hmml_persist_str(p, value),
        };
        _hmml_persist_array(p, &p->out.metadata.custom, &p->out.metadata.custom_count, custom);
    }
}

HMML_Output hmml_parse(const char* string)
{
    // A NULL document is handled like an empty one (reported as a missing
    // video tag) instead of being dereferenced.
    if(!string) {
        string = "";
    }

    struct _hmml_parser p = {
        .mem    = string,
        .cursor = string,
        .line   = 1,
    };

    // Every parse error lands here through longjmp() in _hmml_err_fn.
    if(setjmp(p.err_buf) == 1) {
        // if it returns 1, an error happened
        p.out.free_list = p.free_list;
        return p.out;
    }

    const struct _hmml_str prefix = HSTR("[video");
    if(strncmp(p.cursor, prefix.ptr, prefix.len)) {
        _hmml_err(&p, "Missing initial video tag.");
    } else {
        p.cursor += prefix.len;
        _hmml_parse_video(&p);
    }

    p.out.free_list = p.free_list;
    p.out.well_formed = 1;

    return p.out;
}

void hmml_free(HMML_Output* out)
{
    if(!out || !out->free_list) {
        return;
    }

    // Slots [2, list[1]) hold the recorded allocations; see _hmml_store_ptr.
    uintptr_t* list = out->free_list;
    for(uintptr_t i = 2; i < list[1]; ++i) {
        free((void*)list[i]);
    }

    free(list);

    // Clear the handle so a repeated call returns early instead of freeing
    // everything a second time.
    out->free_list = NULL;
}

const struct HMML_Version hmml_version = {
    2, 0, 12
};

#undef HSTX
#undef HSTR
#endif