// hmmlib: single-header parser for the "[video ...] ... [/video]" annotation
// markup.
//
// Include this file normally to get the declarations. In exactly one
// translation unit, define HMMLIB_IMPLEMENTATION before including it to also
// get the definitions. MALLOC and REALLOC may be defined beforehand to
// override the allocator used for parsing; memory is always released with
// free(), so replacements must be compatible with it.

#ifndef HMMLIB_H_
#define HMMLIB_H_

#include <stddef.h>
#include <stdbool.h>

// Data structures

// One "credit=name:role" attribute of the video tag. `role` is NULL when the
// value contains no ':'.
typedef struct {
	char* name;
	char* role;
} HMML_Credit;

// A video-tag attribute whose key is not one of the recognised names.
typedef struct {
	char* key;
	char* value;
} HMML_VideoCustomMetaData;

// Attributes of the opening [video ...] tag. Attributes that are absent from
// the input are left NULL / zero.
typedef struct {
	char* member;
	char* stream_platform;
	char* stream_username;
	char* project;
	char* title;
	char* vod_platform;
	char* id;
	char* output;
	char* template;
	char* medium;

	HMML_Credit* credits;
	size_t       credit_count;

	HMML_VideoCustomMetaData* custom;
	size_t                    custom_count;
} HMML_VideoMetaData;

// Attributes of a [ref ...] tag found inside annotation text.
typedef struct {
	char* site;
	char* page;
	char* url;
	char* title;
	char* article;
	char* author;
	char* editor;
	char* publisher;
	char* isbn;
} HMML_Reference;

typedef enum {
	HMML_CATEGORY,     // introduced by ':'
	HMML_MEMBER,       // introduced by '@'
	HMML_PROJECT,      // introduced by '~'

	HMML_MARKER_COUNT,
} HMML_MarkerType;

// A marker. `parameter` and `episode` are only ever set for the bracketed
// form, e.g. [~name #episode] or [:name parameter]; otherwise they are NULL.
typedef struct {
	HMML_MarkerType type;
	char* marker;
	char* parameter;
	char* episode;
} HMML_Marker;

// A "[quote id]" or "[quote author id]" tag following an annotation.
typedef struct {
	_Bool present;
	int   id;
	char* author;
} HMML_Quote;

// One timestamped annotation.
typedef struct {
	int line;      // input line on which the annotation starts
	int h, m, s;   // timecode
	char* text;    // annotation text; markers contribute their name only
	char* author;  // name from a leading [@name] marker, or NULL

	HMML_Reference* references;
	size_t          reference_count;

	HMML_Marker* markers;
	size_t       marker_count;

	HMML_Quote quote;
} HMML_Annotation;

// Position (1-based) and description of the first parse error.
typedef struct {
	int   line;
	int   col;
	char* message;
} HMML_Error;

typedef struct {
	_Bool well_formed;  // 1 when the whole input parsed, 0 otherwise

	HMML_VideoMetaData metadata;

	HMML_Annotation* annotations;
	size_t           annotation_count;

	HMML_Error error;   // only meaningful when well_formed is 0

	void* free_list;    // implementation detail
} HMML_Output;

// Functions

// Parses the NUL-terminated document `string` (NULL is treated as empty).
// On failure `well_formed` is 0, `error` is filled in, and whatever was parsed
// before the error is still present. Pass the result to hmml_free() in either
// case. The implementation keeps scratch data in function-local static
// buffers, so concurrent calls from several threads are not safe.
HMML_Output hmml_parse (const char* string);

// Releases everything hmml_parse() allocated for `output`. Every pointer inside
// `output` is dangling afterwards. Calling it again on the same object, or with
// NULL, does nothing.
void hmml_free (HMML_Output* output);

// Version

extern const struct HMML_Version {
	int Major, Minor, Patch;
} hmml_version;

#endif

#ifdef HMMLIB_IMPLEMENTATION

#include <setjmp.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HSTX(x) x, sizeof(x)-1
#define HSTR(x) (const struct _hmml_str){ HSTX(x) }

#ifndef MALLOC
#define MALLOC malloc
#endif

#ifndef REALLOC
#define REALLOC realloc
#endif

#ifndef countof
#define countof(x) (sizeof(x)/sizeof(*x))
#endif

#define _hmml_debug(...)
//#define _hmml_debug printf

struct _hmml_parser {
	HMML_Output out;
	const char* mem;       // start of the input
	const char* cursor;    // current read position
	jmp_buf     err_buf;   // target of every error exit
	uintptr_t*  free_list; // [0] capacity, [1] next free slot, [2..] owned pointers
	int         line;      // current input line, 1-based
};

struct _hmml_str {
	const char* ptr;
	size_t len;
};

// Pairs an attribute name with the output field that receives its value.
struct _hmml_str_attr {
	struct _hmml_str str;
	char** dest;
};

// error exits

// Records `message` together with the line/column of the cursor and jumps back
// to the setjmp() in _hmml_run(). `message` is stored as-is, not copied.
__attribute__((noreturn))
static void _hmml_raise(struct _hmml_parser* p, char* message)
{
	int line = 1, col = 1;

	for(const char* ptr = p->mem; ptr != p->cursor; ++ptr) {
		if(*ptr == '\n') {
			++line;
			col = 1;
		} else {
			++col;
		}
	}

	p->out.error.message = message;
	p->out.error.line = line;
	p->out.error.col = col;

	longjmp(p->err_buf, 1);
}

// Error exit for allocation failure. The message lives in static storage
// because allocating one could fail again; it is never put on the free list.
__attribute__((noreturn))
static void _hmml_oom(struct _hmml_parser* p)
{
	static char message[] = "Out of memory\n";
	_hmml_raise(p, message);
}

// memory management boilerplate stuff

// Registers `input` on the parser's free list so hmml_free() can release it,
// and returns `input`. If the list itself cannot be allocated or grown, `input`
// is freed before the error exit so it does not leak.
static void* _hmml_store_ptr(struct _hmml_parser* p, void* input)
{
	uintptr_t* list = p->free_list;

	if(!list) {
		list = MALLOC(8 * sizeof(uintptr_t));
		if(!list) {
			free(input);
			_hmml_oom(p);
		}
		list[0] = 8;
		list[1] = 2; // slots 0 and 1 hold the bookkeeping
	} else if(list[1] + 1 == list[0]) {
		size_t n = list[0] << 1;
		uintptr_t* grown = REALLOC(list, n * sizeof(uintptr_t));
		if(!grown) {
			free(input); // the old list is untouched and still owned by p
			_hmml_oom(p);
		}
		list = grown;
		list[0] = n;
	}

	p->free_list = list;

	list[list[1]] = (uintptr_t)input;
	list[1]++;

	return input;
}

// Copies `str` into a fresh NUL-terminated allocation owned by the free list.
static char* _hmml_persist_str(struct _hmml_parser* p, const struct _hmml_str str)
{
	char* mem = MALLOC(str.len+1);
	if(!mem) {
		_hmml_oom(p);
	}

	memcpy(mem, str.ptr, str.len);
	mem[str.len] = '\0';

	return _hmml_store_ptr(p, mem);
}

// Appends the `in_size` bytes at `in` to the growable array `*out`, which
// currently holds `*out_count` elements. Each array is preceded by a hidden
// size_t holding its slot in the free list, so that slot can be patched when
// realloc moves the block.
static void _hmml_persist_array_fn(struct _hmml_parser* p, void** out, size_t* out_count, void* in, size_t in_size)
{
	void* base;

	if(!*out) {
		base = MALLOC(in_size + sizeof(size_t));
		if(!base) {
			_hmml_oom(p);
		}
		_hmml_store_ptr(p, base);
		*(size_t*)base = p->free_list[1]-1;
	} else {
		void* old = (char*)(*out) - sizeof(size_t);
		base = REALLOC(old, (*out_count + 1) * in_size + sizeof(size_t));
		if(!base) {
			_hmml_oom(p); // `old` is still valid and still on the free list
		}
		size_t free_list_off = *(size_t*)base;
		p->free_list[free_list_off] = (uintptr_t)base;
	}

	*out = (char*)base + sizeof(size_t);
	memcpy((char*)*out + (*out_count * in_size), in, in_size);
	++(*out_count);
}

#define _hmml_persist_array(p, out, out_count, in) \
	_hmml_persist_array_fn((p), (void**)(out), (out_count), &(in), sizeof(in))

// error handling

#define _hmml_err(p, fmt, ...) \
	_hmml_err_fn((p), fmt "\n", ##__VA_ARGS__)

// Formats an error message, stores a heap copy of it in the output and takes
// the error exit.
__attribute__((noreturn, format(printf, 2, 3)))
static void _hmml_err_fn(struct _hmml_parser* p, const char* fmt, ...)
{
	static char error_buf[4096];

	va_list va;
	va_start(va, fmt);
	int n = vsnprintf(error_buf, sizeof(error_buf), fmt, va);
	va_end(va);

	// vsnprintf reports the length it *wanted* to write (or a negative value
	// on failure); only what actually fits in the buffer may be copied.
	size_t len = 0;
	if(n > 0) {
		len = (size_t)n < sizeof(error_buf) ? (size_t)n : sizeof(error_buf) - 1;
	}

	_hmml_raise(p, _hmml_persist_str(p, (struct _hmml_str){ error_buf, len }));
}

// actual parsing stuff

// ASCII-only lowercase conversion.
static char _hmml_ascii_lower(char c)
{
	return (c >= 'A' && c <= 'Z') ? (char)(c + ('a' - 'A')) : c;
}

// Returns 1 when `s` begins with `prefix`, ignoring ASCII case.
static _Bool _hmml_has_prefix_nocase(const char* s, const char* prefix)
{
	for(; *prefix; ++s, ++prefix) {
		if(_hmml_ascii_lower(*s) != _hmml_ascii_lower(*prefix)) {
			return 0;
		}
	}
	return 1;
}

// Advances the cursor over every byte in 1..' ', counting newlines.
static void _hmml_skip_ws(struct _hmml_parser* p)
{
	for(;;) {
		uint8_t c = *p->cursor;
		if(!c || c > ' ') {
			break;
		}
		if(c == '\n') {
			++p->line;
		}
		++p->cursor;
	}
}

static _Bool _hmml_str_eq(struct _hmml_str a, struct _hmml_str b)
{
	return a.len == b.len && memcmp(a.ptr, b.ptr, a.len) == 0;
}

// If `in` is a character that may follow a backslash, stores it in `*out` and
// returns 1. The NUL check matters: strchr() also "finds" the terminator, and
// treating it as escapable would make callers step over the end of the input.
static _Bool _hmml_unesc(char in, char* out)
{
	if(in != '\0' && strchr("[]:@~\\\"", in)) {
		*out = in;
		return 1;
	}
	return 0;
}

// Reads one attribute value at the cursor into `mem` (capacity `mem_size`),
// NUL-terminates it, advances the cursor past it and returns a pointer to the
// terminator. A value starting with '"' runs to the closing quote with
// backslash escapes resolved; otherwise it runs to the next whitespace or ']'
// (and, with `break_on_punct`, to a few punctuation characters as well).
static char* _hmml_read_attr(struct _hmml_parser* p, char* mem, size_t mem_size, _Bool break_on_punct)
{
	const char* src = p->cursor;
	char* dst = mem;

	if(*src != '"') {
		const char* breaks = break_on_punct
			? " ]\r\n\t:,'-./#="
			: " ]\r\n\t"
			;

		size_t n = strcspn(src, breaks);
		if(n >= mem_size) {
			_hmml_err(p, "Attribute [%.10s...] too long", p->cursor);
		}

		memcpy(dst, src, n);
		dst[n] = '\0';
		p->cursor += n;

		return dst + n;
	}

	++src;
	// src starts one past the cursor and advances at least as fast as dst, so
	// bounding it by mem_size keeps dst (and its terminator) inside mem.
	while(*src && *src != '"' && (size_t)(src - p->cursor) < mem_size) {
		char converted;
		if(*src == '\\' && _hmml_unesc(src[1], &converted)) {
			*dst++ = converted;
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}

	if(*src != '"') {
		_hmml_err(p, "Partially quoted attribute");
	}

	*dst = '\0';
	p->cursor = src+1;

	return dst;
}

// Reads "key = value" at the cursor (whitespace is allowed before '=', not
// after it). `key` and `val` point into static buffers that are overwritten by
// the next call.
static void _hmml_read_kv(struct _hmml_parser* p, struct _hmml_str* key, struct _hmml_str* val)
{
	static char key_memory[64];
	static char val_memory[1024];

	size_t key_len = strcspn(p->cursor, " \r\n\t=");
	if(key_len >= sizeof(key_memory)) {
		_hmml_err(p, "Attribute key [%.10s...] too long", p->cursor);
	}

	memcpy(key_memory, p->cursor, key_len);
	key_memory[key_len] = '\0';
	p->cursor += key_len;

	_hmml_skip_ws(p);

	if(*p->cursor != '=') {
		_hmml_err(p, "Expected '=', got [%.3s]", p->cursor);
	}
	++p->cursor;

	char* end = _hmml_read_attr(p, val_memory, sizeof(val_memory), 0);

	_hmml_debug("read kv [%s] = [%s]\n", key_memory, val_memory);

	key->ptr = key_memory;
	key->len = key_len;

	val->ptr = val_memory;
	val->len = end - val_memory;
}

// Looks `key` up in `attrs`; on a match stores a persistent copy of `value` in
// the matching destination and returns 1. Returns 0 for an unknown key.
static _Bool _hmml_store_str_attr(struct _hmml_parser* p, const struct _hmml_str_attr* attrs, size_t count, struct _hmml_str key, struct _hmml_str value)
{
	for(size_t i = 0; i < count; ++i) {
		if(_hmml_str_eq(key, attrs[i].str)) {
			*attrs[i].dest = _hmml_persist_str(p, value);
			return 1;
		}
	}
	return 0;
}

// Parses a marker at the cursor: either the bare form (~name, @name, :name) or
// the bracketed form, which may carry "#episode" or a free-form parameter
// before the closing ']'.
static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p)
{
	static char marker_mem[4096];

	// the extended markers are inside [ ] and can contain parameters
	_Bool extended = *p->cursor == '[';
	if(extended) {
		++p->cursor;
	}

	HMML_Marker marker = {0};

	switch(*p->cursor) {
		case '~': marker.type = HMML_PROJECT;  break;
		case '@': marker.type = HMML_MEMBER;   break;
		case ':': marker.type = HMML_CATEGORY; break;
		default:  _hmml_err(p, "Unknown marker type");
	}
	++p->cursor;

	char* name_end = _hmml_read_attr(p, marker_mem, sizeof(marker_mem), 1);
	marker.marker = _hmml_persist_str(p, (struct _hmml_str){ marker_mem, name_end - marker_mem });

	if(!extended) {
		return marker;
	}

	_hmml_skip_ws(p);

	if(*p->cursor == '#') {
		++p->cursor;
		// The episode ends at whitespace or at the closing bracket. Stopping
		// only at a space would swallow the ']' and make every episode marker
		// fail the check below.
		size_t n = strcspn(p->cursor, " \t\r\n]");
		marker.episode = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, n });
		p->cursor += n;
		_hmml_skip_ws(p);
	} else if(*p->cursor != ']') {
		// The parameter runs to the first unescaped ']' and is stored verbatim,
		// escapes included.
		const char* param_end = p->cursor;
		while(*param_end && *param_end != ']') {
			char converted;
			if(*param_end == '\\' && _hmml_unesc(param_end[1], &converted)) {
				param_end += 2;
			} else {
				++param_end;
			}
		}
		marker.parameter = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, param_end - p->cursor });
		p->cursor = param_end;
	}

	if(*p->cursor != ']') {
		_hmml_err(p, "Expected ']'");
	}
	++p->cursor;

	return marker;
}

// Parses the key=value attributes of a [ref ...] tag up to and including the
// closing ']'. The cursor must be just past "[ref".
static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p)
{
	HMML_Reference ref = {0};

	const struct _hmml_str_attr attrs[] = {
		{ HSTR("site")     , &ref.site      },
		{ HSTR("page")     , &ref.page      },
		{ HSTR("url")      , &ref.url       },
		{ HSTR("title")    , &ref.title     },
		{ HSTR("article")  , &ref.article   },
		{ HSTR("author")   , &ref.author    },
		{ HSTR("editor")   , &ref.editor    },
		{ HSTR("publisher"), &ref.publisher },
		{ HSTR("isbn")     , &ref.isbn      },
	};

	for(;;) {
		_hmml_skip_ws(p);

		if(*p->cursor == ']') {
			++p->cursor;
			break;
		}

		struct _hmml_str key, value;
		_hmml_read_kv(p, &key, &value);

		if(!_hmml_store_str_attr(p, attrs, countof(attrs), key, value)) {
			_hmml_err(p, "Unknown reference attribute");
		}
	}

	return ref;
}

// Parses "[m:s]" or "[h:m:s]" at the cursor into `anno`.
static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Annotation* anno)
{
	unsigned int h = 0, m = 0, s = 0;
	int offset = 0;

	int count = sscanf(p->cursor, "[%u:%u%n", &m, &s, &offset);
	if(count < 2) {
		_hmml_err(p, "Unable to parse timecode");
	}
	p->cursor += offset;

	char c = *p->cursor;
	if(c == ':') {
		// A third field means the first two were hours and minutes.
		unsigned int tmp;
		offset = 0;
		if(sscanf(p->cursor, ":%u]%n", &tmp, &offset) != 1 || offset == 0) {
			_hmml_err(p, "Unable to parse 3-part timecode");
		}
		h = m;
		m = s;
		s = tmp;
		p->cursor += offset;
	} else if(c != ']') {
		_hmml_err(p, "Unable to parse timecode");
	} else {
		++p->cursor;
	}

	if(s >= 60) {
		_hmml_err(p, "Seconds cannot exceed 59");
	}

	if(m >= 60) {
		_hmml_err(p, "Minutes cannot exceed 59");
	}

	anno->h = (int)h;
	anno->m = (int)m;
	anno->s = (int)s;
}

// Copies `n` bytes from `src` to `out` after checking that they fit below
// `limit`; returns the advanced write position.
static char* _hmml_text_append(struct _hmml_parser* p, char* out, const char* limit, const char* src, size_t n)
{
	if(n > (size_t)(limit - out)) {
		_hmml_err(p, "Not enough text memory");
	}
	memcpy(out, src, n);
	return out + n;
}

// Parses a marker at the cursor, records it on `anno` and appends its name to
// the text being built; returns the advanced write position.
static char* _hmml_text_marker(struct _hmml_parser* p, HMML_Annotation* anno, char* out, const char* limit)
{
	HMML_Marker m = _hmml_parse_marker(p);
	_hmml_persist_array(p, &anno->markers, &anno->marker_count, m);
	return _hmml_text_append(p, out, limit, m.marker, strlen(m.marker));
}

// Parses annotation text up to and including its closing ']'. The cursor must
// be just past the opening '['. Newlines are dropped, escapes are resolved,
// [ref ...] tags become references, and markers are recorded and replaced by
// their name.
static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno)
{
	static char text_mem[4096];
	const char* const limit = text_mem + sizeof(text_mem);
	char* out = text_mem;

	memset(text_mem, 0, sizeof(text_mem));

	for(;;) {
		size_t n = strcspn(p->cursor, "\\\n[]:@~");
		char c = p->cursor[n];

		out = _hmml_text_append(p, out, limit, p->cursor, n);
		p->cursor += n;

		if(c == '\0') {
			_hmml_err(p, "Unexpected EOF");
		} else if(c == ']') {
			++p->cursor;
			break;
		} else if(c == '\\') {
			char converted;
			if(_hmml_unesc(p->cursor[1], &converted)) {
				out = _hmml_text_append(p, out, limit, &converted, 1);
				p->cursor += 2;
			} else {
				out = _hmml_text_append(p, out, limit, &c, 1);
				p->cursor++;
			}
		} else if(c == '\n') {
			// Keep the line counter in step so that later annotations report
			// the line they really start on.
			++p->line;
			++p->cursor;
		} else if(c == '[' && strncmp(p->cursor + 1, "ref", 3) == 0) {
			p->cursor += 4;
			HMML_Reference ref = _hmml_parse_ref(p);
			_hmml_persist_array(p, &anno->references, &anno->reference_count, ref);
		} else if(c != '[' && strchr(" \t\r\n", p->cursor[1])) {
			// A bare @ ~ or : followed by whitespace can't be a marker, so it
			// is literal text. (End of input matches here too; the next
			// iteration then reports it.)
			out = _hmml_text_append(p, out, limit, &c, 1);
			++p->cursor;
		} else {
			out = _hmml_text_marker(p, anno, out, limit);
		}

		if(out == limit) {
			_hmml_err(p, "Not enough text memory");
		}
	}

	// trim trailing whitespace
	while(out > text_mem && out[-1] && (uint8_t)(out[-1]) <= ' ') {
		out[-1] = '\0';
		--out;
	}

	anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, out - text_mem });
}

// Parses "[quote author id]" or "[quote id]" at the cursor into `anno`.
static void _hmml_parse_quote(struct _hmml_parser* p, HMML_Annotation* anno)
{
	char member[256];
	int id;
	int off = 0;

	// `off` is only written once the closing ']' has matched, so a non-zero
	// value proves the whole tag was consumed.
	if(sscanf(p->cursor, "[quote %255s %d]%n", member, &id, &off) == 2 && off) {
		anno->quote.present = 1;
		anno->quote.id = id;
		anno->quote.author = _hmml_persist_str(p, (struct _hmml_str){ member, strlen(member) });
	} else if(sscanf(p->cursor, "[quote %d]%n", &id, &off) == 1 && off) {
		anno->quote.present = 1;
		anno->quote.id = id;
	} else {
		_hmml_err(p, "Unable to parse quote");
	}

	p->cursor += off;
}

// Parses annotations until "[/video]". Each one is a timecode, an optional
// [@author] marker, the bracketed text, an optional trailing [:category] marker
// and an optional [quote ...] tag.
static void _hmml_parse_annotations(struct _hmml_parser* p)
{
	for(;;) {
		_hmml_skip_ws(p);

		if(*p->cursor == '\0') {
			_hmml_err(p, "Unexpected EOF");
		}

		if(strncmp(p->cursor, "[/video]", 8) == 0) {
			break;
		}

		HMML_Annotation anno = { .line = p->line };

		_hmml_parse_timecode(p, &anno);

		if(*p->cursor != '[') {
			_hmml_err(p, "Expected '['");
		}

		if(p->cursor[1] == '@') {
			HMML_Marker m = _hmml_parse_marker(p);
			anno.author = m.marker;

			// The text must follow; without this check the increment below
			// could step over the terminator when input ends after the author.
			if(*p->cursor != '[') {
				_hmml_err(p, "Expected '['");
			}
		}

		++p->cursor;
		_hmml_parse_text(p, &anno);

		if(p->cursor[0] == '[' && p->cursor[1] == ':') {
			HMML_Marker m = _hmml_parse_marker(p);
			_hmml_persist_array(p, &anno.markers, &anno.marker_count, m);
		}

		if(p->cursor[0] == '[' && p->cursor[1] == 'q') {
			_hmml_parse_quote(p, &anno);
		}

		// convert all markers to lowercase (the text keeps the original case)
		for(size_t i = 0; i < anno.marker_count; ++i) {
			for(char* c = anno.markers[i].marker; *c; ++c) {
				*c = _hmml_ascii_lower(*c);
			}
		}

		_hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno);
	}
}

// Parses the attributes of the [video ...] tag (the cursor must be just past
// "[video") and then the annotations that follow it.
static void _hmml_parse_video(struct _hmml_parser* p)
{
	HMML_VideoMetaData* meta = &p->out.metadata;

	const struct _hmml_str_attr attrs[] = {
		{ HSTR("member")         , &meta->member          },
		{ HSTR("stream_platform"), &meta->stream_platform },
		{ HSTR("stream_username"), &meta->stream_username },
		{ HSTR("project")        , &meta->project         },
		{ HSTR("title")          , &meta->title           },
		{ HSTR("vod_platform")   , &meta->vod_platform    },
		{ HSTR("id")             , &meta->id              },
		{ HSTR("template")       , &meta->template        },
		{ HSTR("medium")         , &meta->medium          },
		{ HSTR("output")         , &meta->output          },
	};

	for(;;) {
		_hmml_skip_ws(p);

		if(*p->cursor == ']') {
			++p->cursor;
			_hmml_parse_annotations(p);
			return;
		}

		struct _hmml_str key, value;
		_hmml_read_kv(p, &key, &value);

		if(_hmml_store_str_attr(p, attrs, countof(attrs), key, value)) {
			continue;
		}

		if(_hmml_str_eq(key, HSTR("credit"))) {
			HMML_Credit credit = {0};

			// "name:role", split at the first ':'; the role is optional.
			const char* colon = strchr(value.ptr, ':');
			if(colon) {
				size_t name_len = (size_t)(colon - value.ptr);
				credit.name = _hmml_persist_str(p, (struct _hmml_str){ value.ptr, name_len });
				credit.role = _hmml_persist_str(p, (struct _hmml_str){ colon+1, value.len - (name_len + 1) });
			} else {
				credit.name = _hmml_persist_str(p, value);
			}

			_hmml_persist_array(p, &meta->credits, &meta->credit_count, credit);
			continue;
		}

		// anything else is kept as custom metadata
		HMML_VideoCustomMetaData custom = {
			.key   = _hmml_persist_str(p, key),
			.value = _hmml_persist_str(p, value),
		};
		_hmml_persist_array(p, &meta->custom, &meta->custom_count, custom);
	}
}

// Runs the parse and is the landing point for every error exit. setjmp() lives
// here rather than in hmml_parse() so that the parser state, which is modified
// before any longjmp(), is not a local variable of the function that called
// setjmp() - such locals have indeterminate values after the jump.
static void _hmml_run(struct _hmml_parser* p)
{
	static const char prefix[] = "[video";

	if(setjmp(p->err_buf) != 0) {
		// an error happened; p->out.error has been filled in
		return;
	}

	if(!_hmml_has_prefix_nocase(p->cursor, prefix)) {
		_hmml_err(p, "Missing initial video tag.");
	}

	p->cursor += sizeof(prefix) - 1;
	_hmml_parse_video(p);

	p->out.well_formed = 1;
}

HMML_Output hmml_parse(const char* string)
{
	if(!string) {
		string = "";
	}

	struct _hmml_parser p = {
		.mem    = string,
		.cursor = string,
		.line   = 1,
	};

	_hmml_run(&p);

	// Hand the free list over on failure as well: the error message and any
	// partially parsed data are on it and would otherwise leak.
	p.out.free_list = p.free_list;

	return p.out;
}

void hmml_free(HMML_Output* out)
{
	if(!out || !out->free_list) {
		return;
	}

	uintptr_t* list = out->free_list;

	for(uintptr_t i = 2; i < list[1]; ++i) {
		free((void*)list[i]);
	}
	free(list);

	// Makes a repeated call a no-op instead of a double free.
	out->free_list = NULL;
}

const struct HMML_Version hmml_version = {
	2, 0, 0
};

#undef HSTX
#undef HSTR
#endif