hmmlib2: reinstate "offset" for marker/ref + fixes

fixes:
    - indentation
    - parsing bug for marker with both episode + parameter
This commit is contained in:
Alex Baines 2021-04-04 13:07:50 +01:00
parent 968437d263
commit 1993e9f1dd
1 changed file with 69 additions and 51 deletions

View File

@ -16,14 +16,14 @@ typedef struct {
} HMML_VideoCustomMetaData; } HMML_VideoCustomMetaData;
typedef struct { typedef struct {
char* member; char* member;
char* stream_platform; char* stream_platform;
char* stream_username; char* stream_username;
char* project; char* project;
char* title; char* title;
char* vod_platform; char* vod_platform;
char* id; char* id;
char* output; char* output;
char* template; char* template;
char* medium; char* medium;
@ -36,67 +36,69 @@ typedef struct {
} HMML_VideoMetaData; } HMML_VideoMetaData;
typedef struct { typedef struct {
char* site; char* site;
char* page; char* page;
char* url; char* url;
char* title; char* title;
char* article; char* article;
char* author; char* author;
char* editor; char* editor;
char* publisher; char* publisher;
char* isbn; char* isbn;
int offset;
} HMML_Reference; } HMML_Reference;
typedef enum { typedef enum {
HMML_CATEGORY, HMML_CATEGORY,
HMML_MEMBER, HMML_MEMBER,
HMML_PROJECT, HMML_PROJECT,
HMML_MARKER_COUNT, HMML_MARKER_COUNT,
} HMML_MarkerType; } HMML_MarkerType;
typedef struct { typedef struct {
HMML_MarkerType type; HMML_MarkerType type;
char* marker; char* marker;
char* parameter; char* parameter;
char* episode; char* episode;
int offset;
} HMML_Marker; } HMML_Marker;
typedef struct { typedef struct {
_Bool present; _Bool present;
int id; int id;
char* author; char* author;
} HMML_Quote; } HMML_Quote;
typedef struct { typedef struct {
int line; int line;
int h, m, s; int h, m, s;
char* text; char* text;
char* author; char* author;
HMML_Reference* references; HMML_Reference* references;
size_t reference_count; size_t reference_count;
HMML_Marker* markers; HMML_Marker* markers;
size_t marker_count; size_t marker_count;
HMML_Quote quote; HMML_Quote quote;
} HMML_Annotation; } HMML_Annotation;
typedef struct { typedef struct {
int line; int line;
int col; int col;
char* message; char* message;
} HMML_Error; } HMML_Error;
typedef struct { typedef struct {
_Bool well_formed; _Bool well_formed;
HMML_VideoMetaData metadata; HMML_VideoMetaData metadata;
HMML_Annotation* annotations; HMML_Annotation* annotations;
size_t annotation_count; size_t annotation_count;
HMML_Error error; HMML_Error error;
void* free_list; // implementation detail void* free_list; // implementation detail
} HMML_Output; } HMML_Output;
@ -108,7 +110,7 @@ void hmml_free (HMML_Output* output);
// Version // Version
extern const struct HMML_Version { extern const struct HMML_Version {
int Major, Minor, Patch; int Major, Minor, Patch;
} hmml_version; } hmml_version;
#endif #endif
@ -356,7 +358,9 @@ static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p)
++p->cursor; ++p->cursor;
} }
HMML_Marker marker = {}; HMML_Marker marker = {
.offset = -1,
};
char c = *p->cursor; char c = *p->cursor;
if(c == '~') { if(c == '~') {
@ -381,8 +385,10 @@ static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p)
++p->cursor; ++p->cursor;
size_t n = strcspn(p->cursor, " "); size_t n = strcspn(p->cursor, " ");
marker.episode = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, n }); marker.episode = _hmml_persist_str(p, (struct _hmml_str){ p->cursor, n });
p->cursor += n; p->cursor += n + 1;
} else if(*p->cursor != ']') { }
if(*p->cursor != ']') {
const char* end = p->cursor; const char* end = p->cursor;
for(;;) { for(;;) {
@ -415,7 +421,9 @@ static HMML_Marker _hmml_parse_marker(struct _hmml_parser* p)
static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p) static HMML_Reference _hmml_parse_ref(struct _hmml_parser* p)
{ {
HMML_Reference ref = {}; HMML_Reference ref = {
.offset = -1,
};
struct str_attr { struct str_attr {
struct _hmml_str str; struct _hmml_str str;
@ -503,7 +511,7 @@ static void _hmml_parse_timecode(struct _hmml_parser* p, HMML_Annotation* anno)
anno->s = s; anno->s = s;
} }
static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno) static size_t _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno)
{ {
static char text_mem[4096]; static char text_mem[4096];
char* out = text_mem; char* out = text_mem;
@ -553,9 +561,11 @@ static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno)
if(strncmp(p->cursor + 1, "ref", 3) == 0) { if(strncmp(p->cursor + 1, "ref", 3) == 0) {
p->cursor += 4; p->cursor += 4;
HMML_Reference ref = _hmml_parse_ref(p); HMML_Reference ref = _hmml_parse_ref(p);
ref.offset = out - text_mem;
_hmml_persist_array(p, &anno->references, &anno->reference_count, ref); _hmml_persist_array(p, &anno->references, &anno->reference_count, ref);
} else { } else {
HMML_Marker m = _hmml_parse_marker(p); HMML_Marker m = _hmml_parse_marker(p);
m.offset = out - text_mem;
_hmml_persist_array(p, &anno->markers, &anno->marker_count, m); _hmml_persist_array(p, &anno->markers, &anno->marker_count, m);
size_t text_len = strlen(m.marker); size_t text_len = strlen(m.marker);
@ -573,7 +583,9 @@ static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno)
++p->cursor; ++p->cursor;
} else { } else {
HMML_Marker m = _hmml_parse_marker(p); HMML_Marker m = _hmml_parse_marker(p);
m.offset = out - text_mem;
_hmml_persist_array(p, &anno->markers, &anno->marker_count, m); _hmml_persist_array(p, &anno->markers, &anno->marker_count, m);
size_t text_len = strlen(m.marker); size_t text_len = strlen(m.marker);
CHECKSIZE(text_len); CHECKSIZE(text_len);
memcpy(out, m.marker, text_len); memcpy(out, m.marker, text_len);
@ -592,9 +604,12 @@ static void _hmml_parse_text(struct _hmml_parser* p, HMML_Annotation* anno)
--out; --out;
} }
anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, out - text_mem }); size_t text_size = out - text_mem;
anno->text = _hmml_persist_str(p, (struct _hmml_str){ text_mem, text_size });
#undef CHECKSIZE #undef CHECKSIZE
return text_size;
} }
static void _hmml_parse_quote(struct _hmml_parser* p, HMML_Annotation* anno) static void _hmml_parse_quote(struct _hmml_parser* p, HMML_Annotation* anno)
@ -647,7 +662,7 @@ static void _hmml_parse_annotations(struct _hmml_parser* p)
++p->cursor; ++p->cursor;
_hmml_parse_text(p, &anno); int text_len = _hmml_parse_text(p, &anno);
if(p->cursor[0] == '[' && p->cursor[1] == ':') { if(p->cursor[0] == '[' && p->cursor[1] == ':') {
HMML_Marker m = _hmml_parse_marker(p); HMML_Marker m = _hmml_parse_marker(p);
@ -658,7 +673,7 @@ static void _hmml_parse_annotations(struct _hmml_parser* p)
_hmml_parse_quote(p, &anno); _hmml_parse_quote(p, &anno);
} }
// convert all markers to lowercase // convert all markers to lowercase, fix any out of range offsets
for(size_t i = 0; i < anno.marker_count; ++i) { for(size_t i = 0; i < anno.marker_count; ++i) {
HMML_Marker* m = anno.markers + i; HMML_Marker* m = anno.markers + i;
for(char* c = m->marker; *c; ++c) { for(char* c = m->marker; *c; ++c) {
@ -666,6 +681,9 @@ static void _hmml_parse_annotations(struct _hmml_parser* p)
*c = (*c - ('A' - 'a')); *c = (*c - ('A' - 'a'));
} }
} }
if(m->offset > text_len) {
m->offset = text_len;
}
} }
_hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno); _hmml_persist_array(p, &p->out.annotations, &p->out.annotation_count, anno);