/* Flex scanner for HMML video annotation files; the HMML_* types come from hmmlib.h. */
%{
|
|
#include <stdio.h>
|
|
#include <stdbool.h>
|
|
#include <ctype.h>
|
|
#include "stb_sb.h"
|
|
#include "hmmlib.h"
|
|
|
|
typedef struct {
|
|
int line;
|
|
HMML_Annotation* annos;
|
|
|
|
HMML_VideoMetaData meta;
|
|
HMML_Annotation an;
|
|
HMML_Reference ref;
|
|
|
|
HMML_Error error;
|
|
|
|
void* attr;
|
|
int mnext;
|
|
bool first;
|
|
} HMML_ParseState;
|
|
|
|
/*
 * Store a formatted message and the current line number in yyextra->error,
 * then abort the scan by returning 1 from the enclosing function.
 *
 * asprintf() leaves its output pointer unspecified when it fails, and
 * hmml_free() later hands error.message to free(); the message is therefore
 * forced to NULL on failure.
 */
#define HMML_ERR(fmt, ...) \
do { \
    if(asprintf(&yyextra->error.message, fmt, ##__VA_ARGS__) < 0){ \
        yyextra->error.message = NULL; \
    } \
    yyextra->error.line = yyextra->line; \
    return 1; \
} while(0)
|
|
|
|
/*
 * Abort the scan (through HMML_ERR) unless x is a character that may follow
 * a backslash:  [ ] : @ ~ \ "
 *
 * NUL is rejected explicitly: strchr() also matches the terminator of the
 * set string, so without the extra test a backslash followed by a NUL byte
 * would be treated as a valid escape.
 */
#define CHECKESCAPE(x) \
do { \
    if((x) == '\0' || !strchr("[]:@~\\\"", (x))){ \
        HMML_ERR("Unknown backslash escape code '%c'", (x)); \
    } \
} while(0)
|
|
|
|
/* Address of a field of the video metadata, and of the reference being built. */
#define V_(x) (&yyextra->meta.x)
#define R_(x) (&yyextra->ref.x)

/*
 * Append a new marker of type HMML_<x> to the current annotation and record
 * in mnext the start condition to enter after the marker's name is scanned.
 */
#define M_(x, state) \
do { \
    HMML_Marker m = { HMML_ ## x }; \
    sb_push(yyextra->an.markers, m); \
    yyextra->mnext = state; \
} while(0)
|
|
/*
 * Name the most recently pushed marker.
 *
 * t, n: start and length of the raw (still escaped) name inside yytext.
 * The name is duplicated, unescaped with UNQUOTE and stored in the marker;
 * the marker's offset is the current length of the annotation text.  When
 * the marker is an inline one (mnext == TEXT) the name is also appended to
 * the annotation text.
 *
 * The number of bytes appended is the length of the unescaped name, not n:
 * UNQUOTE removes one byte per backslash escape, so copying n bytes would
 * drag the NUL terminator and stale trailing bytes into the text.
 */
#define M_ADD(t, n) \
do { \
    char* c = UNQUOTE(strndup(t, n)); \
    sb_last(yyextra->an.markers).marker = c; \
    sb_last(yyextra->an.markers).offset = sb_count(yyextra->an.text); \
    if(yyextra->mnext == TEXT){ \
        size_t unq_len = strlen(c); \
        memcpy(sb_add(yyextra->an.text, unq_len), c, unq_len); \
    } \
} while(0)
|
|
|
|
/*
 * Append character c to the parameter of the most recent marker and, when c
 * is not NUL, to the annotation text as well.
 */
#define MX_ADD(c) \
do { \
    sb_push(sb_last(yyextra->an.markers).parameter, c); \
    if((c) != '\0'){ \
        sb_push(yyextra->an.text, c); \
    } \
} while(0)
|
|
|
|
/*
 * Start a new annotation.  Except on the very first call, the annotation
 * assembled so far is appended to yyextra->annos first; the working
 * annotation is then zeroed and stamped with the current line number.
 */
#define NEWANNO() \
do { \
    if(yyextra->first){ \
        yyextra->first = false; \
    } else { \
        sb_push(yyextra->annos, yyextra->an); \
    } \
    memset(&yyextra->an, 0, sizeof(yyextra->an)); \
    yyextra->an.line = yyextra->line; \
} while(0)
|
|
|
|
/*
 * Remove backslash escapes from a heap-allocated string in place and yield
 * the same pointer.  Each backslash is dropped and the character after it is
 * kept verbatim.
 *
 * An escape character outside the set  [ ] : @ ~ \ "  aborts the scan through
 * HMML_ERR.  The set is the one CHECKESCAPE uses; the test is written out
 * here so that the buffer can be freed before HMML_ERR returns, since no
 * caller has stored the pointer yet at that point and it would otherwise
 * leak.
 *
 * __typeof__ is used instead of typeof so the macro also compiles in strict
 * ISO modes of GCC and Clang.
 */
#define UNQUOTE(_attr) ({ \
    __typeof__(_attr) attr = (_attr); \
    size_t len = strlen(attr); \
    for(char* c = attr; *c; ++c){ \
        if(*c != '\\') continue; \
        if(!strchr("[]:@~\\\"", c[1])){ \
            char bad = c[1]; \
            free(attr); \
            HMML_ERR("Unknown backslash escape code '%c'", bad); \
        } \
        memmove(c, c+1, len-(c-attr)); \
        --len; \
    } \
    attr; \
})
|
|
|
|
%}
|
|
|
|
/* Reentrant scanner; yyextra is a HMML_ParseState*, and no yywrap() is needed. */
%option reentrant noyywrap
%option extra-type="HMML_ParseState*"
|
|
|
|
/* Horizontal whitespace; may be empty. */
S               [\t \r]*
/* Unquoted attribute value: runs up to whitespace or ']' and cannot start with '"'. */
ATTR_SIMPLE     [^\" \]\t\r\n][^ \]\t\r\n]*
/* Identifier-like value: alphanumeric first character, then alphanumerics or '_'. */
ATTR_ALNUM      [0-9a-zA-Z][0-9a-zA-Z_]*
/* Double-quoted value on one line; backslash escapes the following character. */
ATTR_QUOTED     \"([^\n\"\\]|\\.)*\"
TAG_VIDEO_OPEN  \[video
/* [m:ss], [mm:ss], [h:mm:ss] or [hh:mm:ss] with every two-digit field in 00-59. */
TIMECODE        \[[0-9]{1,2}(:[0-5][0-9]){1,2}\]
/*
 * Anything shaped like a timecode, whatever the field values.  The rules that
 * use TIMECODE are listed before the BAD_TIMECODE rule, and flex resolves
 * equal-length matches in favour of the earlier rule, so this pattern only
 * wins when at least one field is out of range -- including inputs such as
 * [1:23:75] where only some of the fields are bad.
 */
BAD_TIMECODE    \[[0-9]{1,2}(:[0-9][0-9]){1,2}\]
/* Ordinary text character: not whitespace and not one of \ : @ ~ [ ] */
TEXT_BREAK      [^\\:@~\[\]\r\n\t ]
LB              \[
RB              \]
|
|
|
|
/* Inclusive start conditions, declared in the same order as before. */
%s VIDEO V_ATTR V2_ATTR
%s ANNOTATION TEXT_START TEXT
%s MARKER MARKER_XTRA
%s REF R_ATTR
%s AFTERTEXT AUTHOR CATEGORIES QUOTES
|
|
|
|
%%
|
|
|
|
<<EOF>>     { /* input ended before the closing video tag returned from the scanner */ HMML_ERR("Unexpected EOF, video close tag not found."); }
\r\n|\n     { /* line endings are counted in every start condition */ ++yyextra->line; }
|
|
|
|
<INITIAL>{
{TAG_VIDEO_OPEN}    { /* start of the [video ...] tag */ BEGIN(VIDEO); }
.                   { HMML_ERR("Missing video tag."); }
}
|
|
|
|
<VIDEO>{
{S}                     { /* ignore whitespace between attributes */ }
member{S}=              { /* single-valued attributes go through V_ATTR */ yyextra->attr = V_(member); BEGIN(V_ATTR); }
stream_platform{S}=     { yyextra->attr = V_(stream_platform); BEGIN(V_ATTR); }
stream_username{S}=     { yyextra->attr = V_(stream_username); BEGIN(V_ATTR); }
project{S}=             { yyextra->attr = V_(project);         BEGIN(V_ATTR); }
title{S}=               { yyextra->attr = V_(title);           BEGIN(V_ATTR); }
vod_platform{S}=        { yyextra->attr = V_(vod_platform);    BEGIN(V_ATTR); }
id{S}=                  { yyextra->attr = V_(id);              BEGIN(V_ATTR); }
co\-host{S}=            { /* list-valued attributes go through V2_ATTR */ yyextra->attr = V_(co_hosts); BEGIN(V2_ATTR); }
guest{S}=               { yyextra->attr = V_(guests);          BEGIN(V2_ATTR); }
annotator{S}=           { yyextra->attr = V_(annotators);      BEGIN(V2_ATTR); }
\]                      { /* end of the video tag; annotations follow */ BEGIN(ANNOTATION); }
.                       { HMML_ERR("Invalid char '%s' in video tag.", yytext); }
}
|
|
|
|
<V_ATTR>{
{S}             { /* whitespace instead of a value: go back to the tag */ BEGIN(VIDEO); }
{ATTR_SIMPLE}   {
                    /* Release any value stored by an earlier occurrence of the same
                       attribute so that repeating an attribute does not leak it. */
                    char** dst = yyextra->attr;
                    free(*dst);
                    *dst = strndup(yytext, yyleng);
                    BEGIN(VIDEO);
                }
{ATTR_QUOTED}   {
                    /* As above.  The slot is cleared before UNQUOTE runs because
                       UNQUOTE can abort the scan, and a stale pointer left in the
                       metadata would later be freed a second time. */
                    char** dst = yyextra->attr;
                    free(*dst);
                    *dst = NULL;
                    *dst = UNQUOTE(strndup(yytext+1, yyleng-2));
                    BEGIN(VIDEO);
                }
\]              { /* no value before the tag ends: let VIDEO see the ']' */ yyless(0); BEGIN(VIDEO); }
}
|
|
|
|
<V2_ATTR>{
{S}             { /* whitespace instead of a value: go back to the tag */ BEGIN(VIDEO); }
{ATTR_SIMPLE}   { /* each occurrence appends one entry to the list */ sb_push(*(char***)yyextra->attr, strndup(yytext, yyleng)); BEGIN(VIDEO); }
{ATTR_QUOTED}   { sb_push(*(char***)yyextra->attr, UNQUOTE(strndup(yytext+1, yyleng-2))); BEGIN(VIDEO); }
\]              { /* hand the ']' back to VIDEO */ yyless(0); BEGIN(VIDEO); }
}
|
|
|
|
<ANNOTATION>{
{TIMECODE}{LB}@     { /* timecode followed by "[@": an author tag comes next */ NEWANNO(); yyextra->an.time = strndup(yytext+1, yyleng-4); BEGIN(AUTHOR); }
{TIMECODE}          { /* keep the timecode without its brackets */ NEWANNO(); yyextra->an.time = strndup(yytext+1, yyleng-2); BEGIN(TEXT_START); }
{BAD_TIMECODE}      { HMML_ERR("Timecode %s out of range.", yytext); }
{S}                 { /* ignore whitespace between annotations */ }
.                   { HMML_ERR("Cannot parse annotation. Expected timecode."); }
}
|
|
|
|
<TEXT_START>{
{LB}:   { /* the text node opens directly with a marker */ M_(CATEGORY, TEXT); BEGIN(MARKER); }
{LB}@   { M_(MEMBER  , TEXT); BEGIN(MARKER); }
{LB}~   { M_(PROJECT , TEXT); BEGIN(MARKER); }
{LB}    { /* plain text node: rescan the '[' in TEXT */ yyless(0); BEGIN(TEXT); }
.       { HMML_ERR("Unknown character '%s' after timecode.", yytext); }
}
|
|
|
|
<TEXT>{
{TEXT_BREAK}+   { /* run of ordinary characters, copied verbatim */ memcpy(sb_add(yyextra->an.text, yyleng), yytext, yyleng); }
\\.             { /* escaped character: keep it, drop the backslash */ CHECKESCAPE(yytext[1]); memcpy(sb_add(yyextra->an.text, yyleng-1), yytext+1, yyleng-1); }
[ \r\t]+:       { /* inline marker after whitespace; the whitespace becomes one space */ sb_push(yyextra->an.text, ' '); M_(CATEGORY, TEXT); BEGIN(MARKER); }
[ \r\t]+@       { sb_push(yyextra->an.text, ' '); M_(MEMBER  , TEXT); BEGIN(MARKER); }
[ \r\t]+~       { sb_push(yyextra->an.text, ' '); M_(PROJECT , TEXT); BEGIN(MARKER); }
{LB}:           { /* bracketed marker: MARKER_XTRA follows the name */ M_(CATEGORY, MARKER_XTRA); BEGIN(MARKER); }
{LB}@           { M_(MEMBER  , MARKER_XTRA); BEGIN(MARKER); }
{LB}~           { M_(PROJECT , MARKER_XTRA); BEGIN(MARKER); }
\]              { /* end of the text node */ BEGIN(AFTERTEXT); }
{LB}ref         { /* a reference is anchored at the current text length */ yyextra->ref.offset = sb_count(yyextra->an.text); BEGIN(REF); }
{LB}            { /* any other '[' is dropped, including the one rescanned from TEXT_START */ }
{S}             { sb_push(yyextra->an.text, ' '); }
.               { sb_push(yyextra->an.text, *yytext); }
}
|
|
|
|
<MARKER>{
{ATTR_ALNUM}    { /* bare name; continue in the state chosen by M_() */ M_ADD(yytext, yyleng); BEGIN(yyextra->mnext); }
{ATTR_QUOTED}   { /* quoted name, passed without the surrounding quotes */ M_ADD(yytext+1, yyleng-2); BEGIN(yyextra->mnext); }
.               { HMML_ERR("Cannot parse Marker. Expected quoted or alphanumeric attribute."); }
}
|
|
|
|
<MARKER_XTRA>{
\\]             { /* escaped ']' is part of the parameter */ MX_ADD(']'); }
\\#             { MX_ADD('#'); }
\]              { /* end of the bracketed marker: terminate the parameter */ MX_ADD('\0'); BEGIN(TEXT); }
[ ]\#[0-9]+     {
                    HMML_Marker* mk = &sb_last(yyextra->an.markers);
                    if(mk->type != HMML_PROJECT){
                        /* not a project marker: " #123" is ordinary parameter text
                           (without the leading space) and is echoed into the
                           annotation text too */
                        size_t n = yyleng-1;
                        memcpy(sb_add(mk->parameter, n), yytext+1, n);
                        memcpy(sb_add(yyextra->an.text, n), yytext+1, n);
                    } else {
                        /* project marker: the digits after '#' are the episode */
                        mk->episode = strdup(yytext+2);
                    }
                }
[ ]             { /* a space is kept only once the parameter has content */ if(sb_last(yyextra->an.markers).parameter){ MX_ADD(' '); } }
.               { MX_ADD(*yytext); }
}
|
|
|
|
<REF>{
{S}                 { /* ignore whitespace between attributes */ }
site{S}=            { yyextra->attr = R_(site);      BEGIN(R_ATTR); }
page{S}=            { yyextra->attr = R_(page);      BEGIN(R_ATTR); }
url{S}=             { yyextra->attr = R_(url);       BEGIN(R_ATTR); }
title{S}=           { yyextra->attr = R_(title);     BEGIN(R_ATTR); }
article{S}=         { yyextra->attr = R_(article);   BEGIN(R_ATTR); }
author{S}=          { yyextra->attr = R_(author);    BEGIN(R_ATTR); }
editor{S}=          { yyextra->attr = R_(editor);    BEGIN(R_ATTR); }
publisher{S}=       { yyextra->attr = R_(publisher); BEGIN(R_ATTR); }
isbn{S}=            { yyextra->attr = R_(isbn);      BEGIN(R_ATTR); }
\]                  {
                        /* end of the ref tag: hand the reference to the annotation
                           and clear the working copy for the next one */
                        sb_push(yyextra->an.references, yyextra->ref);
                        memset(&yyextra->ref, 0, sizeof(yyextra->ref));
                        BEGIN(TEXT);
                    }
.                   { HMML_ERR("Unexpected item in ref: %s", yytext); }
}
|
|
|
|
<R_ATTR>{
{S}             { /* ignore whitespace between '=' and the value */ }
{ATTR_SIMPLE}   {
                    /* Release any value stored by an earlier occurrence of the same
                       attribute so that repeating an attribute does not leak it. */
                    char** dst = yyextra->attr;
                    free(*dst);
                    *dst = strndup(yytext, yyleng);
                    BEGIN(REF);
                }
{ATTR_QUOTED}   {
                    /* As above.  The slot is cleared before UNQUOTE runs because
                       UNQUOTE can abort the scan, and a stale pointer left in the
                       reference would later be freed a second time. */
                    char** dst = yyextra->attr;
                    free(*dst);
                    *dst = NULL;
                    *dst = UNQUOTE(strndup(yytext+1, yyleng-2));
                    BEGIN(REF);
                }
\]              {
                    /* No value before the tag ends.  Without this rule the ']'
                       matched nothing here, fell through to flex's default ECHO
                       rule and left the scanner in R_ATTR; V_ATTR already handles
                       the same case this way. */
                    yyless(0);
                    BEGIN(REF);
                }
}
|
|
|
|
<AFTERTEXT,ANNOTATION>\[\/video\]   { /* closing tag: store the pending annotation and finish successfully */ NEWANNO(); return 0; }

<AFTERTEXT>{
{S}         { /* ignore whitespace after the text node */ }
{LB}quote   { BEGIN(QUOTES); }
{LB}:       { /* keep the ':' so CATEGORIES sees it */ BEGIN(CATEGORIES); yyless(1); }
{LB}[0-9]   { /* looks like the next timecode: rescan it in ANNOTATION */ BEGIN(ANNOTATION); yyless(0); }
..          { HMML_ERR("Unexpected thing after text node: %s", yytext); }
.           { HMML_ERR("Unexpected thing after text node: %s", yytext); }
}
|
|
|
|
<AUTHOR>{
[^\]\n]+\]  { /* everything up to the closing ']' is the author name */ yyextra->an.author = strndup(yytext, yyleng-1); BEGIN(TEXT_START); }
{S}         { /* ignore whitespace */ }
}
|
|
|
|
<CATEGORIES>{
{S}             { /* ignore whitespace between categories */ }
:{ATTR_SIMPLE}  {
                    /* trailing category: no parameter, no episode, offset -1 */
                    HMML_Marker m = { HMML_CATEGORY, strndup(yytext+1, yyleng-1), NULL, NULL, -1 };
                    sb_push(yyextra->an.markers, m);
                }
:{ATTR_QUOTED}  {
                    /* same, with the ':' and both quotes stripped */
                    HMML_Marker m = { HMML_CATEGORY, UNQUOTE(strndup(yytext+2, yyleng-3)), NULL, NULL, -1 };
                    sb_push(yyextra->an.markers, m);
                }
\]{LB}          {
                    /* NOTE(review): this enters QUOTES after "][" without consuming
                       the word "quote" (the AFTERTEXT rule does consume it), so the
                       ATTR_ALNUM rule in QUOTES would append "quote" to the quote
                       author -- confirm this is intended. */
                    BEGIN(QUOTES);
                }
\]              { BEGIN(ANNOTATION); }
.               { HMML_ERR("Unexpected character in category tag: '%s'\n", yytext); }
}
|
|
|
|
<QUOTES>{
{S}             { /* ignore whitespace */ }
[0-9]+{S}\]     {
                    /* a number directly before ']' is the quote id and ends the tag */
                    yyextra->an.quote.id = atoi(yytext);
                    yyextra->an.is_quote = true;
                    BEGIN(ANNOTATION);
                }
{ATTR_ALNUM}    { /* any other word is appended to the quote author */ memcpy(sb_add(yyextra->an.quote.author, yyleng), yytext, yyleng); }
.               { HMML_ERR("Unexpected character in quotes tag: %s", yytext); }
}
|
|
|
|
%%
|
|
|
|
#define HMML_REF_ITEMS 9
|
|
|
|
static void _hmml_free_ref(HMML_Reference*);
|
|
static void _hmml_free_anno(HMML_Annotation*);
|
|
|
|
HMML_Output hmml_parse_file(FILE* f){
|
|
HMML_Output output = {};
|
|
HMML_ParseState state = {};
|
|
|
|
state.first = true;
|
|
state.line = 1;
|
|
|
|
yyscan_t scan;
|
|
yylex_init_extra(&state, &scan);
|
|
yyset_in(f, scan);
|
|
|
|
output.well_formed = yylex(scan) == 0;
|
|
|
|
memcpy(&output.metadata, &state.meta, sizeof(HMML_VideoMetaData));
|
|
|
|
output.metadata.co_host_count = sb_count(output.metadata.co_hosts);
|
|
output.metadata.guest_count = sb_count(output.metadata.guests);
|
|
output.metadata.annotator_count = sb_count(output.metadata.annotators);
|
|
|
|
output.annotations = state.annos;
|
|
output.annotation_count = sb_count(state.annos);
|
|
|
|
for(size_t i = 0; i < output.annotation_count; ++i){
|
|
HMML_Annotation* a = output.annotations + i;
|
|
sb_push(a->text, 0);
|
|
a->marker_count = sb_count(a->markers);
|
|
a->reference_count = sb_count(a->references);
|
|
|
|
for(size_t j = 0; j < sb_count(a->markers); ++j){
|
|
for(char* c = a->markers[j].marker; *c; ++c) *c = tolower(*c);
|
|
}
|
|
|
|
if(a->is_quote && a->quote.author){
|
|
sb_push(a->quote.author, 0);
|
|
}
|
|
}
|
|
|
|
if(!output.well_formed){
|
|
hmml_free(&output);
|
|
memcpy(&output.error, &state.error, sizeof(HMML_Error));
|
|
}
|
|
|
|
_hmml_free_anno(&state.an);
|
|
_hmml_free_ref(&state.ref);
|
|
|
|
yylex_destroy(scan);
|
|
|
|
return output;
|
|
}
|
|
|
|
/*
 * Free the strings of a reference.  The reference is treated as an array of
 * HMML_REF_ITEMS char* values; the struct itself is not freed.
 */
static void _hmml_free_ref(HMML_Reference* r){
    char** field = (char**)r;
    char** const end = field + HMML_REF_ITEMS;

    while(field != end){
        free(*field++);
    }
}
|
|
|
|
/*
 * Release everything an annotation owns: its markers, references, quote
 * author, text, author and timecode.  The annotation struct itself is not
 * freed.
 */
static void _hmml_free_anno(HMML_Annotation* a){
    sb_each(mk, a->markers){
        sb_free(mk->parameter);
        free(mk->episode);
        free(mk->marker);
    }
    sb_free(a->markers);

    sb_each(ref, a->references){
        _hmml_free_ref(ref);
    }
    sb_free(a->references);

    sb_free(a->quote.author);
    sb_free(a->text);
    free(a->author);
    free(a->time);
}
|
|
|
|
/*
 * Release everything owned by an HMML_Output and reset it to all zeroes.
 * A NULL pointer is ignored.  The HMML_Output object itself is not freed.
 */
void hmml_free(HMML_Output* hmml){
    if(hmml == NULL){
        return;
    }

    /* Annotations and the buffer that holds them. */
    for(size_t n = 0; n < hmml->annotation_count; ++n){
        _hmml_free_anno(&hmml->annotations[n]);
    }
    sb_free(hmml->annotations);

    HMML_VideoMetaData* md = &hmml->metadata;

    /* Single-valued metadata strings. */
    free(md->member);
    free(md->stream_platform);
    free(md->stream_username);
    free(md->project);
    free(md->title);
    free(md->vod_platform);
    free(md->id);

    /* List-valued metadata: every entry, then the list itself. */
    sb_each(name, md->co_hosts) free(*name);
    sb_each(name, md->guests) free(*name);
    sb_each(name, md->annotators) free(*name);

    sb_free(md->co_hosts);
    sb_free(md->guests);
    sb_free(md->annotators);

    free(hmml->error.message);

    /* Leave no dangling pointers behind. */
    memset(hmml, 0, sizeof(*hmml));
}
|
|
|
|
/* One row of the listings printed by hmml_dump(): a name and the annotation
 * lines on which it occurs. */
typedef struct {
    char *text;   /* the indexed name; points into the HMML_Output being dumped */
    int *lines;   /* stretchy buffer of line numbers */
} Index;
|
|
|
|
/*
 * Look up the entry of the stretchy buffer `base` whose text equals `text`.
 * Returns a pointer to that entry, or NULL when there is none.
 */
Index* index_find(Index* base, const char* text){
    size_t remaining = sb_count(base);

    for(Index* entry = base; remaining-- > 0; ++entry){
        if(!strcmp(entry->text, text)) return entry;
    }

    return NULL;
}
|
|
|
|
void hmml_dump(HMML_Output* hmml){
|
|
|
|
if(!hmml){
|
|
puts("(null");
|
|
return;
|
|
}
|
|
|
|
if(!hmml->well_formed){
|
|
printf("Parse error on line %d: %s\n", hmml->error.line, hmml->error.message);
|
|
return;
|
|
}
|
|
|
|
|
|
puts("Annotations:");
|
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
|
HMML_Annotation* a = hmml->annotations + i;
|
|
printf("\t%3d [%7s] [%s]\n", a->line, a->time, a->text);
|
|
}
|
|
|
|
Index* authors = NULL;
|
|
Index* markers[HMML_MARKER_COUNT] = {};
|
|
int max_text_len = 0;
|
|
|
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
|
HMML_Annotation* a = hmml->annotations + i;
|
|
|
|
if(a->author){
|
|
size_t len = strlen(a->author);
|
|
if(len > max_text_len){
|
|
max_text_len = len;
|
|
}
|
|
|
|
Index* idx;
|
|
if(!(idx = index_find(authors, a->author))){
|
|
Index x = { a->author };
|
|
sb_push(authors, x);
|
|
idx = &sb_last(authors);
|
|
}
|
|
|
|
sb_push(idx->lines, a->line);
|
|
}
|
|
}
|
|
|
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
|
HMML_Annotation* a = hmml->annotations + i;
|
|
|
|
for(size_t j = 0; j < a->marker_count; ++j){
|
|
int type = a->markers[j].type;
|
|
char* text = a->markers[j].marker;
|
|
|
|
size_t len = strlen(text);
|
|
if(len > max_text_len){
|
|
max_text_len = len;
|
|
}
|
|
|
|
Index* idx;
|
|
if(!(idx = index_find(markers[type], text))){
|
|
Index x = { text };
|
|
sb_push(markers[type], x);
|
|
idx = &sb_last(markers[type]);
|
|
}
|
|
|
|
sb_push(idx->lines, a->line);
|
|
}
|
|
}
|
|
|
|
puts("Authors:");
|
|
for(size_t i = 0; i < sb_count(authors); ++i){
|
|
printf("\t %*s: ", max_text_len, authors[i].text);
|
|
for(size_t j = 0; j < sb_count(authors[i].lines); ++j){
|
|
printf("%3d ", authors[i].lines[j]);
|
|
}
|
|
puts("");
|
|
}
|
|
|
|
|
|
static const char* m_tags[HMML_MARKER_COUNT] = { "Categories", "Members", "Projects" };
|
|
|
|
for(size_t i = 0; i < HMML_MARKER_COUNT; ++i){
|
|
printf("%s:\n", m_tags[i]);
|
|
for(size_t j = 0; j < sb_count(markers[i]); ++j){
|
|
printf("\t %*s: ", max_text_len, markers[i][j].text);
|
|
for(size_t k = 0; k < sb_count(markers[i][j].lines); ++k){
|
|
printf("%3d ", markers[i][j].lines[k]);
|
|
}
|
|
puts("");
|
|
}
|
|
}
|
|
|
|
static const char* r_tags[] = { "Site", "Page", "URL", "Title", "Article", "Author", "Editor", "Publisher", "ISBN" };
|
|
puts("References:");
|
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
|
HMML_Annotation* a = hmml->annotations + i;
|
|
for(size_t j = 0; j < a->reference_count; ++j){
|
|
printf("\t%3d ", a->line);
|
|
HMML_Reference* r = a->references + j;
|
|
for(size_t k = 0; k < HMML_REF_ITEMS; ++k){
|
|
char* item = ((char**)r)[k];
|
|
if(item){
|
|
printf("[%s = %s] ", r_tags[k], item);
|
|
}
|
|
}
|
|
puts("");
|
|
}
|
|
}
|
|
|
|
puts("Quotes:");
|
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
|
HMML_Annotation* a = hmml->annotations + i;
|
|
if(a->is_quote){
|
|
if(a->quote.author){
|
|
printf("\t%3d [Quote #%d, by %s]", a->line, a->quote.id, a->quote.author);
|
|
} else {
|
|
printf("\t%3d [Quote #%d]", a->line, a->quote.id);
|
|
}
|
|
puts("");
|
|
}
|
|
}
|
|
|
|
for(size_t i = 0; i < sb_count(authors); ++i){
|
|
sb_free(authors[i].lines);
|
|
}
|
|
sb_free(authors);
|
|
|
|
for(size_t i = 0; i < HMML_MARKER_COUNT; ++i){
|
|
for(size_t j = 0; j < sb_count(markers[i]); ++j){
|
|
sb_free(markers[i][j].lines);
|
|
}
|
|
sb_free(markers[i]);
|
|
}
|
|
}
|