391 lines
12 KiB
Plaintext
391 lines
12 KiB
Plaintext
|
%{
|
||
|
#include <stdio.h>
|
||
|
#include <stdbool.h>
|
||
|
#include <ctype.h>
|
||
|
#include "stb_sb.h"
|
||
|
#include "hmmlib.h"
|
||
|
|
||
|
typedef struct {
|
||
|
int line;
|
||
|
HMML_Annotation* annos;
|
||
|
|
||
|
HMML_VideoMetaData meta;
|
||
|
HMML_Annotation an;
|
||
|
HMML_Reference ref;
|
||
|
|
||
|
HMML_Error* error;
|
||
|
|
||
|
char** attr;
|
||
|
int mnext;
|
||
|
bool first;
|
||
|
} HMML_ParseState;
|
||
|
|
||
|
/*
 * Record a printf-style error message together with the current line in
 * yyextra->error, then leave yylex() with a non-zero result.
 * Only usable inside rule actions, where yyextra is in scope.
 */
#define HMML_ERR(fmt, ...) \
    do { \
        if(asprintf(&yyextra->error->message, fmt, ##__VA_ARGS__) < 0){ \
            /* asprintf leaves the pointer undefined when it fails */ \
            yyextra->error->message = NULL; \
        } \
        yyextra->error->line = yyextra->line; \
        return 1; \
    } while(0)
|
||
|
|
||
|
/* Address of a string slot in the video metadata / in the reference being built. */
#define V_(x) (&yyextra->meta.x)
#define R_(x) (&yyextra->ref.x)

/* Open a new marker of kind HMML_<x>; `state` is resumed once its text has been read. */
#define M_(x, state) \
    do { \
        HMML_Marker m = { HMML_ ## x }; \
        sb_push(yyextra->an.markers, m); \
        yyextra->mnext = state; \
    } while(0)

/*
 * Give the newest marker a copy of the n bytes at t and append the same bytes
 * to the annotation text. The text is copied from t itself rather than from
 * the strndup result, which is NULL on allocation failure and may be shorter
 * than n if the token contains a NUL byte.
 */
#define M_ADD(t, n) \
    do { \
        sb_last(yyextra->an.markers).text = strndup((t), (n)); \
        memcpy(sb_add(yyextra->an.text, (n)), (t), (n)); \
    } while(0)
|
||
|
|
||
|
/*
 * Begin a fresh annotation: store the one in progress (unless none has been
 * started yet), then clear the working annotation and stamp it with the
 * current line.
 */
#define NEWANNO() \
    do { \
        if(yyextra->first){ \
            yyextra->first = false; \
        } else { \
            sb_push(yyextra->annos, yyextra->an); \
        } \
        memset(&yyextra->an, 0, sizeof(yyextra->an)); \
        yyextra->an.line = yyextra->line; \
    } while(0)
|
||
|
|
||
|
/*
 * Reject a backslash escape whose character is not one of [ ] : @ ~ \ .
 * NUL is tested separately because strchr() also finds the string terminator.
 */
#define CHECKESCAPE(x) \
    do { \
        if((x) == '\0' || !strchr("[]:@~\\", (x))){ \
            HMML_ERR("Unknown backslash escape code '%c'", (x)); \
        } \
    } while(0)
|
||
|
%}
|
||
|
|
||
|
/* Reentrant scanner, no yywrap; yyextra carries a HMML_ParseState pointer. */
%option reentrant noyywrap
%option extra-type="HMML_ParseState*"
|
||
|
|
||
|
/*
 * Named patterns used by the rules.
 * BAD_TIMECODE accepts any two-digit fields, so a timecode with even one
 * out-of-range field is reported as such. The ANNOTATION rules list the
 * TIMECODE rules first, and flex prefers the earlier rule on equal-length
 * matches, so valid timecodes are unaffected.
 */
S              [\t ]*
ATTR_SIMPLE    [^\" \]\t\r\n][^ \]\t\r\n]*
ATTR_ALNUM     [0-9a-zA-Z][0-9a-zA-Z_]*
ATTR_QUOTED    \"([^\n\"\\]|\\.)*\"
TAG_VIDEO_OPEN \[video
TIMECODE       \[[0-9]{1,2}(:[0-5][0-9]){1,2}\]
BAD_TIMECODE   \[[0-9]{1,2}(:[0-9][0-9]){1,2}\]
LB             \[
RB             \]
|
||
|
|
||
|
/* Inclusive start conditions, declared in the same order as before. */
%s VIDEO V_ATTR ANNOTATION
%s TEXT_START TEXT MARKER MARKER_XTRA
%s REF R_ATTR
%s AFTERTEXT AUTHOR CATEGORIES QUOTES
|
||
|
|
||
|
%%
|
||
|
|
||
|
<<EOF>>                   { /* input ended before the closing video tag returned from the scan */ HMML_ERR("Unexpected EOF, video close tag not found."); }
\n                        { /* no start condition given, so this counts lines in every state */ yyextra->line++; }

<INITIAL>{TAG_VIDEO_OPEN} { BEGIN(VIDEO); }
<INITIAL>.                { HMML_ERR("Missing video tag."); }
|
||
|
|
||
|
<VIDEO>{S}                 { /* skip blanks between attributes */ }
<VIDEO>member{S}=          { yyextra->attr = V_(member);    BEGIN(V_ATTR); }
<VIDEO>twitch_username{S}= { yyextra->attr = V_(twitch);    BEGIN(V_ATTR); }
<VIDEO>project{S}=         { yyextra->attr = V_(project);   BEGIN(V_ATTR); }
<VIDEO>title{S}=           { yyextra->attr = V_(title);     BEGIN(V_ATTR); }
<VIDEO>platform{S}=        { yyextra->attr = V_(platform);  BEGIN(V_ATTR); }
<VIDEO>id{S}=              { yyextra->attr = V_(id);        BEGIN(V_ATTR); }
<VIDEO>annotator{S}=       { yyextra->attr = V_(annotator); BEGIN(V_ATTR); }
<VIDEO>\]                  { /* end of the video tag, annotations follow */ BEGIN(ANNOTATION); }
<VIDEO>.                   { HMML_ERR("Invalid char '%c' in video tag.", *yytext); }

<V_ATTR>{S}                { /* blank right after the equals sign: value stays unset */ BEGIN(VIDEO); }
<V_ATTR>{ATTR_SIMPLE}      { *yyextra->attr = strndup(yytext, yyleng); BEGIN(VIDEO); }
<V_ATTR>{ATTR_QUOTED}      { /* copy without the surrounding quotes */ *yyextra->attr = strndup(yytext+1, yyleng-2); BEGIN(VIDEO); }
<V_ATTR>\]                 { /* empty value: hand the bracket back to VIDEO */ yyless(0); BEGIN(VIDEO); }
|
||
|
|
||
|
<ANNOTATION>{TIMECODE}{LB}@ { /* timecode followed by an author tag; yyleng-4 drops both brackets pairs opener, closer, opener and the at sign */ NEWANNO(); yyextra->an.time = strndup(yytext+1, yyleng-4); BEGIN(AUTHOR); }
<ANNOTATION>{TIMECODE}      { /* timecode alone; yyleng-2 drops the two brackets */ NEWANNO(); yyextra->an.time = strndup(yytext+1, yyleng-2); BEGIN(TEXT_START); }
<ANNOTATION>{BAD_TIMECODE}  { HMML_ERR("Timecode %s out of range.", yytext); }
<ANNOTATION>{S}             { /* skip blanks between annotations */ }
<ANNOTATION>.               { HMML_ERR("Cannot parse annotation. Expected timecode."); }
|
||
|
|
||
|
<TEXT_START>{LB}: { /* text node opens directly with a marker; TEXT resumes after its name */ M_(CATEGORY, TEXT); BEGIN(MARKER); }
<TEXT_START>{LB}@ { M_(MEMBER, TEXT);  BEGIN(MARKER); }
<TEXT_START>{LB}~ { M_(PROJECT, TEXT); BEGIN(MARKER); }
<TEXT_START>{LB}  { /* plain opening bracket: rescan it in TEXT */ yyless(0); BEGIN(TEXT); }
<TEXT_START>.     { HMML_ERR("Unknown character '%c' after timecode.", *yytext); }
|
||
|
|
||
|
<TEXT>[^\\:@~\]\n\[ ]+ { /* run of ordinary characters: append verbatim */ memcpy(sb_add(yyextra->an.text, yyleng), yytext, yyleng); }
<TEXT>\\.              { /* validated escape: append the character without the backslash */ CHECKESCAPE(yytext[1]); memcpy(sb_add(yyextra->an.text, yyleng-1), yytext+1, yyleng-1); }

<TEXT>[ ]:             { /* inline marker after a space: keep the space, then read the marker name */ sb_push(yyextra->an.text, ' '); M_(CATEGORY, TEXT); BEGIN(MARKER); }
<TEXT>[ ]@             { sb_push(yyextra->an.text, ' '); M_(MEMBER, TEXT);  BEGIN(MARKER); }
<TEXT>[ ]~             { sb_push(yyextra->an.text, ' '); M_(PROJECT, TEXT); BEGIN(MARKER); }

<TEXT>{LB}:            { /* bracketed marker: MARKER_XTRA follows the marker name */ M_(CATEGORY, MARKER_XTRA); BEGIN(MARKER); }
<TEXT>{LB}@            { M_(MEMBER, MARKER_XTRA);  BEGIN(MARKER); }
<TEXT>{LB}~            { M_(PROJECT, MARKER_XTRA); BEGIN(MARKER); }

<TEXT>\]               { /* closing bracket ends the text node */ BEGIN(AFTERTEXT); }
<TEXT>{LB}ref          { /* start a reference from a zeroed struct */ memset(&yyextra->ref, 0, sizeof(yyextra->ref)); BEGIN(REF); }
<TEXT>{LB}             { /* any other opening bracket is dropped */ }
<TEXT>{S}{S}           { /* a run of blanks becomes one space */ sb_push(yyextra->an.text, ' '); }
<TEXT>.                { sb_push(yyextra->an.text, *yytext); }
|
||
|
|
||
|
<MARKER>{ATTR_ALNUM}  { /* bare marker name; continue in the state chosen by M_ */ M_ADD(yytext, yyleng); BEGIN(yyextra->mnext); }
<MARKER>{ATTR_QUOTED} { /* quoted marker name, stored without the quotes */ M_ADD(yytext+1, yyleng-2); BEGIN(yyextra->mnext); }
<MARKER>.             { HMML_ERR("Cannot parse Marker. Expected quoted or alphanumeric attribute."); }

<MARKER_XTRA>\]       { BEGIN(TEXT); }
<MARKER_XTRA>.        { /* TODO: store the extra text somewhere; for now it is discarded */ }
|
||
|
|
||
|
<REF>[\t ]            { /* skip blanks between attributes */ }
<REF>site{S}=         { yyextra->attr = R_(site);      BEGIN(R_ATTR); }
<REF>page{S}=         { yyextra->attr = R_(page);      BEGIN(R_ATTR); }
<REF>url{S}=          { yyextra->attr = R_(url);       BEGIN(R_ATTR); }
<REF>title{S}=        { yyextra->attr = R_(title);     BEGIN(R_ATTR); }
<REF>article{S}=      { yyextra->attr = R_(article);   BEGIN(R_ATTR); }
<REF>author{S}=       { yyextra->attr = R_(author);    BEGIN(R_ATTR); }
<REF>editor{S}=       { yyextra->attr = R_(editor);    BEGIN(R_ATTR); }
<REF>publisher{S}=    { yyextra->attr = R_(publisher); BEGIN(R_ATTR); }
<REF>isbn{S}=         { yyextra->attr = R_(isbn);      BEGIN(R_ATTR); }
<REF>\]               { /* reference complete: append a copy to the annotation */ sb_push(yyextra->an.references, yyextra->ref); BEGIN(TEXT); }
<REF>.                { HMML_ERR("Unexpected item in ref: %s", yytext); }

<R_ATTR>{S}           { /* blanks before the value are skipped */ }
<R_ATTR>{ATTR_SIMPLE} { *yyextra->attr = strndup(yytext, yyleng); BEGIN(REF); }
<R_ATTR>{ATTR_QUOTED} { /* copy without the surrounding quotes */ *yyextra->attr = strndup(yytext+1, yyleng-2); BEGIN(REF); }
<R_ATTR>\]            { /* empty value, handled like V_ATTR: give the bracket back to REF instead of letting the default echo rule eat it */ yyless(0); BEGIN(REF); }
|
||
|
|
||
|
<AFTERTEXT,ANNOTATION>\[\/video\] { /* closing tag: store the pending annotation and finish the scan */ NEWANNO(); return 0; }

<AFTERTEXT>{S}       { /* skip blanks after the text node */ }
<AFTERTEXT>{LB}quote { BEGIN(QUOTES); }
<AFTERTEXT>{LB}:     { /* keep only the bracket consumed so CATEGORIES sees the colon */ BEGIN(CATEGORIES); yyless(1); }
<AFTERTEXT>{LB}[0-9] { /* looks like the next timecode: rescan it in ANNOTATION */ BEGIN(ANNOTATION); yyless(0); }
<AFTERTEXT>..        { HMML_ERR("Unexpected thing after text node: %s", yytext); }
<AFTERTEXT>.         { HMML_ERR("Unexpected thing after text node: %s", yytext); }
|
||
|
|
||
|
<AUTHOR>[^\]\n]+\] { /* everything up to the closing bracket is the author name */ yyextra->an.author = strndup(yytext, yyleng-1); BEGIN(TEXT_START); }
<AUTHOR>{S}        { /* skip blanks */ }
<AUTHOR>.          { /* an empty or unterminated author tag used to fall through to the default echo rule */ HMML_ERR("Cannot parse author name."); }
|
||
|
|
||
|
<CATEGORIES>{S}             { /* skip blanks between categories */ }
<CATEGORIES>:{ATTR_SIMPLE}  { /* bare category, stored without the leading colon */ HMML_Marker m = { HMML_CATEGORY, strndup(yytext+1, yyleng-1) }; sb_push(yyextra->an.markers, m); }
<CATEGORIES>:{ATTR_QUOTED}  { /* quoted category, stored without the colon and quotes */ HMML_Marker m = { HMML_CATEGORY, strndup(yytext+2, yyleng-3) }; sb_push(yyextra->an.markers, m); }
<CATEGORIES>\]{LB}quote     { /* consume the quote keyword here, as AFTERTEXT does, so QUOTES does not append it to the quote author */ BEGIN(QUOTES); }
<CATEGORIES>\]              { BEGIN(ANNOTATION); }
<CATEGORIES>.               { HMML_ERR("Unexpected character in category tag: '%c'", *yytext); }
|
||
|
|
||
|
<QUOTES>{S}          { /* skip blanks */ }
<QUOTES>[0-9]+{S}\]  { /* number before the closing bracket is the quote id; the tag ends here */ yyextra->an.is_quote = true; yyextra->an.quote.id = atoi(yytext); BEGIN(ANNOTATION); }
<QUOTES>{ATTR_ALNUM} { /* any other word is appended to the quote author */ memcpy(sb_add(yyextra->an.quote.author, yyleng), yytext, yyleng); }
<QUOTES>.            { HMML_ERR("Unexpected character in quotes tag: %s", yytext); }
|
||
|
|
||
|
%%
|
||
|
|
||
|
HMML_Output hmml_parse_file(FILE* f){
|
||
|
HMML_Output output = {};
|
||
|
HMML_ParseState state = {};
|
||
|
|
||
|
state.error = &output.error;
|
||
|
state.first = true;
|
||
|
state.line = 1;
|
||
|
|
||
|
yyscan_t scan;
|
||
|
yylex_init_extra(&state, &scan);
|
||
|
yyset_in(f, scan);
|
||
|
|
||
|
output.well_formed = yylex(scan) == 0;
|
||
|
|
||
|
if(output.well_formed){
|
||
|
memcpy(&output.metadata, &state.meta, sizeof(HMML_VideoMetaData));
|
||
|
|
||
|
output.annotations = state.annos;
|
||
|
output.annotation_count = sb_count(state.annos);
|
||
|
|
||
|
for(size_t i = 0; i < output.annotation_count; ++i){
|
||
|
HMML_Annotation* a = output.annotations + i;
|
||
|
sb_push(a->text, 0);
|
||
|
a->marker_count = sb_count(a->markers);
|
||
|
a->reference_count = sb_count(a->references);
|
||
|
|
||
|
if(a->is_quote && a->quote.author){
|
||
|
sb_push(a->quote.author, 0);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
yylex_destroy(scan);
|
||
|
|
||
|
return output;
|
||
|
}
|
||
|
|
||
|
/*
 * Release everything owned by `hmml` and zero the structure.
 * A NULL argument is ignored.
 *
 * The metadata and each reference are walked as arrays of char*.
 * NOTE(review): this assumes HMML_VideoMetaData and HMML_Reference consist
 * solely of char* members - confirm against hmmlib.h.
 */
void hmml_free(HMML_Output* hmml){
    if(!hmml) return;

    char** meta = (char**)&hmml->metadata;
    for(size_t i = 0; i < sizeof(HMML_VideoMetaData)/sizeof(char*); ++i){
        free(meta[i]);
    }

    for(size_t i = 0; i < hmml->annotation_count; ++i){
        HMML_Annotation* a = hmml->annotations + i;

        free(a->time);
        free(a->author);
        sb_free(a->text);

        for(size_t j = 0; j < a->reference_count; ++j){
            /* Step to reference j first, then view it as strings. Casting
             * before the addition would advance by one char* instead of one
             * whole reference and free the wrong pointers. */
            char** fields = (char**)(a->references + j);
            for(size_t k = 0; k < sizeof(HMML_Reference)/sizeof(char*); ++k){
                free(fields[k]);
            }
        }
        sb_free(a->references);

        for(size_t j = 0; j < a->marker_count; ++j){
            free(a->markers[j].text);
        }
        sb_free(a->markers);
        sb_free(a->quote.author);
    }

    sb_free(hmml->annotations);
    free(hmml->error.message);

    /* Leave no dangling pointers behind. */
    memset(hmml, 0, sizeof(*hmml));
}
|
||
|
|
||
|
/* One row of an index built by hmml_dump: a key plus the lines it occurs on. */
typedef struct {
    char* text;   /* key, compared with strcmp by index_find */
    int*  lines;  /* stretchy buffer of line numbers */
} Index;
|
||
|
|
||
|
/*
 * Linear search of the stretchy buffer `base` for an entry whose text equals
 * `text`. Returns a pointer to the entry, or NULL when there is none.
 */
Index* index_find(Index* base, const char* text){
    size_t total = sb_count(base);
    size_t pos   = 0;

    while(pos < total){
        if(!strcmp(base[pos].text, text)){
            return &base[pos];
        }
        ++pos;
    }

    return NULL;
}
|
||
|
|
||
|
void hmml_dump(HMML_Output* hmml){
|
||
|
|
||
|
if(!hmml){
|
||
|
puts("(null");
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if(!hmml->well_formed){
|
||
|
printf("Parse error on line %d: %s\n", hmml->error.line, hmml->error.message);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
puts("Annotations:");
|
||
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
||
|
HMML_Annotation* a = hmml->annotations + i;
|
||
|
printf("\t%3d [%7s] [%s]\n", a->line, a->time, a->text);
|
||
|
}
|
||
|
|
||
|
Index* authors = NULL;
|
||
|
Index* markers[HMML_MARKER_COUNT] = {};
|
||
|
int max_text_len = 0;
|
||
|
|
||
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
||
|
HMML_Annotation* a = hmml->annotations + i;
|
||
|
|
||
|
if(a->author){
|
||
|
size_t len = strlen(a->author);
|
||
|
if(len > max_text_len){
|
||
|
max_text_len = len;
|
||
|
}
|
||
|
|
||
|
Index* idx;
|
||
|
if(!(idx = index_find(authors, a->author))){
|
||
|
Index x = { a->author };
|
||
|
sb_push(authors, x);
|
||
|
idx = &sb_last(authors);
|
||
|
}
|
||
|
|
||
|
sb_push(idx->lines, a->line);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
||
|
HMML_Annotation* a = hmml->annotations + i;
|
||
|
|
||
|
for(size_t j = 0; j < a->marker_count; ++j){
|
||
|
int type = a->markers[j].type;
|
||
|
char* text = a->markers[j].text;
|
||
|
|
||
|
size_t len = strlen(text);
|
||
|
if(len > max_text_len){
|
||
|
max_text_len = len;
|
||
|
}
|
||
|
|
||
|
for(char* c = text; *c; ++c) *c = tolower(*c);
|
||
|
|
||
|
Index* idx;
|
||
|
if(!(idx = index_find(markers[type], text))){
|
||
|
Index x = { text };
|
||
|
sb_push(markers[type], x);
|
||
|
idx = &sb_last(markers[type]);
|
||
|
}
|
||
|
|
||
|
sb_push(idx->lines, a->line);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
puts("Authors:");
|
||
|
for(size_t i = 0; i < sb_count(authors); ++i){
|
||
|
printf("\t %*s: ", max_text_len, authors[i].text);
|
||
|
for(size_t j = 0; j < sb_count(authors[i].lines); ++j){
|
||
|
printf("%3d ", authors[i].lines[j]);
|
||
|
}
|
||
|
puts("");
|
||
|
}
|
||
|
|
||
|
|
||
|
static const char* m_tags[HMML_MARKER_COUNT] = { "Categories", "Members", "Projects" };
|
||
|
|
||
|
for(size_t i = 0; i < HMML_MARKER_COUNT; ++i){
|
||
|
printf("%s:\n", m_tags[i]);
|
||
|
for(size_t j = 0; j < sb_count(markers[i]); ++j){
|
||
|
printf("\t %*s: ", max_text_len, markers[i][j].text);
|
||
|
for(size_t k = 0; k < sb_count(markers[i][j].lines); ++k){
|
||
|
printf("%3d ", markers[i][j].lines[k]);
|
||
|
}
|
||
|
puts("");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static const char* r_tags[] = { "Site", "Page", "URL", "Title", "Article", "Author", "Editor", "Publisher", "ISBN" };
|
||
|
puts("References:");
|
||
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
||
|
HMML_Annotation* a = hmml->annotations + i;
|
||
|
for(size_t j = 0; j < a->reference_count; ++j){
|
||
|
printf("\t%3d ", a->line);
|
||
|
HMML_Reference* r = a->references + j;
|
||
|
for(size_t k = 0; k < 9; ++k){
|
||
|
char* item = ((char**)r)[k];
|
||
|
if(item){
|
||
|
printf("[%s = %s] ", r_tags[k], item);
|
||
|
}
|
||
|
}
|
||
|
puts("");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
puts("Quotes:");
|
||
|
for(size_t i = 0; i < hmml->annotation_count; ++i){
|
||
|
HMML_Annotation* a = hmml->annotations + i;
|
||
|
if(a->is_quote){
|
||
|
if(a->quote.author){
|
||
|
printf("\t%3d [Quote #%d, by %s]", a->line, a->quote.id, a->quote.author);
|
||
|
} else {
|
||
|
printf("\t%3d [Quote #%d]", a->line, a->quote.id);
|
||
|
}
|
||
|
puts("");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for(size_t i = 0; i < sb_count(authors); ++i){
|
||
|
sb_free(authors[i].lines);
|
||
|
}
|
||
|
sb_free(authors);
|
||
|
|
||
|
for(size_t i = 0; i < HMML_MARKER_COUNT; ++i){
|
||
|
for(size_t j = 0; j < sb_count(markers[i]); ++j){
|
||
|
sb_free(markers[i][j].lines);
|
||
|
}
|
||
|
sb_free(markers[i]);
|
||
|
}
|
||
|
}
|