Annotation-System/hmmlib-js/src/source.l

211 lines
7.8 KiB
Plaintext

%{
/**
 * Report a lexing error and abort.
 *
 * Logs the error together with the current line number, then throws a plain
 * object carrying the same information.
 *
 * @param {{line: number}} yy - shared lexer state; only `line` is read.
 * @param {string} err - human-readable error description.
 * @throws {{message: string, line: number}} always.
 */
function ERR(yy, err){
    var lineNo = yy.line;
    var failure = { message: err, line: lineNo };
    console.log("hmmlib error: L%d: %s\n", lineNo, err);
    throw failure;
}
/**
 * Validate the character that follows a backslash.
 *
 * The accepted escapes are `[`, `]`, `:`, `@`, `~`, `\` and `"`. Anything
 * else (including an empty string, which `indexOf` would otherwise accept)
 * is reported through ERR, which throws.
 *
 * @param {object} yy - shared lexer state, forwarded to ERR.
 * @param {string} str - the single character found after the backslash.
 */
function CHECKESCAPE(yy, str){
    var escapable = "[]:@~\\\"";
    if(str.length !== 1 || escapable.indexOf(str) === -1){
        // ERR takes a ready-made message; it does no printf-style formatting of it.
        ERR(yy, "hmmlib: Unknown backslash escape code: " + str);
    }
}
/**
 * Remove backslash escapes from the contents of a quoted attribute.
 *
 * Each backslash is dropped and the character after it is kept verbatim.
 * That following character is validated with CHECKESCAPE, which raises an
 * error for escapes it does not recognise.
 *
 * @param {object} yy - shared lexer state, forwarded to CHECKESCAPE.
 * @param {string} str - attribute text without its surrounding quotes.
 * @returns {string} the text with every escaping backslash removed.
 */
function UNQUOTE(yy, str){
    var from = 0;
    var at;
    while((at = str.indexOf('\\', from)) !== -1){
        // Validate the escaped character, not the backslash itself.
        CHECKESCAPE(yy, str.charAt(at + 1));
        // Keep everything before the backslash; only the backslash is removed.
        str = str.slice(0, at) + str.slice(at + 1);
        // The escaped character now sits at `at`; resume after it so an
        // escaped backslash is not treated as the start of another escape.
        from = at + 1;
    }
    return str;
}
/**
 * Begin a new marker on the current annotation.
 *
 * Appends a marker object holding only its type and records which lexer
 * state should be entered once the marker name has been read.
 *
 * @param {object} yy - shared lexer state (`yy.an.markers` is appended to).
 * @param {string} str - marker type, e.g. "CATEGORY", "MEMBER" or "PROJECT".
 * @param {string} s - name of the state to continue in after the marker name.
 */
function M_(yy, str, s){
    var markerList = yy.an.markers;
    var started = { type: str };
    markerList.push(started);
    yy.mnext = s;
}
/**
 * Fill in the name and text offset of the most recently started marker.
 *
 * The marker name is the first `n` characters of `t` (all of `t` when `n`
 * is omitted). The offset is the annotation text length before any text is
 * appended. When the follow-up state is "TEXT" the name is also appended to
 * the annotation text.
 *
 * @param {object} yy - shared lexer state (`yy.an`, `yy.mnext`).
 * @param {string} t - matched marker text.
 * @param {number} [n] - number of leading characters of `t` to use.
 */
function M_ADD(yy, t, n){
    var anno = yy.an;
    var current = anno.markers[anno.markers.length - 1];
    var label = t.substr(0, n);
    current.marker = label;
    current.offset = anno.text.length;
    if(yy.mnext === "TEXT"){
        anno.text += label;
    }
}
/**
 * Append text to the parameter of the most recently started marker.
 *
 * The same text is also appended to the annotation text.
 *
 * @param {object} yy - shared lexer state (`yy.an.markers`, `yy.an.text`).
 * @param {string} c - text to append.
 */
function MX_ADD(yy, c){
    var anno = yy.an;
    var current = anno.markers[anno.markers.length - 1];
    // A missing (or empty) parameter is started fresh rather than concatenated.
    current.parameter = current.parameter ? current.parameter + c : c;
    anno.text += c;
}
/**
 * Finish the annotation being built and start an empty one.
 *
 * Unless `yy.first` is set, the current `yy.an` is pushed onto `yy.annos`.
 * A blank annotation stamped with the current line then replaces `yy.an`,
 * and `yy.first` is cleared.
 *
 * @param {object} yy - shared lexer state (`first`, `annos`, `an`, `line`).
 */
function NEWANNO(yy){
    if(!yy.first){
        yy.annos.push(yy.an);
    }
    var fresh = {};
    fresh.line = yy.line;
    fresh.time = "";
    fresh.text = "";
    fresh.author = "";
    fresh.references = [];
    fresh.markers = [];
    fresh.quote = { id: 0, author: "" };
    fresh.is_quote = false;
    yy.an = fresh;
    yy.first = false;
}
%}
// Lexer options, named pattern fragments and start conditions.
%option reentrant
%option noyywrap
// S: optional run of tabs, spaces or carriage returns. SP: the same, but at least one character.
S [\t \r]*
SP [\t \r]+
// ATTR_SIMPLE: bare attribute value. It may not start with a double quote and stops at a space, a closing bracket, tab, CR or LF.
ATTR_SIMPLE [^\" \]\t\r\n][^ \]\t\r\n]*
// ATTR_ALNUM: starts with a letter or digit, followed by letters, digits or underscores.
ATTR_ALNUM [0-9a-zA-Z][0-9a-zA-Z_]*
// ATTR_QUOTED: double-quoted value on a single line; a backslash escapes the next character.
ATTR_QUOTED \"([^\n\"\\]|\\.)*\"
TAG_VIDEO_OPEN "[video"
// TIMECODE: bracketed one or two digits followed by one or two ":NN" groups, each NN in 00-59.
TIMECODE \[[0-9]{1,2}(\:[0-5][0-9]){1,2}\]
// BAD_TIMECODE: same shape, but every ":NN" group starts with 6-9; used only to give a clearer error.
BAD_TIMECODE \[[0-9]{1,2}(\:[6-9][0-9]){1,2}\]
// TEXT_BREAK: despite the name, one ordinary text character, i.e. anything except
// backslash, colon, at-sign, tilde, square brackets, CR, LF, tab or space.
TEXT_BREAK [^\\\:\@\~\[\]\r\n\t ]
// LB / RB: literal opening / closing square bracket.
LB \[
RB \]
// Start conditions; the rules section prefixes each rule with the state it applies to.
%s VIDEO
%s V_ATTR
%s ANNOTATION
%s TEXT_START
%s TEXT
%s MARKER
%s MARKER_XTRA
%s REF
%s R_ATTR
%s AFTERTEXT
%s AUTHOR
%s CATEGORIES
%s QUOTES
%%
// Running out of input is always reported as an error. The only non-error exits
// in this rule set are the "return 1" actions on the closing video tag.
<<EOF>> { ERR(yy, "Unexpected EOF, video close tag not found."); }
// Rule without a start condition: every newline bumps the line counter used in error reports.
\r\n|\n { yy.line++; }
// INITIAL: the input must begin with the opening video tag; any other character is an error.
<INITIAL>{TAG_VIDEO_OPEN} { yy_.begin("VIDEO"); }
<INITIAL>. { ERR(yy, "Missing video tag."); }
// VIDEO: inside the opening video tag. Each recognised "name =" stores the attribute
// name in yy.attr and switches to V_ATTR, which reads the value into yy.meta.
<VIDEO>{SP} {}
<VIDEO>member{S}\= { yy.attr = "member"; yy_.begin("V_ATTR"); }
<VIDEO>stream_platform{S}\= { yy.attr = "stream_platform"; yy_.begin("V_ATTR"); }
<VIDEO>stream_username{S}\= { yy.attr = "stream_username"; yy_.begin("V_ATTR"); }
<VIDEO>project{S}\= { yy.attr = "project"; yy_.begin("V_ATTR"); }
<VIDEO>title{S}\= { yy.attr = "title"; yy_.begin("V_ATTR"); }
<VIDEO>vod_platform{S}\= { yy.attr = "vod_platform"; yy_.begin("V_ATTR"); }
<VIDEO>id{S}\= { yy.attr = "id"; yy_.begin("V_ATTR"); }
<VIDEO>co_host{S}\= { yy.attr = "co_host"; yy_.begin("V_ATTR"); }
<VIDEO>guest{S}\= { yy.attr = "guest"; yy_.begin("V_ATTR"); }
<VIDEO>annotator{S}\= { yy.attr = "annotator"; yy_.begin("V_ATTR"); }
// The closing bracket ends the tag; annotations follow.
<VIDEO>\] { yy_.begin("ANNOTATION"); };
<VIDEO>. { ERR(yy, "Invalid char '"+ yytext +"' in video tag."); }
// V_ATTR: value of the video-tag attribute named in yy.attr; the result is stored in yy.meta.
// Whitespace here returns to VIDEO without storing a value.
<V_ATTR>{SP} { yy_.begin("VIDEO"); }
<V_ATTR>{ATTR_SIMPLE} { yy.meta[yy.attr] = yytext; yy_.begin("VIDEO"); }
// Quoted value: strip the surrounding quotes, then resolve backslash escapes.
<V_ATTR>{ATTR_QUOTED} { yy.meta[yy.attr] = UNQUOTE(yy, yytext.substr(1, yyleng-2)); yy_.begin("VIDEO"); }
// A closing bracket is pushed back so the VIDEO rule for it ends the tag.
<V_ATTR>\] { yy_.less(0); yy_.begin("VIDEO"); }
// ANNOTATION: between annotations; expects a timecode or the closing video tag.
// NEWANNO stores the pending annotation (unless yy.first is set) and starts a blank one.
<ANNOTATION>\[\/video\] { NEWANNO(yy); return 1; }
// Timecode directly followed by an author tag opener: the time excludes the leading
// bracket and the three trailing characters that close the timecode and open the author tag.
<ANNOTATION>{TIMECODE}{LB}\@ { NEWANNO(yy); yy.an.time = yytext.substr(1, yyleng-4); yy_.begin("AUTHOR"); }
// Plain timecode: the time is the match without its two brackets.
<ANNOTATION>{TIMECODE} { NEWANNO(yy); yy.an.time = yytext.substr(1, yyleng-2); yy_.begin("TEXT_START"); }
<ANNOTATION>{BAD_TIMECODE} { ERR(yy, "Timecode '"+ yytext +"' out of range."); }
<ANNOTATION>{SP} {}
<ANNOTATION>. { ERR(yy, "Cannot parse annotation. Expected timecode."); }
// TEXT_START: right after the timecode (and optional author). The text node must open with a bracket.
// If the bracket is immediately followed by a marker sigil, the marker is started with "TEXT" as its follow-up state.
<TEXT_START>{LB}\: { M_(yy, "CATEGORY", "TEXT"); yy_.begin("MARKER"); }
<TEXT_START>{LB}\@ { M_(yy, "MEMBER" , "TEXT"); yy_.begin("MARKER"); }
<TEXT_START>{LB}\~ { M_(yy, "PROJECT" , "TEXT"); yy_.begin("MARKER"); }
// Any other opening bracket is pushed back and handled by the TEXT rules.
<TEXT_START>{LB} { yy_.less(0); yy_.begin("TEXT"); }
<TEXT_START>. { ERR(yy, "Unknown character '"+ yytext +"' after timecode."); }
// TEXT: body of the annotation's text node; output accumulates in yy.an.text.
<TEXT>{TEXT_BREAK}+ { yy.an.text += yytext; }
// Backslash escape: validate the escaped character and append it without the backslash.
<TEXT>\\. { CHECKESCAPE(yy, yytext.charAt(1)); yy.an.text += yytext.substr(1, yyleng-1); }
// Whitespace followed by a sigil starts an inline marker; the whitespace collapses to one space
// and the marker name is later appended to the text (follow-up state "TEXT").
<TEXT>[ \r\t]+\: { yy.an.text += ' '; M_(yy, "CATEGORY", "TEXT"); yy_.begin("MARKER"); }
<TEXT>[ \r\t]+\@ { yy.an.text += ' '; M_(yy, "MEMBER" , "TEXT"); yy_.begin("MARKER"); }
<TEXT>[ \r\t]+\~ { yy.an.text += ' '; M_(yy, "PROJECT" , "TEXT"); yy_.begin("MARKER"); }
// Bracketed marker: after the name, MARKER_XTRA collects the remaining text up to the closing bracket.
<TEXT>{LB}\: { M_(yy, "CATEGORY", "MARKER_XTRA"); yy_.begin("MARKER"); }
<TEXT>{LB}\@ { M_(yy, "MEMBER" , "MARKER_XTRA"); yy_.begin("MARKER"); }
<TEXT>{LB}\~ { M_(yy, "PROJECT" , "MARKER_XTRA"); yy_.begin("MARKER"); }
// A closing bracket ends the text node.
<TEXT>\] { yy_.begin("AFTERTEXT"); }
// Reference tag: remember where in the text it occurs.
// NOTE(review): yy.ref must already be an object here; its initial setup is not visible in this file section.
<TEXT>{LB}ref { yy.ref.offset = yy.an.text.length; yy_.begin("REF"); }
// Any other opening bracket is dropped from the text.
<TEXT>{LB} {}
// Remaining whitespace runs collapse to a single space.
<TEXT>{SP} { yy.an.text += ' '; }
<TEXT>. { yy.an.text += yytext; }
// MARKER: the marker name directly after its sigil. M_ADD records it on the marker started by M_,
// then lexing continues in the state saved in yy.mnext.
<MARKER>{ATTR_ALNUM} { M_ADD(yy, yytext); yy_.begin(yy.mnext); };
// Quoted name: the surrounding quotes are stripped (escapes are not resolved here).
<MARKER>{ATTR_QUOTED} { M_ADD(yy, yytext.substr(1, yyleng-2)); yy_.begin(yy.mnext); };
<MARKER>. { ERR(yy, "Cannot parse Marker. Expected quoted or alphanumeric attribute."); }
// MARKER_XTRA: text following the name of a bracketed marker, up to the closing bracket.
// MX_ADD appends it to the current marker's parameter and to the annotation text.
<MARKER_XTRA>\\] { MX_ADD(yy, ']'); }
<MARKER_XTRA>\\\# { MX_ADD(yy, '#'); }
<MARKER_XTRA>\] { yy_.begin("TEXT"); }
// Space, hash, digits: the episode number of a PROJECT marker. For other marker types the
// hash and digits are kept as parameter text, without the leading space.
// Listed before the single-space rule so it is still chosen when the lexer picks the first
// matching rule instead of the longest match.
<MARKER_XTRA>[ ]\#[0-9]+ {
    var m = yy.an.markers[yy.an.markers.length - 1];
    if(m.type == "PROJECT"){
        m.episode = yytext.substr(2);
    } else {
        MX_ADD(yy, yytext.substr(1));
    }
}
// A single space is kept only once the parameter has started, so leading spaces are dropped.
<MARKER_XTRA>[ ] { if(yy.an.markers[yy.an.markers.length - 1].parameter){ MX_ADD(yy, ' '); } }
<MARKER_XTRA>. { MX_ADD(yy, yytext); }
// REF: inside a reference tag. Each recognised "name =" stores the attribute name in yy.attr
// and switches to R_ATTR, which reads the value into yy.ref.
<REF>{SP} {}
<REF>site{S}\= { yy.attr = "site"; yy_.begin("R_ATTR"); }
<REF>page{S}\= { yy.attr = "page"; yy_.begin("R_ATTR"); }
<REF>url{S}\= { yy.attr = "url"; yy_.begin("R_ATTR"); }
<REF>title{S}\= { yy.attr = "title"; yy_.begin("R_ATTR"); }
<REF>article{S}\= { yy.attr = "article"; yy_.begin("R_ATTR"); }
<REF>author{S}\= { yy.attr = "author"; yy_.begin("R_ATTR"); }
<REF>editor{S}\= { yy.attr = "editor"; yy_.begin("R_ATTR"); }
<REF>publisher{S}\= { yy.attr = "publisher"; yy_.begin("R_ATTR"); }
<REF>isbn{S}\= { yy.attr = "isbn"; yy_.begin("R_ATTR"); }
// The closing bracket attaches the finished reference to the annotation and resets yy.ref for the next one.
<REF>\] { yy.an.references.push(yy.ref); yy.ref = {}; yy_.begin("TEXT"); }
<REF>. { ERR(yy, "Unexpected item in ref: " + yytext); }
// R_ATTR: value of the reference attribute named in yy.attr; whitespace before the value is skipped.
<R_ATTR>{SP} {}
<R_ATTR>{ATTR_SIMPLE} { yy.ref[yy.attr] = yytext; yy_.begin("REF"); }
// Quoted value: strip the surrounding quotes, then resolve backslash escapes.
<R_ATTR>{ATTR_QUOTED} { yy.ref[yy.attr] = UNQUOTE(yy, yytext.substr(1, yyleng-2)); yy_.begin("REF"); }
// AFTERTEXT: after the text node has closed. What follows may be a quote tag, a category tag,
// the next annotation's timecode, or the closing video tag.
<AFTERTEXT>\[\/video\] { NEWANNO(yy); return 1; }
<AFTERTEXT>{SP} {}
<AFTERTEXT>{LB}quote { yy_.begin("QUOTES"); }
// Keep the bracket consumed but push the colon back, so CATEGORIES sees the first category with its colon.
<AFTERTEXT>{LB}\: { yy_.begin("CATEGORIES"); yy_.less(1); }
// A bracket followed by a digit is the start of a timecode: push everything back for ANNOTATION.
<AFTERTEXT>{LB}[0-9] { yy_.begin("ANNOTATION"); yy_.less(0); }
// Two-character variant first, so the error message can show a little more context.
<AFTERTEXT>.. { ERR(yy, "Unexpected thing after text node: " + yytext); }
<AFTERTEXT>. { ERR(yy, "Unexpected thing after text node: " + yytext); }
// AUTHOR: contents of an author tag whose opener was consumed together with the timecode.
// Everything up to the closing bracket on the same line becomes the annotation author.
<AUTHOR>[^\]\n]+\] { yy.an.author = yytext.substr(0, yyleng-1); yy_.begin("TEXT_START"); }
<AUTHOR>{SP} {}
// CATEGORIES: a trailing category tag. Each colon-prefixed entry becomes a CATEGORY marker
// with offset -1, i.e. it is not tied to a position in the text.
<CATEGORIES>{SP} {}
<CATEGORIES>\:{ATTR_SIMPLE} { yy.an.markers.push({ type: "CATEGORY", marker: yytext.substr(1, yyleng-1), offset: -1 }); }
// Quoted entry: drop the colon and both quotes, then resolve backslash escapes.
<CATEGORIES>\:{ATTR_QUOTED} { yy.an.markers.push({ type: "CATEGORY", marker: UNQUOTE(yy, yytext.substr(2, yyleng-3)), offset: -1 }); }
// A closing bracket immediately followed by an opening one goes straight to QUOTES.
// NOTE(review): unlike the AFTERTEXT rule, this does not consume the word "quote", so QUOTES
// would read it as author text - confirm this is intended.
<CATEGORIES>\]{LB} { yy_.begin("QUOTES"); }
<CATEGORIES>\] { yy_.begin("ANNOTATION"); }
<CATEGORIES>. { ERR(yy, "Unexpected character in category tag: " + yytext); }
// QUOTES: inside a quote tag. An optional author name may precede the numeric id.
<QUOTES>{SP} {}
// Digits followed by the closing bracket: mark the annotation as a quote and store the id.
// parseInt stops at the first non-digit, so trailing whitespace and the bracket are ignored.
<QUOTES>[0-9]+{S}\] { yy.an.is_quote = true; yy.an.quote.id = parseInt(yytext); yy_.begin("ANNOTATION"); }
// Alphanumeric words are appended to the quote author; the whitespace between them is dropped.
<QUOTES>{ATTR_ALNUM} { yy.an.quote.author += yytext; }
<QUOTES>. { ERR(yy, "Unexpected character in quotes tag: " + yytext); }
%%