hmmlib-js: Add source files, fix a few bugs.

The source files are only needed to build hmmlib.js; they needn't be
included in other projects.

Bugs fixed:
* marker offset should be correct now.
* after-text markers (-1 offset) now have type CATEGORY not HMML_CATEGORY
  which is consistent with the in-text ones.
* markers with parameters have proper .text and the marker.parameter
  doesn't start with "undefined" any more.
This commit is contained in:
Alex Baines 2017-06-19 14:47:55 +01:00
parent 648e3b29fe
commit 6729e0b4c3
6 changed files with 311 additions and 8 deletions

1
hmmlib-js/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
src/temp.js

View File

@ -339,6 +339,16 @@ HMMLexer.prototype = {
}
},
// return next match that has a token
lex: function lex() {
// Ask the lexer for its next match; r is falsy when that match did not
// produce a token.
var r = this.next();
if (r) {
return r;
} else {
// Nothing to hand back yet: try again through a recursive call.
// NOTE(review): each token-less match adds one stack frame, so a long
// run of them could exhaust the call stack - confirm against the
// input sizes this is expected to handle.
return this.lex();
}
},
// activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)
begin: function begin(condition) {
this.conditionStack.push(condition);
@ -383,7 +393,7 @@ HMMLexer.prototype = {
return this.conditionStack.length;
},
options: {
"moduleName": "test"
"moduleName": "temp"
},
performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
function ERR(yy, err) {
@ -419,12 +429,19 @@ HMMLexer.prototype = {
function M_ADD(yy, t, n) {
yy.an.markers[yy.an.markers.length - 1].marker = t.substr(0, n);
yy.an.markers[yy.an.markers.length - 1].offset = t.length;
yy.an.text += t.substr(0, n);
yy.an.markers[yy.an.markers.length - 1].offset = yy.an.text.length;
if (yy.mnext === "TEXT") {
yy.an.text += t.substr(0, n);
}
}
function MX_ADD(yy, c) {
yy.an.markers[yy.an.markers.length - 1].parameter += c;
var m = yy.an.markers[yy.an.markers.length - 1];
if (m.parameter) {
m.parameter += c;
} else {
m.parameter = c;
}
yy.an.text += c;
}
@ -712,14 +729,14 @@ HMMLexer.prototype = {
break;
case 73:
yy.an.markers.push({
type: "HMML_CATEGORY",
type: "CATEGORY",
marker: yy_.yytext.substr(1, yy_.yyleng - 1),
offset: -1
});
break;
case 74:
yy.an.markers.push({
type: "HMML_CATEGORY",
type: "CATEGORY",
marker: UNQUOTE(yy, yy_.yytext.substr(2, yy_.yyleng - 3)),
offset: -1
});
@ -807,4 +824,4 @@ HMMLexer.prototype = {
"inclusive": true
}
}
};
}

View File

@ -12,7 +12,7 @@
var x = new XMLHttpRequest();
x.addEventListener("load", function(){
var state = HMML_parse(this.responseText);
console.log(state);
console.dir(state);
});
x.open("GET", "test.hmml");
x.send();

75
hmmlib-js/src/header.js Normal file
View File

@ -0,0 +1,75 @@
/**
 * Parse the text of an HMML file.
 *
 * @param {string} contents - Text handed to the lexer through setInput().
 * @returns {Object} On success `{metadata, annotations}`; on failure
 *   `{error: {line, msg}}` describing what went wrong and where.
 */
function HMML_parse(contents) {
    var l = new HMMLexer();
    // Shared parse state; it is passed to the lexer together with the input
    // and filled in while lexing.
    var state = {
        line: 0,
        annos: [],
        meta: {
            member: "",
            twitch: "",
            project: "",
            title: "",
            platform: "",
            id: "",
            annotator: ""
        },
        an: {
            line: 0,
            time: "",
            text: "",
            author: "",
            references: [],
            markers: [],
            quote: {
                id: 0,
                author: ""
            },
            is_quote: false
        },
        ref: {
            site: "",
            page: "",
            url: "",
            title: "",
            article: "",
            author: "",
            editor: "",
            publisher: "",
            isbn: "",
            offset: 0
        },
        error: {
            line: 0,
            msg: ""
        },
        attr: "",
        mnext: 0,
        first: true
    };
    l.setInput(contents, state);
    try {
        // Keep matching until next() hands back a truthy value.
        var r;
        do {
            r = l.next();
        } while (!r);
        // Strip bookkeeping fields that carry no information for the caller.
        state.annos.forEach(function(a) {
            if (!a.is_quote) {
                delete a.quote;
            }
            if (a.author === "") delete a.author;
            delete a.is_quote;
        });
        return {
            metadata: state.meta,
            annotations: state.annos
        };
    } catch (e) {
        // An exception that left state.error.msg empty was not reported by
        // the lexer itself; record it here so the caller never receives a
        // blank error object.
        if (state.error.msg === "") {
            state.error.line = state.line;
            state.error.msg = "hmmlib: internal error: " + (e && e.message ? e.message : String(e));
        }
        return {
            error: state.error
        };
    }
}
// Lexer constructor. It does no work itself; the methods live on
// HMMLexer.prototype, which is assigned right after this definition.
function HMMLexer() {}
/* generated by jison-lex 0.3.4 */
HMMLexer.prototype = {

11
hmmlib-js/src/makefile Normal file
View File

@ -0,0 +1,11 @@
# Assemble hmmlib.js from the jison-lex output (temp.js): skip its first
# 3 lines (tail -n +4), drop its last 2 lines (head -n -2) and its final
# 3 bytes (head -c -3), then append what is left to header.js.
# NOTE(review): negative counts for head are a GNU coreutils extension -
# confirm the build host provides it.
../hmmlib.js: header.js temp.js
tail -n +4 temp.js | head -n -2 | head -c -3 | cat header.js - > $@
# Reformat the assembled file (-r rewrites the file it is given).
js-beautify -r $@
# Generate the raw lexer from the lex specification.
temp.js: source.l
jison-lex $< -o $@
# Remove both generated files.
clean:
$(RM) temp.js ../hmmlib.js
.PHONY: clean

199
hmmlib-js/src/source.l Normal file
View File

@ -0,0 +1,199 @@
%{
/*
 * Report a parse failure: log the message, store it together with the
 * current line in yy.error, then abort lexing by throwing.
 */
function ERR(yy, err){
	console.log(err);
	var report = yy.error;
	report.line = yy.line;
	report.msg = err;
	throw "tantrum";
}
/*
 * Validate the character that follows a backslash.
 * Accepts exactly one of  [ ] : @ ~ \ "  and reports anything else
 * through ERR(), which throws.
 */
function CHECKESCAPE(yy, str){
	// Strings have no find() method; indexOf() is the membership test.
	// The length check stops an empty or multi-character argument from
	// matching as a substring.
	if(str.length !== 1 || "[]:@~\\\"".indexOf(str) === -1){
		// ERR() takes one message argument, so the offending character is
		// concatenated rather than passed printf-style.
		ERR(yy, "hmmlib: Unknown backslash escape code: " + str);
	}
}
/*
 * Remove backslash escapes from str and return the result.
 * Each character following a backslash is validated with CHECKESCAPE()
 * and kept literally; the backslash itself is dropped.
 */
function UNQUOTE(yy, str){
	var i = 0;
	var j;
	while((j = str.indexOf('\\', i)) !== -1){
		// Validate the escaped character, not the backslash itself.
		CHECKESCAPE(yy, str.charAt(j+1));
		// Keep everything before the backslash: slicing from i would
		// discard already-processed text on the second and later escapes.
		str = str.slice(0, j) + str.slice(j+1);
		// The escaped character now sits at index j; resume after it so an
		// escaped backslash is not treated as a new escape.
		i = j+1;
	}
	return str;
}
/*
 * Start a new marker of type `str` on the current annotation and remember
 * in yy.mnext which lexer state should follow the marker name.
 */
function M_(yy, str, s){
	var marker = { type: str };
	yy.an.markers.push(marker);
	yy.mnext = s;
}
/*
 * Fill in the most recently started marker: its name is the first n
 * characters of t (all of t when n is omitted) and its offset is the
 * current length of the annotation text. The name is appended to the
 * annotation text only when the marker is followed by the TEXT state.
 */
function M_ADD(yy, t, n){
	var anno = yy.an;
	var current = anno.markers[anno.markers.length - 1];
	var name = t.substr(0, n);
	current.marker = name;
	current.offset = anno.text.length;
	if(yy.mnext !== "TEXT"){
		return;
	}
	anno.text += name;
}
/*
 * Append c to the parameter of the most recent marker (starting the
 * parameter if it has none yet) and to the annotation text.
 */
function MX_ADD(yy, c){
	var markers = yy.an.markers;
	var last = markers[markers.length - 1];
	last.parameter = last.parameter ? last.parameter + c : c;
	yy.an.text += c;
}
/*
 * Finish the annotation in progress and start an empty one.
 * The annotation in progress is stored in yy.annos unless this is the
 * first call (yy.first still set), where it is only the initial
 * placeholder. The new annotation records the current line.
 */
function NEWANNO(yy){
	var finished = yy.an;
	var fresh = {
		line: yy.line,
		time: "",
		text: "",
		author: "",
		references: [],
		markers: [],
		quote: {
			id: 0,
			author: ""
		},
		is_quote: false
	};
	if(!yy.first){
		yy.annos.push(finished);
	}
	yy.an = fresh;
	yy.first = false;
}
%}
%option reentrant
%option noyywrap
S [\t \r]*
SP [\t \r]+
ATTR_SIMPLE [^\" \]\t\r\n][^ \]\t\r\n]*
ATTR_ALNUM [0-9a-zA-Z][0-9a-zA-Z_]*
ATTR_QUOTED \"([^\n\"\\]|\\.)*\"
TAG_VIDEO_OPEN "[video"
TIMECODE \[[0-9]{1,2}(\:[0-5][0-9]){1,2}\]
BAD_TIMECODE \[[0-9]{1,2}(\:[6-9][0-9]){1,2}\]
TEXT_BREAK [^\\\:\@\~\[\]\r\n\t ]
LB \[
RB \]
%s VIDEO
%s V_ATTR
%s ANNOTATION
%s TEXT_START
%s TEXT
%s MARKER
%s MARKER_XTRA
%s REF
%s R_ATTR
%s AFTERTEXT
%s AUTHOR
%s CATEGORIES
%s QUOTES
%%
<<EOF>> { ERR(yy, "Unexpected EOF, video close tag not found."); }
\r\n|\n { yy.line++; }
<INITIAL>{TAG_VIDEO_OPEN} { yy_.begin("VIDEO"); }
<INITIAL>. { ERR(yy, "Missing video tag."); }
<VIDEO>{SP} {}
<VIDEO>member{S}\= { yy.attr = "member"; yy_.begin("V_ATTR"); }
<VIDEO>twitch_username{S}\= { yy.attr = "twitch"; yy_.begin("V_ATTR"); }
<VIDEO>project{S}\= { yy.attr = "project"; yy_.begin("V_ATTR"); }
<VIDEO>title{S}\= { yy.attr = "title"; yy_.begin("V_ATTR"); }
<VIDEO>platform{S}\= { yy.attr = "platform"; yy_.begin("V_ATTR"); }
<VIDEO>id{S}\= { yy.attr = "id"; yy_.begin("V_ATTR"); }
<VIDEO>annotator{S}\= { yy.attr = "annotator"; yy_.begin("V_ATTR"); }
<VIDEO>\] { yy_.begin("ANNOTATION"); };
<VIDEO>. { ERR(yy, "Invalid char '"+ yytext +"' in video tag."); }
<V_ATTR>{SP} { yy_.begin("VIDEO"); }
<V_ATTR>{ATTR_SIMPLE} { yy.meta[yy.attr] = yytext; yy_.begin("VIDEO"); }
<V_ATTR>{ATTR_QUOTED} { yy.meta[yy.attr] = UNQUOTE(yy, yytext.substr(1, yyleng-2)); yy_.begin("VIDEO"); }
<V_ATTR>\] { yy_.less(0); yy_.begin("VIDEO"); }
<ANNOTATION>{TIMECODE}{LB}\@ { NEWANNO(yy); yy.an.time = yytext.substr(1, yyleng-4); yy_.begin("AUTHOR"); }
<ANNOTATION>{TIMECODE} { NEWANNO(yy); yy.an.time = yytext.substr(1, yyleng-2); yy_.begin("TEXT_START"); }
<ANNOTATION>{BAD_TIMECODE} { ERR(yy, "Timecode '"+ yytext +"' out of range."); }
<ANNOTATION>{SP} {}
<ANNOTATION>. { ERR(yy, "Cannot parse annotation. Expected timecode."); }
<TEXT_START>{LB}\: { M_(yy, "CATEGORY", "TEXT"); yy_.begin("MARKER"); }
<TEXT_START>{LB}\@ { M_(yy, "MEMBER" , "TEXT"); yy_.begin("MARKER"); }
<TEXT_START>{LB}\~ { M_(yy, "PROJECT" , "TEXT"); yy_.begin("MARKER"); }
<TEXT_START>{LB} { yy_.less(0); yy_.begin("TEXT"); }
<TEXT_START>. { ERR(yy, "Unknown character '"+ yytext +"' after timecode."); }
<TEXT>{TEXT_BREAK}+ { yy.an.text += yytext; }
<TEXT>\\. { CHECKESCAPE(yy, yytext.charAt(1)); yy.an.text += yytext.substr(1, yyleng-1); }
<TEXT>[ \r\t]+\: { yy.an.text += ' '; M_(yy, "CATEGORY", "TEXT"); yy_.begin("MARKER"); }
<TEXT>[ \r\t]+\@ { yy.an.text += ' '; M_(yy, "MEMBER" , "TEXT"); yy_.begin("MARKER"); }
<TEXT>[ \r\t]+\~ { yy.an.text += ' '; M_(yy, "PROJECT" , "TEXT"); yy_.begin("MARKER"); }
<TEXT>{LB}\: { M_(yy, "CATEGORY", "MARKER_XTRA"); yy_.begin("MARKER"); }
<TEXT>{LB}\@ { M_(yy, "MEMBER" , "MARKER_XTRA"); yy_.begin("MARKER"); }
<TEXT>{LB}\~ { M_(yy, "PROJECT" , "MARKER_XTRA"); yy_.begin("MARKER"); }
<TEXT>\] { yy_.begin("AFTERTEXT"); }
<TEXT>{LB}ref { yy.ref.offset = yy.an.text.length; yy_.begin("REF"); }
<TEXT>{LB} {}
<TEXT>{SP} { yy.an.text += ' '; }
<TEXT>. { yy.an.text += yytext; }
<MARKER>{ATTR_ALNUM} { M_ADD(yy, yytext); yy_.begin(yy.mnext); };
<MARKER>{ATTR_QUOTED} { M_ADD(yy, yytext.substr(1, yyleng-2)); yy_.begin(yy.mnext); };
<MARKER>. { ERR(yy, "Cannot parse Marker. Expected quoted or alphanumeric attribute."); }
<MARKER_XTRA>\\] { MX_ADD(yy, ']'); }
<MARKER_XTRA>\] { yy_.begin("TEXT"); }
<MARKER_XTRA>[ ] { if(yy.an.markers[yy.an.markers.length - 1].parameter){ MX_ADD(yy, ' '); } }
<MARKER_XTRA>. { MX_ADD(yy, yytext); }
<REF>{SP} {}
<REF>site{S}\= { yy.attr = "site"; yy_.begin("R_ATTR"); }
<REF>page{S}\= { yy.attr = "page"; yy_.begin("R_ATTR"); }
<REF>url{S}\= { yy.attr = "url"; yy_.begin("R_ATTR"); }
<REF>title{S}\= { yy.attr = "title"; yy_.begin("R_ATTR"); }
<REF>article{S}\= { yy.attr = "article"; yy_.begin("R_ATTR"); }
<REF>author{S}\= { yy.attr = "author"; yy_.begin("R_ATTR"); }
<REF>editor{S}\= { yy.attr = "editor"; yy_.begin("R_ATTR"); }
<REF>publisher{S}\= { yy.attr = "publisher"; yy_.begin("R_ATTR"); }
<REF>isbn{S}\= { yy.attr = "isbn"; yy_.begin("R_ATTR"); }
<REF>\] { yy.an.references.push(yy.ref); yy.ref = {}; yy_.begin("TEXT"); }
<REF>. { ERR(yy, "Unexpected item in ref: " + yytext); }
<R_ATTR>{SP} {}
<R_ATTR>{ATTR_SIMPLE} { yy.ref[yy.attr] = yytext; yy_.begin("REF"); }
<R_ATTR>{ATTR_QUOTED} { yy.ref[yy.attr] = UNQUOTE(yy, yytext.substr(1, yyleng-2)); yy_.begin("REF"); }
<AFTERTEXT,ANNOTATION>\[\/video\] { NEWANNO(yy); return 1; }
<AFTERTEXT>{SP} {}
<AFTERTEXT>{LB}quote { yy_.begin("QUOTES"); }
<AFTERTEXT>{LB}\: { yy_.begin("CATEGORIES"); yy_.less(1); }
<AFTERTEXT>{LB}[0-9] { yy_.begin("ANNOTATION"); yy_.less(0); }
<AFTERTEXT>.. { ERR(yy, "Unexpected thing after text node: " + yytext); }
<AFTERTEXT>. { ERR(yy, "Unexpected thing after text node: " + yytext); }
<AUTHOR>[^\]\n]+\] { yy.an.author = yytext.substr(0, yyleng-1); yy_.begin("TEXT_START"); }
<AUTHOR>{SP} {}
<CATEGORIES>{SP} {}
<CATEGORIES>\:{ATTR_SIMPLE} { yy.an.markers.push({ type: "CATEGORY", marker: yytext.substr(1, yyleng-1), offset: -1 }); }
<CATEGORIES>\:{ATTR_QUOTED} { yy.an.markers.push({ type: "CATEGORY", marker: UNQUOTE(yy, yytext.substr(2, yyleng-3)), offset: -1 }); }
<CATEGORIES>\]{LB} { yy_.begin("QUOTES"); }
<CATEGORIES>\] { yy_.begin("ANNOTATION"); }
<CATEGORIES>. { ERR(yy, "Unexpected character in category tag: " + yytext); }
<QUOTES>{SP} {}
<QUOTES>[0-9]+{S}\] { yy.an.is_quote = true; yy.an.quote.id = parseInt(yytext); yy_.begin("ANNOTATION"); }
<QUOTES>{ATTR_ALNUM} { yy.an.quote.author += yytext; }
<QUOTES>. { ERR(yy, "Unexpected character in quotes tag: " + yytext); }
%%