Annotation-System/hmmlib-js/hmmlib.js

835 lines
27 KiB
JavaScript

/**
 * Parses an HMML document.
 *
 * Creates a fresh HMMLexer, hands it a new shared state object and keeps
 * calling next() until the lexer yields a truthy token.
 *
 * @param {string} contents - HMML source text handed to the lexer.
 * @returns {{metadata: Object, annotations: Array}|{error: *}}
 *     The collected metadata and annotations, or `{error}` wrapping whatever
 *     was thrown while lexing.
 */
function HMML_parse(contents) {
    var lexer = new HMMLexer();

    // Shared state object; the lexer actions receive it as `yy` and fill it in.
    var shared = {
        line: 0,
        annos: [],
        meta: {
            annotators: [],
            guests: [],
            co_hosts: []
        },
        an: {
            line: 0,
            text: "",
            references: [],
            markers: [],
        },
        ref: {},
        first: true
    };

    lexer.setInput(contents, shared);

    try {
        // Falsy results mean "input consumed, no token produced": keep going.
        var token = lexer.next();
        while (!token) {
            token = lexer.next();
        }
        return {
            metadata: shared.meta,
            annotations: shared.annos
        };
    } catch (failure) {
        return {
            error: failure
        };
    }
}
/**
 * HMML lexer. All behaviour lives on the prototype; call setInput() before
 * next()/lex().
 */
function HMMLexer() {}
/* generated by jison-lex 0.3.4 -- hand-corrected afterwards, see the comments marked FIX */
HMMLexer.prototype = {
    // token value returned at end of input (also returned by the `[/video]` actions)
    EOF: 1,
    // reports a lexical error: delegates to yy.parser.parseError when a parser
    // is attached to the shared state, otherwise throws an Error
    parseError: function parseError(str, hash) {
        if (this.yy.parser) {
            this.yy.parser.parseError(str, hash);
        } else {
            throw new Error(str);
        }
    },
    // resets the lexer, sets new input
    setInput: function(input, yy) {
        this.yy = yy || this.yy || {};
        this._input = input;
        this._more = this._backtrack = this.done = false;
        this.yylineno = this.yyleng = 0;
        this.yytext = this.matched = this.match = '';
        this.conditionStack = ['INITIAL'];
        this.yylloc = {
            first_line: 1,
            first_column: 0,
            last_line: 1,
            last_column: 0
        };
        if (this.options.ranges) {
            this.yylloc.range = [0, 0];
        }
        this.offset = 0;
        return this;
    },
    // consumes and returns one char from the input
    input: function() {
        var ch = this._input[0];
        this.yytext += ch;
        this.yyleng++;
        this.offset++;
        this.match += ch;
        this.matched += ch;
        var lines = ch.match(/(?:\r\n?|\n).*/g);
        if (lines) {
            this.yylineno++;
            this.yylloc.last_line++;
        } else {
            this.yylloc.last_column++;
        }
        if (this.options.ranges) {
            this.yylloc.range[1]++;
        }
        this._input = this._input.slice(1);
        return ch;
    },
    // unshifts one char (or a string) into the input
    unput: function(ch) {
        var len = ch.length;
        var lines = ch.split(/(?:\r\n?|\n)/g);
        this._input = ch + this._input;
        this.yytext = this.yytext.substr(0, this.yytext.length - len);
        //this.yyleng -= len;
        this.offset -= len;
        var oldLines = this.match.split(/(?:\r\n?|\n)/g);
        // FIX: drop all `len` unshifted characters, not just one, so that
        // match/matched stay in step with yytext and _input (pastInput() and
        // upcomingInput() are built from them).
        this.match = this.match.substr(0, this.match.length - len);
        this.matched = this.matched.substr(0, this.matched.length - len);
        if (lines.length - 1) {
            this.yylineno -= lines.length - 1;
        }
        var r = this.yylloc.range;
        this.yylloc = {
            first_line: this.yylloc.first_line,
            last_line: this.yylineno + 1,
            first_column: this.yylloc.first_column,
            last_column: lines ?
                (lines.length === oldLines.length ? this.yylloc.first_column : 0) +
                oldLines[oldLines.length - lines.length].length - lines[0].length : this.yylloc.first_column - len
        };
        if (this.options.ranges) {
            this.yylloc.range = [r[0], r[0] + this.yyleng - len];
        }
        this.yyleng = this.yytext.length;
        return this;
    },
    // When called from action, caches matched text and appends it on next action
    more: function() {
        this._more = true;
        return this;
    },
    // When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.
    reject: function() {
        if (this.options.backtrack_lexer) {
            this._backtrack = true;
        } else {
            return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' + this.showPosition(), {
                text: "",
                token: null,
                line: this.yylineno
            });
        }
        return this;
    },
    // retain first n characters of the match
    less: function(n) {
        this.unput(this.match.slice(n));
    },
    // displays already matched input, i.e. for error messages
    pastInput: function() {
        var past = this.matched.substr(0, this.matched.length - this.match.length);
        return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/\n/g, "");
    },
    // displays upcoming input, i.e. for error messages
    upcomingInput: function() {
        var next = this.match;
        if (next.length < 20) {
            next += this._input.substr(0, 20 - next.length);
        }
        return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/\n/g, "");
    },
    // displays the character position where the lexing error occurred, i.e. for error messages
    showPosition: function() {
        var pre = this.pastInput();
        var c = new Array(pre.length + 1).join("-");
        return pre + this.upcomingInput() + "\n" + c + "^";
    },
    // test the lexed token: return FALSE when not a match, otherwise return token
    test_match: function(match, indexed_rule) {
        var token,
            lines,
            backup;
        if (this.options.backtrack_lexer) {
            // save context
            backup = {
                yylineno: this.yylineno,
                yylloc: {
                    first_line: this.yylloc.first_line,
                    // FIX: last_line lives on yylloc; `this.last_line` is never set
                    last_line: this.yylloc.last_line,
                    first_column: this.yylloc.first_column,
                    last_column: this.yylloc.last_column
                },
                yytext: this.yytext,
                match: this.match,
                matches: this.matches,
                matched: this.matched,
                yyleng: this.yyleng,
                offset: this.offset,
                _more: this._more,
                _input: this._input,
                yy: this.yy,
                conditionStack: this.conditionStack.slice(0),
                done: this.done
            };
            if (this.options.ranges) {
                backup.yylloc.range = this.yylloc.range.slice(0);
            }
        }
        lines = match[0].match(/(?:\r\n?|\n).*/g);
        if (lines) {
            this.yylineno += lines.length;
        }
        this.yylloc = {
            first_line: this.yylloc.last_line,
            last_line: this.yylineno + 1,
            first_column: this.yylloc.last_column,
            last_column: lines ?
                lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length : this.yylloc.last_column + match[0].length
        };
        this.yytext += match[0];
        this.match += match[0];
        this.matches = match;
        this.yyleng = this.yytext.length;
        if (this.options.ranges) {
            this.yylloc.range = [this.offset, this.offset += this.yyleng];
        }
        this._more = false;
        this._backtrack = false;
        this._input = this._input.slice(match[0].length);
        this.matched += match[0];
        token = this.performAction.call(this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1]);
        if (this.done && this._input) {
            this.done = false;
        }
        if (token) {
            return token;
        } else if (this._backtrack) {
            // recover context
            for (var k in backup) {
                this[k] = backup[k];
            }
            return false; // rule action called reject() implying the next rule should be tested instead.
        }
        return false;
    },
    // return next match in input
    next: function() {
        if (this.done) {
            return this.EOF;
        }
        if (!this._input) {
            this.done = true;
        }
        var token,
            match,
            tempMatch,
            index;
        if (!this._more) {
            this.yytext = '';
            this.match = '';
        }
        var rules = this._currentRules();
        // Neither options.flex nor options.backtrack_lexer is set below, so the
        // loop stops at the FIRST rule of the active condition that matches:
        // the order of each `rules` list in `conditions` decides precedence.
        for (var i = 0; i < rules.length; i++) {
            tempMatch = this._input.match(this.rules[rules[i]]);
            if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
                match = tempMatch;
                index = i;
                if (this.options.backtrack_lexer) {
                    token = this.test_match(tempMatch, rules[i]);
                    if (token !== false) {
                        return token;
                    } else if (this._backtrack) {
                        match = false;
                        continue; // rule action called reject() implying a rule MISmatch.
                    } else {
                        // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
                        return false;
                    }
                } else if (!this.options.flex) {
                    break;
                }
            }
        }
        if (match) {
            token = this.test_match(match, rules[index]);
            if (token !== false) {
                return token;
            }
            // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
            return false;
        }
        if (this._input === "") {
            return this.EOF;
        } else {
            return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), {
                text: "",
                token: null,
                line: this.yylineno
            });
        }
    },
    // return next match that has a token
    lex: function lex() {
        var r = this.next();
        if (r) {
            return r;
        } else {
            return this.lex();
        }
    },
    // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)
    begin: function begin(condition) {
        this.conditionStack.push(condition);
    },
    // pop the previously active lexer condition state off the condition stack
    popState: function popState() {
        var n = this.conditionStack.length - 1;
        if (n > 0) {
            return this.conditionStack.pop();
        } else {
            return this.conditionStack[0];
        }
    },
    // produce the lexer rule set which is active for the currently active lexer condition state
    _currentRules: function _currentRules() {
        if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
            return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
        } else {
            return this.conditions["INITIAL"].rules;
        }
    },
    // return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available
    topState: function topState(n) {
        n = this.conditionStack.length - 1 - Math.abs(n || 0);
        if (n >= 0) {
            return this.conditionStack[n];
        } else {
            return "INITIAL";
        }
    },
    // alias for begin(condition)
    pushState: function pushState(condition) {
        this.begin(condition);
    },
    // return the number of states currently on the stack
    stateStackSize: function stateStackSize() {
        return this.conditionStack.length;
    },
    options: {
        "moduleName": "temp"
    },
    // Runs the action belonging to a matched rule.
    //   yy                       - shared state object given to setInput()
    //   yy_                      - the lexer itself (yytext, yyleng, begin(), less())
    //   $avoiding_name_collisions - index of the matched rule in `rules`
    //   YY_START                 - name of the condition that was active
    // Returns 1 once `[/video]` has been read, otherwise undefined. Problems in
    // the document are reported by throwing a plain {message, line} object.
    performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
        // Logs the problem and aborts lexing by throwing {message, line}.
        function ERR(yy, err) {
            console.log("hmmlib error: L%d: %s\n", yy.line, err);
            throw {
                message: err,
                line: yy.line
            };
        }
        // Rejects any escaped character other than [ ] : @ ~ \ "
        function CHECKESCAPE(yy, str) {
            if ("[]:@~\\\"".indexOf(str) == -1) {
                // FIX: ERR takes a single message; the offending character is
                // appended here instead of being passed next to a literal "%s".
                ERR(yy, "hmmlib: Unknown backslash escape code: " + str);
            }
        }
        // Removes the backslash from every escape sequence in the body of a
        // quoted string, validating each escaped character.
        function UNQUOTE(yy, str) {
            var i = 0;
            var j = 0;
            while ((j = str.indexOf('\\', i)) != -1) {
                // FIX: validate the character AFTER the backslash, not the
                // backslash itself.
                CHECKESCAPE(yy, str.charAt(j + 1));
                // FIX: keep everything before the backslash; slicing from `i`
                // discarded the part that had already been processed.
                str = str.slice(0, j) + str.slice(j + 1);
                // the escaped character now sits at j: continue after it so an
                // escaped backslash is not taken for a new escape
                i = j + 1;
            }
            return str;
        }
        // Starts a marker of the given type; `s` is the condition to enter once
        // the marker name has been read.
        function M_(yy, str, s) {
            yy.an.markers.push({
                type: str
            });
            yy.mnext = s;
        }
        // Stores the marker name and its offset into the annotation text. Call
        // sites pass no `n`, in which case substr() yields the whole string.
        function M_ADD(yy, t, n) {
            yy.an.markers[yy.an.markers.length - 1].marker = t.substr(0, n);
            yy.an.markers[yy.an.markers.length - 1].offset = yy.an.text.length;
            if (yy.mnext === "TEXT") {
                yy.an.text += t.substr(0, n);
            }
        }
        // Appends `c` to the current marker's parameter and to the annotation text.
        function MX_ADD(yy, c) {
            var m = yy.an.markers[yy.an.markers.length - 1];
            if (m.parameter) {
                m.parameter += c;
            } else {
                m.parameter = c;
            }
            yy.an.text += c;
        }
        // Stores the annotation being built (except before the very first one)
        // and starts a fresh one at the current line.
        function NEWANNO(yy) {
            if (!yy.first) yy.annos.push(yy.an);
            yy.an = {
                line: yy.line,
                text: "",
                references: [],
                markers: [],
            };
            yy.first = false;
        }
        var YYSTATE = YY_START;
        switch ($avoiding_name_collisions) {
            case 0:
                ERR(yy, "Unexpected EOF, video close tag not found.");
                break;
            case 1:
                yy.line++;
                break;
            case 2:
                yy_.begin("VIDEO");
                break;
            case 3:
                ERR(yy, "Missing video tag.");
                break;
            case 4:
                break;
            case 5:
                yy.attr = "member";
                yy_.begin("V_ATTR");
                break;
            case 6:
                yy.attr = "stream_platform";
                yy_.begin("V_ATTR");
                break;
            case 7:
                yy.attr = "stream_username";
                yy_.begin("V_ATTR");
                break;
            case 8:
                yy.attr = "project";
                yy_.begin("V_ATTR");
                break;
            case 9:
                yy.attr = "title";
                yy_.begin("V_ATTR");
                break;
            case 10:
                yy.attr = "vod_platform";
                yy_.begin("V_ATTR");
                break;
            case 11:
                yy.attr = "id";
                yy_.begin("V_ATTR");
                break;
            case 12:
                yy.attr = "co_hosts";
                yy_.begin("V2_ATTR");
                break;
            case 13:
                yy.attr = "guests";
                yy_.begin("V2_ATTR");
                break;
            case 14:
                yy.attr = "annotators";
                yy_.begin("V2_ATTR");
                break;
            case 15:
                yy_.begin("ANNOTATION");
                break;
            case 16:
                ERR(yy, "Invalid char '" + yy_.yytext + "' in video tag.");
                break;
            case 17:
                yy_.begin("VIDEO");
                break;
            case 18:
                yy.meta[yy.attr] = yy_.yytext;
                yy_.begin("VIDEO");
                break;
            case 19:
                // strip the surrounding quotes, then resolve escapes
                yy.meta[yy.attr] = UNQUOTE(yy, yy_.yytext.substr(1, yy_.yyleng - 2));
                yy_.begin("VIDEO");
                break;
            case 20:
                // give the `]` back so the VIDEO condition sees it
                yy_.less(0);
                yy_.begin("VIDEO");
                break;
            case 21:
                yy_.begin("VIDEO");
                break;
            case 22:
                yy.meta[yy.attr].push(yy_.yytext);
                yy_.begin("VIDEO");
                break;
            case 23:
                yy.meta[yy.attr].push(UNQUOTE(yy, yy_.yytext.substr(1, yy_.yyleng - 2)));
                yy_.begin("VIDEO");
                break;
            case 24:
                yy_.less(0);
                yy_.begin("VIDEO");
                break;
            case 25:
                // `[/video]`: store the last annotation and report completion
                NEWANNO(yy);
                return 1;
            case 26:
                NEWANNO(yy);
                // drop the leading `[` and the trailing `][@`
                yy.an.time = yy_.yytext.substr(1, yy_.yyleng - 4);
                yy_.begin("AUTHOR");
                break;
            case 27:
                NEWANNO(yy);
                yy.an.time = yy_.yytext.substr(1, yy_.yyleng - 2);
                yy_.begin("TEXT_START");
                break;
            case 28:
                ERR(yy, "Timecode '" + yy_.yytext + "' out of range.");
                break;
            case 29:
                break;
            case 30:
                ERR(yy, "Cannot parse annotation. Expected timecode.");
                break;
            case 31:
                M_(yy, "CATEGORY", "TEXT");
                yy_.begin("MARKER");
                break;
            case 32:
                M_(yy, "MEMBER", "TEXT");
                yy_.begin("MARKER");
                break;
            case 33:
                M_(yy, "PROJECT", "TEXT");
                yy_.begin("MARKER");
                break;
            case 34:
                yy_.less(0);
                yy_.begin("TEXT");
                break;
            case 35:
                ERR(yy, "Unknown character '" + yy_.yytext + "' after timecode.");
                break;
            case 36:
                yy.an.text += yy_.yytext;
                break;
            case 37:
                CHECKESCAPE(yy, yy_.yytext.charAt(1));
                yy.an.text += yy_.yytext.substr(1, yy_.yyleng - 1);
                break;
            case 38:
                yy.an.text += ' ';
                M_(yy, "CATEGORY", "TEXT");
                yy_.begin("MARKER");
                break;
            case 39:
                yy.an.text += ' ';
                M_(yy, "MEMBER", "TEXT");
                yy_.begin("MARKER");
                break;
            case 40:
                yy.an.text += ' ';
                M_(yy, "PROJECT", "TEXT");
                yy_.begin("MARKER");
                break;
            case 41:
                M_(yy, "CATEGORY", "MARKER_XTRA");
                yy_.begin("MARKER");
                break;
            case 42:
                M_(yy, "MEMBER", "MARKER_XTRA");
                yy_.begin("MARKER");
                break;
            case 43:
                M_(yy, "PROJECT", "MARKER_XTRA");
                yy_.begin("MARKER");
                break;
            case 44:
                yy_.begin("AFTERTEXT");
                break;
            case 45:
                yy.ref.offset = yy.an.text.length;
                yy_.begin("REF");
                break;
            case 46:
                break;
            case 47:
                yy.an.text += ' ';
                break;
            case 48:
                yy.an.text += yy_.yytext;
                break;
            case 49:
                M_ADD(yy, yy_.yytext);
                yy_.begin(yy.mnext);
                break;
            case 50:
                M_ADD(yy, yy_.yytext.substr(1, yy_.yyleng - 2));
                yy_.begin(yy.mnext);
                break;
            case 51:
                ERR(yy, "Cannot parse Marker. Expected quoted or alphanumeric attribute.");
                break;
            case 52:
                MX_ADD(yy, ']');
                break;
            case 53:
                MX_ADD(yy, '#');
                break;
            case 54:
                yy_.begin("TEXT");
                break;
            case 55:
                if (yy.an.markers[yy.an.markers.length - 1].parameter) {
                    MX_ADD(yy, ' ');
                }
                break;
            case 56:
                // NOTE(review): with first-match lexing this action is currently
                // shadowed by rule 55, which precedes it in MARKER_XTRA.
                var m = yy.an.markers[yy.an.markers.length - 1];
                if (m.type == "PROJECT") {
                    m.episode = yy_.yytext.substr(2);
                } else {
                    // FIX: the shared state argument was missing from this call
                    MX_ADD(yy, yy_.yytext.substr(1));
                }
                break;
            case 57:
                MX_ADD(yy, yy_.yytext);
                break;
            case 58:
                break;
            case 59:
                yy.attr = "site";
                yy_.begin("R_ATTR");
                break;
            case 60:
                yy.attr = "page";
                yy_.begin("R_ATTR");
                break;
            case 61:
                yy.attr = "url";
                yy_.begin("R_ATTR");
                break;
            case 62:
                yy.attr = "title";
                yy_.begin("R_ATTR");
                break;
            case 63:
                yy.attr = "article";
                yy_.begin("R_ATTR");
                break;
            case 64:
                yy.attr = "author";
                yy_.begin("R_ATTR");
                break;
            case 65:
                yy.attr = "editor";
                yy_.begin("R_ATTR");
                break;
            case 66:
                yy.attr = "publisher";
                yy_.begin("R_ATTR");
                break;
            case 67:
                yy.attr = "isbn";
                yy_.begin("R_ATTR");
                break;
            case 68:
                yy.an.references.push(yy.ref);
                yy.ref = {};
                yy_.begin("TEXT");
                break;
            case 69:
                ERR(yy, "Unexpected item in ref: " + yy_.yytext);
                break;
            case 70:
                break;
            case 71:
                yy.ref[yy.attr] = yy_.yytext;
                yy_.begin("REF");
                break;
            case 72:
                yy.ref[yy.attr] = UNQUOTE(yy, yy_.yytext.substr(1, yy_.yyleng - 2));
                yy_.begin("REF");
                break;
            case 73:
                NEWANNO(yy);
                return 1;
            case 74:
                break;
            case 75:
                yy_.begin("QUOTES");
                break;
            case 76:
                yy_.begin("CATEGORIES");
                // keep the `[`, hand the `:` back for the CATEGORIES rules
                yy_.less(1);
                break;
            case 77:
                yy_.begin("ANNOTATION");
                yy_.less(0);
                break;
            case 78:
                ERR(yy, "Unexpected thing after text node: " + yy_.yytext);
                break;
            case 79:
                ERR(yy, "Unexpected thing after text node: " + yy_.yytext);
                break;
            case 80:
                yy.an.author = yy_.yytext.substr(0, yy_.yyleng - 1);
                yy_.begin("TEXT_START");
                break;
            case 81:
                break;
            case 82:
                break;
            case 83:
                yy.an.markers.push({
                    type: "CATEGORY",
                    marker: yy_.yytext.substr(1, yy_.yyleng - 1),
                    offset: -1
                });
                break;
            case 84:
                yy.an.markers.push({
                    type: "CATEGORY",
                    marker: UNQUOTE(yy, yy_.yytext.substr(2, yy_.yyleng - 3)),
                    offset: -1
                });
                break;
            case 85:
                // NOTE(review): QUOTES is entered here right after `][`, so a
                // following word such as `quote` reaches rule 90 and is recorded
                // as the quote author -- confirm the intended syntax.
                yy_.begin("QUOTES");
                break;
            case 86:
                yy_.begin("ANNOTATION");
                break;
            case 87:
                ERR(yy, "Unexpected character in category tag: " + yy_.yytext);
                break;
            case 88:
                break;
            case 89:
                // FIX: keep an author collected by rule 90 instead of resetting
                // it to "", and parse the id as base 10 explicitly.
                yy.an.quote = {
                    author: yy.an.quote ? yy.an.quote.author : "",
                    id: parseInt(yy_.yytext, 10)
                };
                yy_.begin("ANNOTATION");
                break;
            case 90:
                // FIX: the author precedes the id, so the quote object does not
                // exist yet when this rule fires; create it on demand.
                if (!yy.an.quote) {
                    yy.an.quote = {
                        author: ""
                    };
                }
                yy.an.quote.author += yy_.yytext;
                break;
            case 91:
                ERR(yy, "Unexpected character in quotes tag: " + yy_.yytext);
                break;
        }
    },
    // One regex per rule; the array index is the rule number used by
    // performAction's switch and by the `rules` lists in `conditions`.
    rules: [
        /^(?:$)/, // 0
        /^(?:\r\n|\n)/, // 1
        /^(?:(\[video\b))/, // 2
        /^(?:.)/, // 3
        /^(?:([\t \r]+))/, // 4
        /^(?:member([\t \r]*)=)/, // 5
        /^(?:stream_platform([\t \r]*)=)/, // 6
        /^(?:stream_username([\t \r]*)=)/, // 7
        /^(?:project([\t \r]*)=)/, // 8
        /^(?:title([\t \r]*)=)/, // 9
        /^(?:vod_platform([\t \r]*)=)/, // 10
        /^(?:id([\t \r]*)=)/, // 11
        /^(?:co-host([\t \r]*)=)/, // 12
        /^(?:guest([\t \r]*)=)/, // 13
        /^(?:annotator([\t \r]*)=)/, // 14
        /^(?:\])/, // 15
        /^(?:.)/, // 16
        /^(?:([\t \r]+))/, // 17
        /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, // 18
        /^(?:("([^\n\"\\]|\\.)*"))/, // 19
        /^(?:\])/, // 20
        /^(?:([\t \r]+))/, // 21
        /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, // 22
        /^(?:("([^\n\"\\]|\\.)*"))/, // 23
        /^(?:\])/, // 24
        /^(?:\[\/video\])/, // 25
        /^(?:(\[[0-9]{1,2}(:[0-5][0-9]){1,2}\])(\[)@)/, // 26
        /^(?:(\[[0-9]{1,2}(:[0-5][0-9]){1,2}\]))/, // 27
        /^(?:(\[[0-9]{1,2}(:[6-9][0-9]){1,2}\]))/, // 28
        /^(?:([\t \r]+))/, // 29
        /^(?:.)/, // 30
        /^(?:(\[):)/, // 31
        /^(?:(\[)@)/, // 32
        /^(?:(\[)~)/, // 33
        /^(?:(\[))/, // 34
        /^(?:.)/, // 35
        /^(?:([^\\\:\@\~\[\]\r\n\t ])+)/, // 36
        /^(?:\\.)/, // 37
        /^(?:[ \r\t]+:)/, // 38
        /^(?:[ \r\t]+@)/, // 39
        /^(?:[ \r\t]+~)/, // 40
        /^(?:(\[):)/, // 41
        /^(?:(\[)@)/, // 42
        /^(?:(\[)~)/, // 43
        /^(?:\])/, // 44
        /^(?:(\[)ref\b)/, // 45
        /^(?:(\[))/, // 46
        /^(?:([\t \r]+))/, // 47
        /^(?:.)/, // 48
        /^(?:([0-9a-zA-Z][0-9a-zA-Z_]*))/, // 49
        /^(?:("([^\n\"\\]|\\.)*"))/, // 50
        /^(?:.)/, // 51
        // FIX: rule 52's action emits `]`, so it must match the escape `\]`.
        // It used to match a lone backslash, which also swallowed the
        // backslash of `\#` before rule 53 could be tried.
        /^(?:\\\])/, // 52
        /^(?:\\#)/, // 53
        /^(?:\])/, // 54
        /^(?:[ ])/, // 55
        /^(?:[ ]#[0-9]+)/, // 56
        /^(?:.)/, // 57
        /^(?:([\t \r]+))/, // 58
        /^(?:site([\t \r]*)=)/, // 59
        /^(?:page([\t \r]*)=)/, // 60
        /^(?:url([\t \r]*)=)/, // 61
        /^(?:title([\t \r]*)=)/, // 62
        /^(?:article([\t \r]*)=)/, // 63
        /^(?:author([\t \r]*)=)/, // 64
        /^(?:editor([\t \r]*)=)/, // 65
        /^(?:publisher([\t \r]*)=)/, // 66
        /^(?:isbn([\t \r]*)=)/, // 67
        /^(?:\])/, // 68
        /^(?:.)/, // 69
        /^(?:([\t \r]+))/, // 70
        /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, // 71
        /^(?:("([^\n\"\\]|\\.)*"))/, // 72
        /^(?:\[\/video\])/, // 73
        /^(?:([\t \r]+))/, // 74
        /^(?:(\[)quote\b)/, // 75
        /^(?:(\[):)/, // 76
        /^(?:(\[)[0-9])/, // 77
        /^(?:..)/, // 78
        /^(?:.)/, // 79
        /^(?:[^\]\n]+\])/, // 80
        /^(?:([\t \r]+))/, // 81
        /^(?:([\t \r]+))/, // 82
        /^(?::([^\" \]\t\r\n][^ \]\t\r\n]*))/, // 83
        /^(?::("([^\n\"\\]|\\.)*"))/, // 84
        /^(?:\](\[))/, // 85
        /^(?:\])/, // 86
        /^(?:.)/, // 87
        /^(?:([\t \r]+))/, // 88
        /^(?:[0-9]+([\t \r]*)\])/, // 89
        /^(?:([0-9a-zA-Z][0-9a-zA-Z_]*))/, // 90
        /^(?:.)/ // 91
    ],
    // Rule numbers active in each lexer condition, in matching priority order.
    conditions: {
        "QUOTES": {
            "rules": [0, 1, 88, 89, 90, 91],
            "inclusive": true
        },
        "CATEGORIES": {
            "rules": [0, 1, 82, 83, 84, 85, 86, 87],
            "inclusive": true
        },
        "AUTHOR": {
            "rules": [0, 1, 80, 81],
            "inclusive": true
        },
        "AFTERTEXT": {
            "rules": [0, 1, 73, 74, 75, 76, 77, 78, 79],
            "inclusive": true
        },
        "R_ATTR": {
            "rules": [0, 1, 70, 71, 72],
            "inclusive": true
        },
        "REF": {
            "rules": [0, 1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
            "inclusive": true
        },
        // NOTE(review): rule 55 (single space) is listed before rule 56
        // (space + `#digits`), so rule 56 can never win under first-match
        // lexing. Left as-is because swapping them changes the text/parameter
        // produced for existing input -- confirm the intended output first.
        "MARKER_XTRA": {
            "rules": [0, 1, 52, 53, 54, 55, 56, 57],
            "inclusive": true
        },
        "MARKER": {
            "rules": [0, 1, 49, 50, 51],
            "inclusive": true
        },
        "TEXT": {
            "rules": [0, 1, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48],
            "inclusive": true
        },
        "TEXT_START": {
            "rules": [0, 1, 31, 32, 33, 34, 35],
            "inclusive": true
        },
        "ANNOTATION": {
            "rules": [0, 1, 25, 26, 27, 28, 29, 30],
            "inclusive": true
        },
        "V2_ATTR": {
            "rules": [0, 1, 21, 22, 23, 24],
            "inclusive": true
        },
        "V_ATTR": {
            "rules": [0, 1, 17, 18, 19, 20],
            "inclusive": true
        },
        "VIDEO": {
            "rules": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
            "inclusive": true
        },
        "INITIAL": {
            "rules": [0, 1, 2, 3],
            "inclusive": true
        }
    }
}