From 6729e0b4c3364bc4cb6ecde7c7323682e4d4db70 Mon Sep 17 00:00:00 2001 From: Alex Baines Date: Mon, 19 Jun 2017 14:47:55 +0100 Subject: [PATCH] hmmlib-js: Add source files, fix a few bugs. The source files are only needed to build hmmlib.js; they needn't be included in other projects. Bugs fixed: * marker offset is now the length of the annotation text at the point where the marker is added, instead of the length of the string passed to M_ADD. * after-text markers (-1 offset) now have type CATEGORY rather than HMML_CATEGORY, which is consistent with the in-text ones. * markers with parameters now have the proper .text, and marker.parameter no longer starts with "undefined". --- hmmlib-js/.gitignore | 1 + hmmlib-js/hmmlib.js | 31 +++++-- hmmlib-js/index.html | 2 +- hmmlib-js/src/header.js | 75 +++++++++++++++ hmmlib-js/src/makefile | 11 +++ hmmlib-js/src/source.l | 199 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 311 insertions(+), 8 deletions(-) create mode 100644 hmmlib-js/.gitignore create mode 100644 hmmlib-js/src/header.js create mode 100644 hmmlib-js/src/makefile create mode 100644 hmmlib-js/src/source.l diff --git a/hmmlib-js/.gitignore b/hmmlib-js/.gitignore new file mode 100644 index 0000000..4cd71b9 --- /dev/null +++ b/hmmlib-js/.gitignore @@ -0,0 +1 @@ +src/temp.js diff --git a/hmmlib-js/hmmlib.js b/hmmlib-js/hmmlib.js index 289d8c9..7d48834 100644 --- a/hmmlib-js/hmmlib.js +++ b/hmmlib-js/hmmlib.js @@ -339,6 +339,16 @@ HMMLexer.prototype = { } }, + // return next match that has a token + lex: function lex() { + var r = this.next(); + if (r) { + return r; + } else { + return this.lex(); + } + }, + // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack) begin: function begin(condition) { this.conditionStack.push(condition); @@ -383,7 +393,7 @@ HMMLexer.prototype = { return this.conditionStack.length; }, options: { - "moduleName": "test" + "moduleName": "temp" }, performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) { function ERR(yy, err) { @@ -419,12 +429,19 @@ HMMLexer.prototype = { 
function M_ADD(yy, t, n) { yy.an.markers[yy.an.markers.length - 1].marker = t.substr(0, n); - yy.an.markers[yy.an.markers.length - 1].offset = t.length; - yy.an.text += t.substr(0, n); + yy.an.markers[yy.an.markers.length - 1].offset = yy.an.text.length; + if (yy.mnext === "TEXT") { + yy.an.text += t.substr(0, n); + } } function MX_ADD(yy, c) { - yy.an.markers[yy.an.markers.length - 1].parameter += c; + var m = yy.an.markers[yy.an.markers.length - 1]; + if (m.parameter) { + m.parameter += c; + } else { + m.parameter = c; + } yy.an.text += c; } @@ -712,14 +729,14 @@ HMMLexer.prototype = { break; case 73: yy.an.markers.push({ - type: "HMML_CATEGORY", + type: "CATEGORY", marker: yy_.yytext.substr(1, yy_.yyleng - 1), offset: -1 }); break; case 74: yy.an.markers.push({ - type: "HMML_CATEGORY", + type: "CATEGORY", marker: UNQUOTE(yy, yy_.yytext.substr(2, yy_.yyleng - 3)), offset: -1 }); @@ -807,4 +824,4 @@ HMMLexer.prototype = { "inclusive": true } } -}; +} \ No newline at end of file diff --git a/hmmlib-js/index.html b/hmmlib-js/index.html index be9b79c..ee21881 100644 --- a/hmmlib-js/index.html +++ b/hmmlib-js/index.html @@ -12,7 +12,7 @@ var x = new XMLHttpRequest(); x.addEventListener("load", function(){ var state = HMML_parse(this.responseText); - console.log(state); + console.dir(state); }); x.open("GET", "test.hmml"); x.send(); diff --git a/hmmlib-js/src/header.js b/hmmlib-js/src/header.js new file mode 100644 index 0000000..7f427b9 --- /dev/null +++ b/hmmlib-js/src/header.js @@ -0,0 +1,75 @@ +function HMML_parse(contents) { + var l = new HMMLexer(); + var state = { + line: 0, + annos: [], + meta: { + member: "", + twitch: "", + project: "", + title: "", + platform: "", + id: "", + annotator: "" + }, + an: { + line: 0, + time: "", + text: "", + author: "", + references: [], + markers: [], + quote: { + id: 0, + author: "" + }, + is_quote: false + }, + ref: { + site: "", + page: "", + url: "", + title: "", + article: "", + author: "", + editor: "", + publisher: "", 
+ isbn: "", + offset: 0 + }, + error: { + line: 0, + msg: "" + }, + attr: "", + mnext: 0, + first: true + }; + l.setInput(contents, state); + + try { + var r; + do { + r = l.next(); + } while (!r); + state.annos.forEach(function(a) { + if (!a.is_quote) { + delete a.quote; + } + if (a.author === "") delete a.author; + delete a.is_quote; + }); + return { + metadata: state.meta, + annotations: state.annos + }; + } catch (e) { + return { + error: state.error + }; + } +} + +function HMMLexer() {} +/* generated by jison-lex 0.3.4 */ +HMMLexer.prototype = { diff --git a/hmmlib-js/src/makefile b/hmmlib-js/src/makefile new file mode 100644 index 0000000..0044878 --- /dev/null +++ b/hmmlib-js/src/makefile @@ -0,0 +1,11 @@ +../hmmlib.js: header.js temp.js + tail -n +4 temp.js | head -n -2 | head -c -3 | cat header.js - > $@ + js-beautify -r $@ + +temp.js: source.l + jison-lex $< -o $@ + +clean: + $(RM) temp.js ../hmmlib.js + +.PHONY: clean diff --git a/hmmlib-js/src/source.l b/hmmlib-js/src/source.l new file mode 100644 index 0000000..7921b65 --- /dev/null +++ b/hmmlib-js/src/source.l @@ -0,0 +1,199 @@ + +%{ + function ERR(yy, err){ + console.log(err); + yy.error.line = yy.line; + yy.error.msg = err; + throw "tantrum"; + } + + function CHECKESCAPE(yy, str){ + if(!"[]:@~\\\"".find(str)){ + ERR(yy, "hmmlib: Unknown backslash escape code: %s", str); + } + } + + function UNQUOTE(yy, str){ + var i = 0; + var j = 0; + while((j = str.indexOf('\\', i)) != -1){ + CHECKESCAPE(yy, str.charAt(j)); + str = str.slice(i, j) + str.slice(j+1); + i = j+1; + } + return str; + } + + function M_(yy, str, s){ + yy.an.markers.push({ type: str }); + yy.mnext = s; + } + + function M_ADD(yy, t, n){ + yy.an.markers[yy.an.markers.length - 1].marker = t.substr(0, n); + yy.an.markers[yy.an.markers.length - 1].offset = yy.an.text.length; + if(yy.mnext === "TEXT"){ + yy.an.text += t.substr(0, n); + } + } + + function MX_ADD(yy, c){ + var m = yy.an.markers[yy.an.markers.length - 1]; + if(m.parameter){ + 
m.parameter += c; + } else { + m.parameter = c; + } + yy.an.text += c; + } + + function NEWANNO(yy){ + if(!yy.first) yy.annos.push(yy.an); + yy.an = { + line: yy.line, + time: "", + text: "", + author: "", + references: [], + markers: [], + quote: { + id: 0, + author: "" + }, + is_quote: false + }; + yy.an.line = yy.line; + yy.first = false; + } +%} + +%option reentrant +%option noyywrap + +S [\t \r]* +SP [\t \r]+ +ATTR_SIMPLE [^\" \]\t\r\n][^ \]\t\r\n]* +ATTR_ALNUM [0-9a-zA-Z][0-9a-zA-Z_]* +ATTR_QUOTED \"([^\n\"\\]|\\.)*\" +TAG_VIDEO_OPEN "[video" +TIMECODE \[[0-9]{1,2}(\:[0-5][0-9]){1,2}\] +BAD_TIMECODE \[[0-9]{1,2}(\:[6-9][0-9]){1,2}\] +TEXT_BREAK [^\\\:\@\~\[\]\r\n\t ] +LB \[ +RB \] + +%s VIDEO +%s V_ATTR +%s ANNOTATION +%s TEXT_START +%s TEXT +%s MARKER +%s MARKER_XTRA +%s REF +%s R_ATTR +%s AFTERTEXT +%s AUTHOR +%s CATEGORIES +%s QUOTES + +%% + +<> { ERR(yy, "Unexpected EOF, video close tag not found."); } +\r\n|\n { yy.line++; } + +{TAG_VIDEO_OPEN} { yy_.begin("VIDEO"); } +. { ERR(yy, "Missing video tag."); } + +