diff --git a/hmmlib-js/hmmlib.js b/hmmlib-js/hmmlib.js index 296ceaa..171b29a 100644 --- a/hmmlib-js/hmmlib.js +++ b/hmmlib-js/hmmlib.js @@ -3,19 +3,16 @@ function HMML_parse(contents) { var state = { line: 0, annos: [], - meta: {}, + meta: { + annotators: [], + guests: [], + co_hosts: [] + }, an: { line: 0, - time: "", text: "", - author: "", references: [], markers: [], - quote: { - id: 0, - author: "" - }, - is_quote: false }, ref: {}, first: true @@ -27,13 +24,6 @@ function HMML_parse(contents) { do { r = l.next(); } while (!r); - state.annos.forEach(function(a) { - if (!a.is_quote) { - delete a.quote; - } - if (a.author === "") delete a.author; - delete a.is_quote; - }); return { metadata: state.meta, annotations: state.annos @@ -425,16 +415,9 @@ HMMLexer.prototype = { if (!yy.first) yy.annos.push(yy.an); yy.an = { line: yy.line, - time: "", text: "", - author: "", references: [], markers: [], - quote: { - id: 0, - author: "" - }, - is_quote: false }; yy.an.line = yy.line; yy.first = false; @@ -485,16 +468,16 @@ HMMLexer.prototype = { yy_.begin("V_ATTR"); break; case 12: - yy.attr = "co_host" - yy_.begin("V_ATTR"); + yy.attr = "co_hosts" + yy_.begin("V2_ATTR"); break; case 13: - yy.attr = "guest" - yy_.begin("V_ATTR"); + yy.attr = "guests" + yy_.begin("V2_ATTR"); break; case 14: - yy.attr = "annotator"; - yy_.begin("V_ATTR"); + yy.attr = "annotators"; + yy_.begin("V2_ATTR"); break; case 15: yy_.begin("ANNOTATION"); @@ -518,121 +501,136 @@ HMMLexer.prototype = { yy_.begin("VIDEO"); break; case 21: + yy_.begin("VIDEO"); + break; + case 22: + yy.meta[yy.attr].push(yy_.yytext); + yy_.begin("VIDEO"); + break; + case 23: + yy.meta[yy.attr].push(UNQUOTE(yy, yy_.yytext.substr(1, yy_.yyleng - 2))); + yy_.begin("VIDEO"); + break; + case 24: + yy_.less(0); + yy_.begin("VIDEO"); + break; + case 25: NEWANNO(yy); return 1; break; - case 22: + case 26: NEWANNO(yy); yy.an.time = yy_.yytext.substr(1, yy_.yyleng - 4); yy_.begin("AUTHOR"); break; - case 23: + case 27: NEWANNO(yy); yy.an.time = yy_.yytext.substr(1, yy_.yyleng - 2); yy_.begin("TEXT_START"); break; - case 24: + case 28: ERR(yy, "Timecode '" + yy_.yytext + "' out of range."); break; - case 25: + case 29: break; - case 26: + case 30: ERR(yy, "Cannot parse annotation. Expected timecode."); break; - case 27: + case 31: M_(yy, "CATEGORY", "TEXT"); yy_.begin("MARKER"); break; - case 28: + case 32: M_(yy, "MEMBER", "TEXT"); yy_.begin("MARKER"); break; - case 29: + case 33: M_(yy, "PROJECT", "TEXT"); yy_.begin("MARKER"); break; - case 30: + case 34: yy_.less(0); yy_.begin("TEXT"); break; - case 31: + case 35: ERR(yy, "Unknown character '" + yy_.yytext + "' after timecode."); break; - case 32: + case 36: yy.an.text += yy_.yytext; break; - case 33: + case 37: CHECKESCAPE(yy, yy_.yytext.charAt(1)); yy.an.text += yy_.yytext.substr(1, yy_.yyleng - 1); break; - case 34: + case 38: yy.an.text += ' '; M_(yy, "CATEGORY", "TEXT"); yy_.begin("MARKER"); break; - case 35: + case 39: yy.an.text += ' '; M_(yy, "MEMBER", "TEXT"); yy_.begin("MARKER"); break; - case 36: + case 40: yy.an.text += ' '; M_(yy, "PROJECT", "TEXT"); yy_.begin("MARKER"); break; - case 37: + case 41: M_(yy, "CATEGORY", "MARKER_XTRA"); yy_.begin("MARKER"); break; - case 38: + case 42: M_(yy, "MEMBER", "MARKER_XTRA"); yy_.begin("MARKER"); break; - case 39: + case 43: M_(yy, "PROJECT", "MARKER_XTRA"); yy_.begin("MARKER"); break; - case 40: + case 44: yy_.begin("AFTERTEXT"); break; - case 41: + case 45: yy.ref.offset = yy.an.text.length; yy_.begin("REF"); break; - case 42: + case 46: break; - case 43: + case 47: yy.an.text += ' '; break; - case 44: + case 48: yy.an.text += yy_.yytext; break; - case 45: + case 49: M_ADD(yy, yy_.yytext); yy_.begin(yy.mnext); break; - case 46: + case 50: M_ADD(yy, yy_.yytext.substr(1, yy_.yyleng - 2)); yy_.begin(yy.mnext); break; - case 47: + case 51: ERR(yy, "Cannot parse Marker. Expected quoted or alphanumeric attribute."); break; - case 48: + case 52: MX_ADD(yy, ']'); break; - case 49: + case 53: MX_ADD(yy, '#'); break; - case 50: + case 54: yy_.begin("TEXT"); break; - case 51: + case 55: if (yy.an.markers[yy.an.markers.length - 1].parameter) { MX_ADD(yy, ' '); } break; - case 52: + case 56: var m = yy.an.markers[yy.an.markers.length - 1]; if (m.type == "PROJECT") { m.episode = yy_.yytext.substr(2); @@ -641,178 +639,184 @@ HMMLexer.prototype = { } break; - case 53: + case 57: MX_ADD(yy, yy_.yytext); break; - case 54: + case 58: break; - case 55: + case 59: yy.attr = "site"; yy_.begin("R_ATTR"); break; - case 56: + case 60: yy.attr = "page"; yy_.begin("R_ATTR"); break; - case 57: + case 61: yy.attr = "url"; yy_.begin("R_ATTR"); break; - case 58: + case 62: yy.attr = "title"; yy_.begin("R_ATTR"); break; - case 59: + case 63: yy.attr = "article"; yy_.begin("R_ATTR"); break; - case 60: + case 64: yy.attr = "author"; yy_.begin("R_ATTR"); break; - case 61: + case 65: yy.attr = "editor"; yy_.begin("R_ATTR"); break; - case 62: + case 66: yy.attr = "publisher"; yy_.begin("R_ATTR"); break; - case 63: + case 67: yy.attr = "isbn"; yy_.begin("R_ATTR"); break; - case 64: + case 68: yy.an.references.push(yy.ref); yy.ref = {}; yy_.begin("TEXT"); break; - case 65: - ERR(yy, "Unexpected item in ref: " + yy_.yytext); - break; - case 66: - break; - case 67: - yy.ref[yy.attr] = yy_.yytext; - yy_.begin("REF"); - break; - case 68: - yy.ref[yy.attr] = UNQUOTE(yy, yy_.yytext.substr(1, yy_.yyleng - 2)); - yy_.begin("REF"); - break; case 69: - NEWANNO(yy); - return 1; + ERR(yy, "Unexpected item in ref: " + yy_.yytext); break; case 70: break; case 71: - yy_.begin("QUOTES"); + yy.ref[yy.attr] = yy_.yytext; + yy_.begin("REF"); break; case 72: + yy.ref[yy.attr] = UNQUOTE(yy, yy_.yytext.substr(1, yy_.yyleng - 2)); + yy_.begin("REF"); + break; + case 73: + NEWANNO(yy); + return 1; + break; + case 74: + break; + case 75: + yy_.begin("QUOTES"); + break; + case 76: yy_.begin("CATEGORIES"); yy_.less(1); break; - case 73: + case 77: yy_.begin("ANNOTATION"); yy_.less(0); break; - case 74: + case 78: ERR(yy, "Unexpected thing after text node: " + yy_.yytext); break; - case 75: + case 79: ERR(yy, "Unexpected thing after text node: " + yy_.yytext); break; - case 76: + case 80: yy.an.author = yy_.yytext.substr(0, yy_.yyleng - 1); yy_.begin("TEXT_START"); break; - case 77: + case 81: break; - case 78: + case 82: break; - case 79: + case 83: yy.an.markers.push({ type: "CATEGORY", marker: yy_.yytext.substr(1, yy_.yyleng - 1), offset: -1 }); break; - case 80: + case 84: yy.an.markers.push({ type: "CATEGORY", marker: UNQUOTE(yy, yy_.yytext.substr(2, yy_.yyleng - 3)), offset: -1 }); break; - case 81: + case 85: yy_.begin("QUOTES"); break; - case 82: - yy_.begin("ANNOTATION"); - break; - case 83: - ERR(yy, "Unexpected character in category tag: " + yy_.yytext); - break; - case 84: - break; - case 85: - yy.an.is_quote = true; - yy.an.quote.id = parseInt(yy_.yytext); - yy_.begin("ANNOTATION"); - break; case 86: - yy.an.quote.author += yy_.yytext; + yy_.begin("ANNOTATION"); break; case 87: + ERR(yy, "Unexpected character in category tag: " + yy_.yytext); + break; + case 88: + break; + case 89: + yy.an.quote = { + author: "", + id: parseInt(yy_.yytext) + }; + yy_.begin("ANNOTATION"); + break; + case 90: + yy.an.quote.author += yy_.yytext; + break; + case 91: ERR(yy, "Unexpected character in quotes tag: " + yy_.yytext); break; } }, - rules: [/^(?:$)/, /^(?:\r\n|\n)/, /^(?:(\[video\b))/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:member([\t \r]*)=)/, /^(?:stream_platform([\t \r]*)=)/, /^(?:stream_username([\t \r]*)=)/, /^(?:project([\t \r]*)=)/, /^(?:title([\t \r]*)=)/, /^(?:vod_platform([\t \r]*)=)/, /^(?:id([\t \r]*)=)/, /^(?:co_host([\t \r]*)=)/, /^(?:guest([\t \r]*)=)/, /^(?:annotator([\t \r]*)=)/, /^(?:\])/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:\])/, /^(?:\[\/video\])/, /^(?:(\[[0-9]{1,2}(:[0-5][0-9]){1,2}\])(\[)@)/, /^(?:(\[[0-9]{1,2}(:[0-5][0-9]){1,2}\]))/, /^(?:(\[[0-9]{1,2}(:[6-9][0-9]){1,2}\]))/, /^(?:([\t \r]+))/, /^(?:.)/, /^(?:(\[):)/, /^(?:(\[)@)/, /^(?:(\[)~)/, /^(?:(\[))/, /^(?:.)/, /^(?:([^\\\:\@\~\[\]\r\n\t ])+)/, /^(?:\\.)/, /^(?:[ \r\t]+:)/, /^(?:[ \r\t]+@)/, /^(?:[ \r\t]+~)/, /^(?:(\[):)/, /^(?:(\[)@)/, /^(?:(\[)~)/, /^(?:\])/, /^(?:(\[)ref\b)/, /^(?:(\[))/, /^(?:([\t \r]+))/, /^(?:.)/, /^(?:([0-9a-zA-Z][0-9a-zA-Z_]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:.)/, /^(?:\\)/, /^(?:\\#)/, /^(?:\])/, /^(?:[ ])/, /^(?:[ ]#[0-9]+)/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:site([\t \r]*)=)/, /^(?:page([\t \r]*)=)/, /^(?:url([\t \r]*)=)/, /^(?:title([\t \r]*)=)/, /^(?:article([\t \r]*)=)/, /^(?:author([\t \r]*)=)/, /^(?:editor([\t \r]*)=)/, /^(?:publisher([\t \r]*)=)/, /^(?:isbn([\t \r]*)=)/, /^(?:\])/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:\[\/video\])/, /^(?:([\t \r]+))/, /^(?:(\[)quote\b)/, /^(?:(\[):)/, /^(?:(\[)[0-9])/, /^(?:..)/, /^(?:.)/, /^(?:[^\]\n]+\])/, /^(?:([\t \r]+))/, /^(?:([\t \r]+))/, /^(?::([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?::("([^\n\"\\]|\\.)*"))/, /^(?:\](\[))/, /^(?:\])/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:[0-9]+([\t \r]*)\])/, /^(?:([0-9a-zA-Z][0-9a-zA-Z_]*))/, /^(?:.)/], + rules: [/^(?:$)/, /^(?:\r\n|\n)/, /^(?:(\[video\b))/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:member([\t \r]*)=)/, /^(?:stream_platform([\t \r]*)=)/, /^(?:stream_username([\t \r]*)=)/, /^(?:project([\t \r]*)=)/, /^(?:title([\t \r]*)=)/, /^(?:vod_platform([\t \r]*)=)/, /^(?:id([\t \r]*)=)/, /^(?:co-host([\t \r]*)=)/, /^(?:guest([\t \r]*)=)/, /^(?:annotator([\t \r]*)=)/, /^(?:\])/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:\])/, /^(?:([\t \r]+))/, /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:\])/, /^(?:\[\/video\])/, /^(?:(\[[0-9]{1,2}(:[0-5][0-9]){1,2}\])(\[)@)/, /^(?:(\[[0-9]{1,2}(:[0-5][0-9]){1,2}\]))/, /^(?:(\[[0-9]{1,2}(:[6-9][0-9]){1,2}\]))/, /^(?:([\t \r]+))/, /^(?:.)/, /^(?:(\[):)/, /^(?:(\[)@)/, /^(?:(\[)~)/, /^(?:(\[))/, /^(?:.)/, /^(?:([^\\\:\@\~\[\]\r\n\t ])+)/, /^(?:\\.)/, /^(?:[ \r\t]+:)/, /^(?:[ \r\t]+@)/, /^(?:[ \r\t]+~)/, /^(?:(\[):)/, /^(?:(\[)@)/, /^(?:(\[)~)/, /^(?:\])/, /^(?:(\[)ref\b)/, /^(?:(\[))/, /^(?:([\t \r]+))/, /^(?:.)/, /^(?:([0-9a-zA-Z][0-9a-zA-Z_]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:.)/, /^(?:\\)/, /^(?:\\#)/, /^(?:\])/, /^(?:[ ])/, /^(?:[ ]#[0-9]+)/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:site([\t \r]*)=)/, /^(?:page([\t \r]*)=)/, /^(?:url([\t \r]*)=)/, /^(?:title([\t \r]*)=)/, /^(?:article([\t \r]*)=)/, /^(?:author([\t \r]*)=)/, /^(?:editor([\t \r]*)=)/, /^(?:publisher([\t \r]*)=)/, /^(?:isbn([\t \r]*)=)/, /^(?:\])/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?:("([^\n\"\\]|\\.)*"))/, /^(?:\[\/video\])/, /^(?:([\t \r]+))/, /^(?:(\[)quote\b)/, /^(?:(\[):)/, /^(?:(\[)[0-9])/, /^(?:..)/, /^(?:.)/, /^(?:[^\]\n]+\])/, /^(?:([\t \r]+))/, /^(?:([\t \r]+))/, /^(?::([^\" \]\t\r\n][^ \]\t\r\n]*))/, /^(?::("([^\n\"\\]|\\.)*"))/, /^(?:\](\[))/, /^(?:\])/, /^(?:.)/, /^(?:([\t \r]+))/, /^(?:[0-9]+([\t \r]*)\])/, /^(?:([0-9a-zA-Z][0-9a-zA-Z_]*))/, /^(?:.)/], conditions: { "QUOTES": { - "rules": [0, 1, 84, 85, 86, 87], + "rules": [0, 1, 88, 89, 90, 91], "inclusive": true }, "CATEGORIES": { - "rules": [0, 1, 78, 79, 80, 81, 82, 83], + "rules": [0, 1, 82, 83, 84, 85, 86, 87], "inclusive": true }, "AUTHOR": { - "rules": [0, 1, 76, 77], + "rules": [0, 1, 80, 81], "inclusive": true }, "AFTERTEXT": { - "rules": [0, 1, 69, 70, 71, 72, 73, 74, 75], + "rules": [0, 1, 73, 74, 75, 76, 77, 78, 79], "inclusive": true }, "R_ATTR": { - "rules": [0, 1, 66, 67, 68], + "rules": [0, 1, 70, 71, 72], "inclusive": true }, "REF": { - "rules": [0, 1, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65], + "rules": [0, 1, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69], "inclusive": true }, "MARKER_XTRA": { - "rules": [0, 1, 48, 49, 50, 51, 52, 53], + "rules": [0, 1, 52, 53, 54, 55, 56, 57], "inclusive": true }, "MARKER": { - "rules": [0, 1, 45, 46, 47], + "rules": [0, 1, 49, 50, 51], "inclusive": true }, "TEXT": { - "rules": [0, 1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], + "rules": [0, 1, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48], "inclusive": true }, "TEXT_START": { - "rules": [0, 1, 27, 28, 29, 30, 31], + "rules": [0, 1, 31, 32, 33, 34, 35], "inclusive": true }, "ANNOTATION": { - "rules": [0, 1, 21, 22, 23, 24, 25, 26], + "rules": [0, 1, 25, 26, 27, 28, 29, 30], + "inclusive": true + }, + "V2_ATTR": { + "rules": [0, 1, 21, 22, 23, 24], "inclusive": true }, "V_ATTR": { diff --git a/hmmlib-js/src/header.js b/hmmlib-js/src/header.js index 4476ec2..78acedd 100644 --- a/hmmlib-js/src/header.js +++ b/hmmlib-js/src/header.js @@ -3,19 +3,16 @@ function HMML_parse(contents) { var state = { line: 0, annos: [], - meta: {}, + meta: { + annotators: [], + guests: [], + co_hosts: [] + }, an: { line: 0, - time: "", text: "", - author: "", references: [], markers: [], - quote: { - id: 0, - author: "" - }, - is_quote: false }, ref: {}, first: true @@ -27,13 +24,6 @@ function HMML_parse(contents) { do { r = l.next(); } while (!r); - state.annos.forEach(function(a) { - if (!a.is_quote) { - delete a.quote; - } - if (a.author === "") delete a.author; - delete a.is_quote; - }); return { metadata: state.meta, annotations: state.annos diff --git a/hmmlib-js/src/source.l b/hmmlib-js/src/source.l index 10f4f06..a25e169 100644 --- a/hmmlib-js/src/source.l +++ b/hmmlib-js/src/source.l @@ -49,16 +49,9 @@ if(!yy.first) yy.annos.push(yy.an); yy.an = { line: yy.line, - time: "", text: "", - author: "", references: [], markers: [], - quote: { - id: 0, - author: "" - }, - is_quote: false }; yy.an.line = yy.line; yy.first = false; @@ -82,6 +75,7 @@ RB \] %s VIDEO %s V_ATTR +%s V2_ATTR %s ANNOTATION %s TEXT_START %s TEXT @@ -110,9 +104,9 @@ RB \]