Make the bbcode parser much faster

2021-07-17 21:42:52 -05:00 · 2021-07-17 21:42:52 -05:00 · a9d39cd969
parent 059f407ee4
commit a9d39cd969
5 changed files with 49 additions and 24 deletions
--- a/public/js/editorpreviews.js
+++ b/public/js/editorpreviews.js
@ -22,7 +22,7 @@ WebAssembly.instantiateStreaming(fetch('../parsing.wasm'), go.importObject)
    });
 const doPreview = () => {
-    if (!ready || !inputData) {
+    if (!ready || inputData === null) {
        return;
    }
--- a/public/parsing.wasm
+++ b/public/parsing.wasm
--- a/src/parsing/bbcode.go
+++ b/src/parsing/bbcode.go
@ -23,7 +23,8 @@ import (
 var BBCodePriority = 1 // TODO: This is maybe too high a priority?
-var reTag = regexp.MustCompile(`(?P<open>\[\s*(?P<opentagname>[a-zA-Z0-9]+))|(?P<close>\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\])`)
+var reOpenTag = regexp.MustCompile(`^\[\s*(?P<name>[a-zA-Z0-9]+)`)
 var reTag = regexp.MustCompile(`\[\s*(?P<opentagname>[a-zA-Z0-9]+)|\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\]`)
 var previewBBCodeCompiler = bbcode.NewCompiler(false, false)
 var realBBCodeCompiler = bbcode.NewCompiler(false, false)
@ -245,38 +246,42 @@ func (s bbcodeParser) Parse(parent gast.Node, block text.Reader, pc parser.Conte
 	_, pos := block.Position()
 	restOfSource := block.Source()[pos.Start:]
-	matches := reTag.FindAllSubmatchIndex(restOfSource, -1)
+	openMatch := reOpenTag.FindSubmatch(restOfSource)
-	if matches == nil {
+	if openMatch == nil {
-		// No tags anywhere
+		// not a bbcode tag
 		return nil
 	}
 	otIndex := reTag.SubexpIndex("opentagname")
 	ctIndex := reTag.SubexpIndex("closetagname")
-	tagName := extractStringBySubmatchIndices(restOfSource, matches[0], otIndex)
+	tagName := string(openMatch[reOpenTag.SubexpIndex("name")])
 	if tagName == "" {
 		// Not an opening tag
 		return nil
 	}
 	depth := 0
 	endIndex := -1
-	for _, m := range matches {
+
-		if openName := extractStringBySubmatchIndices(restOfSource, m, otIndex); openName != "" {
+	searchStartIndex := 0
-			if openName == tagName {
+
-				depth++
+	for {
-			}
+		searchText := restOfSource[searchStartIndex:]
-		} else if closeName := extractStringBySubmatchIndices(restOfSource, m, ctIndex); closeName != "" {
+
-			if closeName == tagName {
+		match := reTag.FindSubmatchIndex(searchText)
-				depth--
+		if match == nil {
-				if depth == 0 {
+			// no more tags
-					// We have balanced out!
+			break
-					endIndex = m[1] // the end index of this closing tag (exclusive)
+		}
-					break
+
-				}
+		if openName := extractStringBySubmatchIndices(searchText, match, otIndex); openName == tagName {
 			depth++
 		} else if closeName := extractStringBySubmatchIndices(searchText, match, ctIndex); closeName == tagName {
 			depth--
 			if depth == 0 {
 				// We have balanced out!
 				endIndex = searchStartIndex + match[1] // the end index of this closing tag (exclusive)
 				break
 			}
 		}
 		searchStartIndex = searchStartIndex + match[1]
 	}
 	if endIndex < 0 {
 		// Unbalanced, too many opening tags
--- a/src/parsing/parsing_test.go
+++ b/src/parsing/parsing_test.go
@ -63,6 +63,26 @@ func main() {
 	})
 }
 // TestSharlock passes the sharlock input through ParsePostInput with
 // RealMarkdown and asserts that none of the listed raw bbcode tag strings
 // appears on any line of the result. The test is currently skipped; the
 // Skipf message below gives the reason.
 func TestSharlock(t *testing.T) {
 	t.Skipf("This doesn't pass right now because parts of Sharlock's original source read as indented code blocks, or depend on different line break behavior.")
 	t.Run("sanity check", func(t *testing.T) {
 		// Tag text that must not appear in the output. "[code" has no
 		// closing bracket, so it matches any line containing that prefix.
 		leftoverTags := []string{
 			"[b]", "[/b]",
 			"[ul]", "[/ul]",
 			"[li]", "[/li]",
 			"[img]", "[/img]",
 			"[code", "[/code]",
 		}
 		rendered := ParsePostInput(sharlock, RealMarkdown)
 		outputLines := strings.Split(rendered, "\n")
 		for i := range outputLines {
 			for _, tag := range leftoverTags {
 				assert.NotContains(t, outputLines[i], tag)
 			}
 		}
 	})
 }
 func BenchmarkSharlock(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		ParsePostInput(sharlock, RealMarkdown)
--- a/src/parsing/wasm/build.sh
+++ b/src/parsing/wasm/build.sh