Make the bbcode parser much faster

2021-07-17 21:42:52 -05:00 · 2021-07-17 21:42:52 -05:00 · a9d39cd969
parent 059f407ee4
commit a9d39cd969
5 changed files with 49 additions and 24 deletions
--- a/public/js/editorpreviews.js
+++ b/public/js/editorpreviews.js
@ -22,7 +22,7 @@ WebAssembly.instantiateStreaming(fetch('../parsing.wasm'), go.importObject)
    });

 const doPreview = () => {
-    if (!ready || !inputData) {
+    if (!ready || inputData === null) {
        return;
    }

--- a/public/parsing.wasm
+++ b/public/parsing.wasm
--- a/src/parsing/bbcode.go
+++ b/src/parsing/bbcode.go
@ -23,7 +23,8 @@ import (

 var BBCodePriority = 1 // TODO: This is maybe too high a priority?

-var reTag = regexp.MustCompile(`(?P<open>\[\s*(?P<opentagname>[a-zA-Z0-9]+))|(?P<close>\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\])`)
+var reOpenTag = regexp.MustCompile(`^\[\s*(?P<name>[a-zA-Z0-9]+)`)
+var reTag = regexp.MustCompile(`\[\s*(?P<opentagname>[a-zA-Z0-9]+)|\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\]`)

 var previewBBCodeCompiler = bbcode.NewCompiler(false, false)
 var realBBCodeCompiler = bbcode.NewCompiler(false, false)
@ -245,38 +246,42 @@ func (s bbcodeParser) Parse(parent gast.Node, block text.Reader, pc parser.Conte
 	_, pos := block.Position()
 	restOfSource := block.Source()[pos.Start:]

-	matches := reTag.FindAllSubmatchIndex(restOfSource, -1)
-	if matches == nil {
-		// No tags anywhere
+	openMatch := reOpenTag.FindSubmatch(restOfSource)
+	if openMatch == nil {
+		// not a bbcode tag
 		return nil
 	}

 	otIndex := reTag.SubexpIndex("opentagname")
 	ctIndex := reTag.SubexpIndex("closetagname")

-	tagName := extractStringBySubmatchIndices(restOfSource, matches[0], otIndex)
-	if tagName == "" {
-		// Not an opening tag
-		return nil
-	}
-
+	tagName := string(openMatch[reOpenTag.SubexpIndex("name")])
 	depth := 0
 	endIndex := -1
-	for _, m := range matches {
-		if openName := extractStringBySubmatchIndices(restOfSource, m, otIndex); openName != "" {
-			if openName == tagName {
-				depth++
-			}
-		} else if closeName := extractStringBySubmatchIndices(restOfSource, m, ctIndex); closeName != "" {
-			if closeName == tagName {
-				depth--
-				if depth == 0 {
-					// We have balanced out!
-					endIndex = m[1] // the end index of this closing tag (exclusive)
-					break
-				}
+
+	searchStartIndex := 0
+
+	for {
+		searchText := restOfSource[searchStartIndex:]
+
+		match := reTag.FindSubmatchIndex(searchText)
+		if match == nil {
+			// no more tags
+			break
+		}
+
+		if openName := extractStringBySubmatchIndices(searchText, match, otIndex); openName == tagName {
+			depth++
+		} else if closeName := extractStringBySubmatchIndices(searchText, match, ctIndex); closeName == tagName {
+			depth--
+			if depth == 0 {
+				// We have balanced out!
+				endIndex = searchStartIndex + match[1] // the end index of this closing tag (exclusive)
+				break
 			}
 		}
+
+		searchStartIndex = searchStartIndex + match[1]
 	}
 	if endIndex < 0 {
 		// Unbalanced, too many opening tags
--- a/src/parsing/parsing_test.go
+++ b/src/parsing/parsing_test.go
@ -63,6 +63,26 @@ func main() {
 	})
 }

+func TestSharlock(t *testing.T) { // Checks that ParsePostInput(sharlock, RealMarkdown) leaves none of the raw bbcode tag patterns listed below in its output.
+	t.Skipf("This doesn't pass right now because parts of Sharlock's original source read as indented code blocks, or depend on different line break behavior.") // Skip stops the test here, so the subtest below does not currently run.
+	t.Run("sanity check", func(t *testing.T) {
+		result := ParsePostInput(sharlock, RealMarkdown)
+
+		for _, line := range strings.Split(result, "\n") { // Each assertion is applied to one output line at a time.
+			assert.NotContains(t, line, "[b]")
+			assert.NotContains(t, line, "[/b]")
+			assert.NotContains(t, line, "[ul]")
+			assert.NotContains(t, line, "[/ul]")
+			assert.NotContains(t, line, "[li]")
+			assert.NotContains(t, line, "[/li]")
+			assert.NotContains(t, line, "[img]")
+			assert.NotContains(t, line, "[/img]")
+			assert.NotContains(t, line, "[code") // No closing bracket, unlike the other patterns; presumably so [code ...] tags carrying arguments also match. TODO confirm.
+			assert.NotContains(t, line, "[/code]")
+		}
+	})
+}
+
 func BenchmarkSharlock(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		ParsePostInput(sharlock, RealMarkdown)
--- a/src/parsing/wasm/build.sh
+++ b/src/parsing/wasm/build.sh