Make the bbcode parser much faster

This commit is contained in:
Ben Visness 2021-07-17 21:42:52 -05:00
parent 059f407ee4
commit a9d39cd969
5 changed files with 49 additions and 24 deletions

View File

@ -22,7 +22,7 @@ WebAssembly.instantiateStreaming(fetch('../parsing.wasm'), go.importObject)
}); });
const doPreview = () => { const doPreview = () => {
if (!ready || !inputData) { if (!ready || inputData === null) {
return; return;
} }

BIN
public/parsing.wasm Normal file → Executable file

Binary file not shown.

View File

@ -23,7 +23,8 @@ import (
var BBCodePriority = 1 // TODO: This is maybe too high a priority? var BBCodePriority = 1 // TODO: This is maybe too high a priority?
var reTag = regexp.MustCompile(`(?P<open>\[\s*(?P<opentagname>[a-zA-Z0-9]+))|(?P<close>\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\])`) var reOpenTag = regexp.MustCompile(`^\[\s*(?P<name>[a-zA-Z0-9]+)`)
var reTag = regexp.MustCompile(`\[\s*(?P<opentagname>[a-zA-Z0-9]+)|\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\]`)
var previewBBCodeCompiler = bbcode.NewCompiler(false, false) var previewBBCodeCompiler = bbcode.NewCompiler(false, false)
var realBBCodeCompiler = bbcode.NewCompiler(false, false) var realBBCodeCompiler = bbcode.NewCompiler(false, false)
@ -245,38 +246,42 @@ func (s bbcodeParser) Parse(parent gast.Node, block text.Reader, pc parser.Conte
_, pos := block.Position() _, pos := block.Position()
restOfSource := block.Source()[pos.Start:] restOfSource := block.Source()[pos.Start:]
matches := reTag.FindAllSubmatchIndex(restOfSource, -1) openMatch := reOpenTag.FindSubmatch(restOfSource)
if matches == nil { if openMatch == nil {
// No tags anywhere // not a bbcode tag
return nil return nil
} }
otIndex := reTag.SubexpIndex("opentagname") otIndex := reTag.SubexpIndex("opentagname")
ctIndex := reTag.SubexpIndex("closetagname") ctIndex := reTag.SubexpIndex("closetagname")
tagName := extractStringBySubmatchIndices(restOfSource, matches[0], otIndex) tagName := string(openMatch[reOpenTag.SubexpIndex("name")])
if tagName == "" {
// Not an opening tag
return nil
}
depth := 0 depth := 0
endIndex := -1 endIndex := -1
for _, m := range matches {
if openName := extractStringBySubmatchIndices(restOfSource, m, otIndex); openName != "" { searchStartIndex := 0
if openName == tagName {
depth++ for {
} searchText := restOfSource[searchStartIndex:]
} else if closeName := extractStringBySubmatchIndices(restOfSource, m, ctIndex); closeName != "" {
if closeName == tagName { match := reTag.FindSubmatchIndex(searchText)
depth-- if match == nil {
if depth == 0 { // no more tags
// We have balanced out! break
endIndex = m[1] // the end index of this closing tag (exclusive) }
break
} if openName := extractStringBySubmatchIndices(searchText, match, otIndex); openName == tagName {
depth++
} else if closeName := extractStringBySubmatchIndices(searchText, match, ctIndex); closeName == tagName {
depth--
if depth == 0 {
// We have balanced out!
endIndex = searchStartIndex + match[1] // the end index of this closing tag (exclusive)
break
} }
} }
searchStartIndex = searchStartIndex + match[1]
} }
if endIndex < 0 { if endIndex < 0 {
// Unbalanced, too many opening tags // Unbalanced, too many opening tags

View File

@ -63,6 +63,26 @@ func main() {
}) })
} }
// TestSharlock runs ParsePostInput over the sharlock fixture in RealMarkdown
// mode and asserts that none of the listed raw bbcode fragments remain in any
// line of the result. The test is currently skipped up front; see the skip
// message for the reason.
func TestSharlock(t *testing.T) {
	t.Skipf("This doesn't pass right now because parts of Sharlock's original source read as indented code blocks, or depend on different line break behavior.")

	// Raw bbcode fragments that must not appear in any line of the output.
	// Note that "[code" has no closing bracket, so it matches any opening
	// code tag that starts with that prefix.
	leftovers := []string{
		"[b]", "[/b]",
		"[ul]", "[/ul]",
		"[li]", "[/li]",
		"[img]", "[/img]",
		"[code", "[/code]",
	}

	t.Run("sanity check", func(t *testing.T) {
		output := ParsePostInput(sharlock, RealMarkdown)
		// Check line by line so a failure reports the offending line rather
		// than the entire output.
		for _, outputLine := range strings.Split(output, "\n") {
			for _, fragment := range leftovers {
				assert.NotContains(t, outputLine, fragment)
			}
		}
	})
}
func BenchmarkSharlock(b *testing.B) { func BenchmarkSharlock(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
ParsePostInput(sharlock, RealMarkdown) ParsePostInput(sharlock, RealMarkdown)

0
src/parsing/wasm/build.sh Normal file → Executable file
View File