Make the bbcode parser much faster
This commit is contained in:
parent
059f407ee4
commit
a9d39cd969
|
@ -22,7 +22,7 @@ WebAssembly.instantiateStreaming(fetch('../parsing.wasm'), go.importObject)
|
||||||
});
|
});
|
||||||
|
|
||||||
const doPreview = () => {
|
const doPreview = () => {
|
||||||
if (!ready || !inputData) {
|
if (!ready || inputData === null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -23,7 +23,8 @@ import (
|
||||||
|
|
||||||
var BBCodePriority = 1 // TODO: This is maybe too high a priority?
|
var BBCodePriority = 1 // TODO: This is maybe too high a priority?
|
||||||
|
|
||||||
var reTag = regexp.MustCompile(`(?P<open>\[\s*(?P<opentagname>[a-zA-Z0-9]+))|(?P<close>\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\])`)
|
var reOpenTag = regexp.MustCompile(`^\[\s*(?P<name>[a-zA-Z0-9]+)`)
|
||||||
|
var reTag = regexp.MustCompile(`\[\s*(?P<opentagname>[a-zA-Z0-9]+)|\[\s*\/\s*(?P<closetagname>[a-zA-Z0-9]+)\s*\]`)
|
||||||
|
|
||||||
var previewBBCodeCompiler = bbcode.NewCompiler(false, false)
|
var previewBBCodeCompiler = bbcode.NewCompiler(false, false)
|
||||||
var realBBCodeCompiler = bbcode.NewCompiler(false, false)
|
var realBBCodeCompiler = bbcode.NewCompiler(false, false)
|
||||||
|
@ -245,38 +246,42 @@ func (s bbcodeParser) Parse(parent gast.Node, block text.Reader, pc parser.Conte
|
||||||
_, pos := block.Position()
|
_, pos := block.Position()
|
||||||
restOfSource := block.Source()[pos.Start:]
|
restOfSource := block.Source()[pos.Start:]
|
||||||
|
|
||||||
matches := reTag.FindAllSubmatchIndex(restOfSource, -1)
|
openMatch := reOpenTag.FindSubmatch(restOfSource)
|
||||||
if matches == nil {
|
if openMatch == nil {
|
||||||
// No tags anywhere
|
// not a bbcode tag
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
otIndex := reTag.SubexpIndex("opentagname")
|
otIndex := reTag.SubexpIndex("opentagname")
|
||||||
ctIndex := reTag.SubexpIndex("closetagname")
|
ctIndex := reTag.SubexpIndex("closetagname")
|
||||||
|
|
||||||
tagName := extractStringBySubmatchIndices(restOfSource, matches[0], otIndex)
|
tagName := string(openMatch[reOpenTag.SubexpIndex("name")])
|
||||||
if tagName == "" {
|
|
||||||
// Not an opening tag
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
depth := 0
|
depth := 0
|
||||||
endIndex := -1
|
endIndex := -1
|
||||||
for _, m := range matches {
|
|
||||||
if openName := extractStringBySubmatchIndices(restOfSource, m, otIndex); openName != "" {
|
searchStartIndex := 0
|
||||||
if openName == tagName {
|
|
||||||
depth++
|
for {
|
||||||
}
|
searchText := restOfSource[searchStartIndex:]
|
||||||
} else if closeName := extractStringBySubmatchIndices(restOfSource, m, ctIndex); closeName != "" {
|
|
||||||
if closeName == tagName {
|
match := reTag.FindSubmatchIndex(searchText)
|
||||||
depth--
|
if match == nil {
|
||||||
if depth == 0 {
|
// no more tags
|
||||||
// We have balanced out!
|
break
|
||||||
endIndex = m[1] // the end index of this closing tag (exclusive)
|
}
|
||||||
break
|
|
||||||
}
|
if openName := extractStringBySubmatchIndices(searchText, match, otIndex); openName == tagName {
|
||||||
|
depth++
|
||||||
|
} else if closeName := extractStringBySubmatchIndices(searchText, match, ctIndex); closeName == tagName {
|
||||||
|
depth--
|
||||||
|
if depth == 0 {
|
||||||
|
// We have balanced out!
|
||||||
|
endIndex = searchStartIndex + match[1] // the end index of this closing tag (exclusive)
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
searchStartIndex = searchStartIndex + match[1]
|
||||||
}
|
}
|
||||||
if endIndex < 0 {
|
if endIndex < 0 {
|
||||||
// Unbalanced, too many opening tags
|
// Unbalanced, too many opening tags
|
||||||
|
|
|
@ -63,6 +63,26 @@ func main() {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSharlock(t *testing.T) {
|
||||||
|
t.Skipf("This doesn't pass right now because parts of Sharlock's original source read as indented code blocks, or depend on different line break behavior.")
|
||||||
|
t.Run("sanity check", func(t *testing.T) {
|
||||||
|
result := ParsePostInput(sharlock, RealMarkdown)
|
||||||
|
|
||||||
|
for _, line := range strings.Split(result, "\n") {
|
||||||
|
assert.NotContains(t, line, "[b]")
|
||||||
|
assert.NotContains(t, line, "[/b]")
|
||||||
|
assert.NotContains(t, line, "[ul]")
|
||||||
|
assert.NotContains(t, line, "[/ul]")
|
||||||
|
assert.NotContains(t, line, "[li]")
|
||||||
|
assert.NotContains(t, line, "[/li]")
|
||||||
|
assert.NotContains(t, line, "[img]")
|
||||||
|
assert.NotContains(t, line, "[/img]")
|
||||||
|
assert.NotContains(t, line, "[code")
|
||||||
|
assert.NotContains(t, line, "[/code]")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkSharlock(b *testing.B) {
|
func BenchmarkSharlock(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
ParsePostInput(sharlock, RealMarkdown)
|
ParsePostInput(sharlock, RealMarkdown)
|
||||||
|
|
Reference in New Issue