Process Discord markdown for snippets

This commit is contained in:
Ben Visness 2021-08-23 22:26:27 -05:00
parent 69ead1f347
commit 7d5590ee10
7 changed files with 307 additions and 6 deletions

147
src/discord/markdown.go Normal file
View File

@ -0,0 +1,147 @@
package discord
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"sync"
"git.handmade.network/hmn/hmn/src/config"
"git.handmade.network/hmn/hmn/src/logging"
)
// Patterns for the Discord-specific markup handled by CleanUpMarkdown.
// In every pattern, capture group 1 is the interesting payload (an ID, an
// emoji name, or a Unix timestamp).

// REMarkdownUser matches a plain user mention such as <@1234>.
var REMarkdownUser = regexp.MustCompile(`<@([0-9]+)>`)

// REMarkdownUserNickname matches a nickname-style user mention such as <@!1234>.
var REMarkdownUserNickname = regexp.MustCompile(`<@!([0-9]+)>`)

// REMarkdownChannel matches a channel mention such as <#1234>.
var REMarkdownChannel = regexp.MustCompile(`<#([0-9]+)>`)

// REMarkdownRole matches a role mention such as <@&1234>.
var REMarkdownRole = regexp.MustCompile(`<@&([0-9]+)>`)

// REMarkdownCustomEmoji matches a custom emoji such as <:name:1234>, as well
// as the animated form <a:name:1234>. Group 1 is the emoji name.
var REMarkdownCustomEmoji = regexp.MustCompile(`<a?:(\w+):[0-9]+>`) // includes animated

// REMarkdownTimestamp matches a timestamp such as <t:1234> or <t:1234:R>.
// Group 1 is the numeric timestamp and group 3 is the optional style letter.
var REMarkdownTimestamp = regexp.MustCompile(`<t:([0-9]+)(:([tTdDfFR]))?>`)
// CleanUpMarkdown rewrites the Discord-specific markup in original into
// plain, human-readable text and returns the result:
//
//   - user mentions (<@id> and <@!id>) become "@name"; the nickname form
//     prefers the member's nickname and falls back to the username
//   - channel mentions (<#id>) become "#name"
//   - role mentions (<@&id>) become "@name"
//   - custom emoji (<:name:id> / <a:name:id>) become ":name:"
//   - timestamps (<t:...>) become the literal text "<timestamp>"
//
// Names are looked up via GetGuildMember, GetGuildChannels and GetGuildRoles
// for the guild in config.Config.Discord.GuildID. Lookups run concurrently and
// are best-effort: any lookup that fails is logged (except NotFound, which is
// expected) and the affected mentions fall back to "Unknown User",
// "Unknown Channel" or "Unknown Role". The function itself never fails.
//
// All replacements are computed against offsets in original and applied in a
// single pass, so text coming back from Discord (for example a nickname that
// happens to look like markup) is never re-interpreted as a token.
func CleanUpMarkdown(ctx context.Context, original string) string {
	userMatches := REMarkdownUser.FindAllStringSubmatchIndex(original, -1)
	userNicknameMatches := REMarkdownUserNickname.FindAllStringSubmatchIndex(original, -1)
	channelMatches := REMarkdownChannel.FindAllStringSubmatchIndex(original, -1)
	roleMatches := REMarkdownRole.FindAllStringSubmatchIndex(original, -1)
	customEmojiMatches := REMarkdownCustomEmoji.FindAllStringSubmatchIndex(original, -1)
	timestampMatches := REMarkdownTimestamp.FindAllStringSubmatchIndex(original, -1)

	matchCount := len(userMatches) + len(userNicknameMatches) + len(channelMatches) +
		len(roleMatches) + len(customEmojiMatches) + len(timestampMatches)
	if matchCount == 0 {
		// Nothing to rewrite, and therefore nothing to fetch.
		return original
	}

	// firstGroup returns the text of capture group 1 of an index match.
	// Group 1 is mandatory in every pattern, so its offsets are always valid.
	firstGroup := func(m []int) string {
		return original[m[2]:m[3]]
	}

	// Deduplicate user IDs so each member is fetched at most once.
	userIDsToFetch := map[string]struct{}{}
	for _, m := range userMatches {
		userIDsToFetch[firstGroup(m)] = struct{}{}
	}
	for _, m := range userNicknameMatches {
		userIDsToFetch[firstGroup(m)] = struct{}{}
	}

	// do the requests, gathering the resulting data
	userNames := map[string]string{}
	userNicknames := map[string]string{}
	channelNames := map[string]string{}
	roleNames := map[string]string{}

	var wg sync.WaitGroup
	var mutex sync.Mutex // guards userNames and userNicknames

	for userID := range userIDsToFetch {
		wg.Add(1)
		go func(ctx context.Context, userID string) {
			defer wg.Done()

			member, err := GetGuildMember(ctx, config.Config.Discord.GuildID, userID)
			if err != nil {
				// A missing member is not a problem; anything else is worth a warning.
				if !errors.Is(err, NotFound) {
					logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch guild member for markdown")
				}
				return
			}

			mutex.Lock()
			defer mutex.Unlock()
			if member.User != nil {
				userNames[userID] = member.User.Username
			}
			if member.Nick != nil {
				userNicknames[userID] = *member.Nick
			}
		}(ctx, userID)
	}

	if len(channelMatches) > 0 {
		wg.Add(1)
		go func(ctx context.Context) {
			defer wg.Done()

			channels, err := GetGuildChannels(ctx, config.Config.Discord.GuildID)
			if err != nil {
				logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch channels for markdown")
				return
			}
			// Only this goroutine writes channelNames, and it is read only
			// after wg.Wait(), so no lock is needed.
			for _, channel := range channels {
				channelNames[channel.ID] = channel.Name
			}
		}(ctx)
	}

	if len(roleMatches) > 0 {
		wg.Add(1)
		go func(ctx context.Context) {
			defer wg.Done()

			roles, err := GetGuildRoles(ctx, config.Config.Discord.GuildID)
			if err != nil {
				logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch roles for markdown")
				return
			}
			// Same single-writer reasoning as channelNames above.
			for _, role := range roles {
				roleNames[role.ID] = role.Name
			}
		}(ctx)
	}

	wg.Wait()

	// Replace all the everything.
	//
	// Every pattern starts with '<' and cannot contain another '<', so matches
	// from different patterns never overlap. That lets us key replacements by
	// their start offset in original.
	type replacement struct {
		end  int    // offset just past the matched token in original
		text string // what to emit instead of the token
	}
	replacements := make(map[int]replacement, matchCount)

	for _, m := range userMatches {
		resultName := "Unknown User"
		if name, ok := userNames[firstGroup(m)]; ok {
			resultName = name
		}
		replacements[m[0]] = replacement{end: m[1], text: fmt.Sprintf("@%s", resultName)}
	}
	for _, m := range userNicknameMatches {
		id := firstGroup(m)
		resultName := "Unknown User"
		if name, ok := userNicknames[id]; ok {
			resultName = name
		} else if name, ok := userNames[id]; ok {
			resultName = name
		}
		replacements[m[0]] = replacement{end: m[1], text: fmt.Sprintf("@%s", resultName)}
	}
	for _, m := range channelMatches {
		resultName := "Unknown Channel"
		if name, ok := channelNames[firstGroup(m)]; ok {
			resultName = name
		}
		replacements[m[0]] = replacement{end: m[1], text: fmt.Sprintf("#%s", resultName)}
	}
	for _, m := range roleMatches {
		resultName := "Unknown Role"
		if name, ok := roleNames[firstGroup(m)]; ok {
			resultName = name
		}
		replacements[m[0]] = replacement{end: m[1], text: fmt.Sprintf("@%s", resultName)}
	}
	for _, m := range customEmojiMatches {
		replacements[m[0]] = replacement{end: m[1], text: fmt.Sprintf(":%s:", firstGroup(m))}
	}
	for _, m := range timestampMatches {
		replacements[m[0]] = replacement{end: m[1], text: "<timestamp>"} // TODO: Actual timestamp stuff? Is it worth it?
	}

	// Single left-to-right pass: copy untouched text verbatim and swap each
	// token for its replacement.
	var out strings.Builder
	out.Grow(len(original))
	copied := 0 // everything before this offset has already been emitted
	for pos := 0; pos < len(original); {
		if original[pos] == '<' {
			if rep, ok := replacements[pos]; ok {
				out.WriteString(original[copied:pos])
				out.WriteString(rep.text)
				pos = rep.end
				copied = pos
				continue
			}
		}
		pos++
	}
	out.WriteString(original[copied:])

	return out.String()
}

View File

@ -0,0 +1,38 @@
package discord
import (
"context"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestCleanUpMarkdown(t *testing.T) {
t.Skip("Skipping these tests because they are server-specific and make network requests. Feel free to re-enable, but don't commit :)")
const userBen = "<@!132715550571888640>"
const channelShowcaseTest = "<#759497527883202582>"
const roleHmnMember = "<@&876685379770646538>"
t.Run("normal behavior", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
assert.Equal(t, "@Frogbot some stuff", CleanUpMarkdown(ctx, "<@!745051593728196732> some stuff"))
assert.Equal(t,
"users: @Unknown User @bvisness @bvisness, channels: #Unknown Channel #showcase-test #showcase-test, roles: @Unknown Role @HMN Member @HMN Member, :shakefist: also normal text",
CleanUpMarkdown(ctx, fmt.Sprintf("users: <@!000000> %s %s, channels: <#000000> %s %s, roles: <@&000000> %s %s, <a:shakefist:798333915973943307> also normal text", userBen, userBen, channelShowcaseTest, channelShowcaseTest, roleHmnMember, roleHmnMember)),
)
})
t.Run("context cancellation", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel() // immediately cancel
assert.Equal(t,
"@Unknown User #Unknown Channel @Unknown Role",
CleanUpMarkdown(ctx, fmt.Sprintf("%s %s %s", userBen, channelShowcaseTest, roleHmnMember)),
)
})
}

View File

@ -127,6 +127,13 @@ const (
ChannelTypeGuildStageVoice ChannelType = 13
)
// Role holds the subset of Discord's role object that is decoded from JSON
// here: the role's ID and its display name.
//
// https://discord.com/developers/docs/topics/permissions#role-object
type Role struct {
	ID   string `json:"id"`
	Name string `json:"name"`
	// more fields not yet present
}
// https://discord.com/developers/docs/resources/channel#channel-object
type Channel struct {
ID string `json:"id"`
@ -285,6 +292,13 @@ func UserFromMap(m interface{}) User {
return u
}
// GuildMember holds the subset of Discord's guild member object that is
// decoded from JSON here. Both fields are pointers, so a key that is absent
// or null in the payload leaves the field nil.
//
// https://discord.com/developers/docs/resources/guild#guild-member-object
type GuildMember struct {
	User *User   `json:"user"`
	Nick *string `json:"nick"`
	// more fields not yet handled here
}
// https://discord.com/developers/docs/resources/channel#attachment-object
type Attachment struct {
ID string `json:"id"`

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
@ -26,6 +27,8 @@ const (
// UserAgent is a string assembled from BotName, UserAgentURL and
// UserAgentVersion in the form "name (url, version)".
var UserAgent = fmt.Sprintf("%s (%s, %s)", BotName, UserAgentURL, UserAgentVersion)

// NotFound is the sentinel error GetGuildMember returns when Discord answers
// with HTTP 404. Compare against it with errors.Is.
var NotFound = errors.New("not found")

// httpClient is a package-level HTTP client built with zero-value settings,
// so no client-level timeout is configured here.
var httpClient = &http.Client{}
func buildUrl(path string) string {
@ -83,6 +86,101 @@ func GetGatewayBot(ctx context.Context) (*GetGatewayBotResponse, error) {
return &result, nil
}
// GetGuildRoles issues GET /guilds/{guildID}/roles through doWithRateLimiting
// and decodes the JSON response into a slice of Role.
//
// It returns an error if the request itself fails, if Discord responds with a
// status of 400 or above (the response is also passed to logErrorResponse),
// if the response body cannot be read, or if the body is not valid JSON for
// []Role.
func GetGuildRoles(ctx context.Context, guildID string) ([]Role, error) {
	const name = "Get Guild Roles"

	path := fmt.Sprintf("/guilds/%s/roles", guildID)
	res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
		return makeRequest(ctx, http.MethodGet, path, nil)
	})
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode >= 400 {
		logErrorResponse(ctx, name, res, "")
		return nil, oops.New(nil, "received error from Discord")
	}

	bodyBytes, err := io.ReadAll(res.Body)
	if err != nil {
		// A failed body read is an ordinary runtime failure (dropped
		// connection, cancelled context), so report it instead of panicking:
		// this function is called from goroutines that have no recover.
		return nil, oops.New(err, "failed to read Discord response body")
	}

	var roles []Role
	if err := json.Unmarshal(bodyBytes, &roles); err != nil {
		return nil, oops.New(err, "failed to unmarshal Discord roles")
	}

	return roles, nil
}
// GetGuildChannels issues GET /guilds/{guildID}/channels through
// doWithRateLimiting and decodes the JSON response into a slice of Channel.
//
// It returns an error if the request itself fails, if Discord responds with a
// status of 400 or above (the response is also passed to logErrorResponse),
// if the response body cannot be read, or if the body is not valid JSON for
// []Channel.
func GetGuildChannels(ctx context.Context, guildID string) ([]Channel, error) {
	const name = "Get Guild Channels"

	path := fmt.Sprintf("/guilds/%s/channels", guildID)
	res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
		return makeRequest(ctx, http.MethodGet, path, nil)
	})
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode >= 400 {
		logErrorResponse(ctx, name, res, "")
		return nil, oops.New(nil, "received error from Discord")
	}

	bodyBytes, err := io.ReadAll(res.Body)
	if err != nil {
		// A failed body read is an ordinary runtime failure (dropped
		// connection, cancelled context), so report it instead of panicking:
		// this function is called from goroutines that have no recover.
		return nil, oops.New(err, "failed to read Discord response body")
	}

	var channels []Channel
	if err := json.Unmarshal(bodyBytes, &channels); err != nil {
		return nil, oops.New(err, "failed to unmarshal Discord channels")
	}

	return channels, nil
}
// GetGuildMember issues GET /guilds/{guildID}/members/{userID} through
// doWithRateLimiting and decodes the JSON response into a GuildMember.
//
// It returns the sentinel NotFound when Discord responds with HTTP 404, so
// callers can tell "no such member" apart from real failures via errors.Is.
// Any other status of 400 or above is passed to logErrorResponse and reported
// as a generic error. It also returns an error if the request itself fails,
// if the response body cannot be read, or if the body is not valid JSON for
// GuildMember.
func GetGuildMember(ctx context.Context, guildID, userID string) (*GuildMember, error) {
	const name = "Get Guild Member"

	path := fmt.Sprintf("/guilds/%s/members/%s", guildID, userID)
	res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
		return makeRequest(ctx, http.MethodGet, path, nil)
	})
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	switch {
	case res.StatusCode == http.StatusNotFound:
		return nil, NotFound
	case res.StatusCode >= 400:
		logErrorResponse(ctx, name, res, "")
		return nil, oops.New(nil, "received error from Discord")
	}

	bodyBytes, err := io.ReadAll(res.Body)
	if err != nil {
		// A failed body read is an ordinary runtime failure (dropped
		// connection, cancelled context), so report it instead of panicking:
		// this function is called from goroutines that have no recover.
		return nil, oops.New(err, "failed to read Discord response body")
	}

	var member GuildMember
	if err := json.Unmarshal(bodyBytes, &member); err != nil {
		return nil, oops.New(err, "failed to unmarshal Discord guild member")
	}

	return &member, nil
}
// CreateMessageRequest is a JSON payload with a single "content" field
// holding a message's text.
type CreateMessageRequest struct {
	Content string `json:"content"`
}

View File

@ -16,6 +16,7 @@ import (
"git.handmade.network/hmn/hmn/src/logging"
"git.handmade.network/hmn/hmn/src/models"
"git.handmade.network/hmn/hmn/src/oops"
"git.handmade.network/hmn/hmn/src/parsing"
"github.com/google/uuid"
"github.com/jackc/pgx/v4"
)
@ -24,7 +25,6 @@ var reDiscordMessageLink = regexp.MustCompile(`https?://.+?(\s|$)`)
var errNotEnoughInfo = errors.New("Discord didn't send enough info in this event for us to do this")
// TODO: Can this function be called asynchronously?
func (bot *botInstance) processShowcaseMsg(ctx context.Context, msg *Message) error {
switch msg.Type {
case MessageTypeDefault, MessageTypeReply, MessageTypeApplicationCommand:
@ -224,7 +224,7 @@ func (bot *botInstance) saveMessageAndContents(
`,
newMsg.ID,
discordUser.ID,
msg.Content, // TODO: Add a method that can fill in mentions and stuff (https://discord.com/developers/docs/reference#message-formatting)
CleanUpMarkdown(ctx, msg.Content),
)
}
@ -547,8 +547,8 @@ func (bot *botInstance) createMessageSnippet(ctx context.Context, tx pgx.Tx, msg
if existing.Snippet != nil {
// A snippet already exists - maybe update its content, then return it
if msg.OriginalHasFields("content") && !existing.Snippet.EditedOnWebsite {
contentMarkdown := msg.Content
contentHTML := contentMarkdown // TODO: Parse Markdown's HTML
contentMarkdown := existing.MessageContent.LastContent
contentHTML := parsing.ParseMarkdown(contentMarkdown, parsing.RealMarkdown)
_, err := tx.Exec(ctx,
`
@ -587,7 +587,7 @@ func (bot *botInstance) createMessageSnippet(ctx context.Context, tx pgx.Tx, msg
}
contentMarkdown := existing.MessageContent.LastContent
contentHTML := contentMarkdown // TODO: Actually parse Discord's Markdown
contentHTML := parsing.ParseMarkdown(contentMarkdown, parsing.RealMarkdown)
// TODO(db): Insert
isnippet, err := db.QueryOne(ctx, tx, models.Snippet{},

View File

@ -41,3 +41,7 @@ background stuff:
- look at every message ever in the channel
- do exactly what the real-time bot does on new messages (although maybe don't do snippets depending on context)
what the heck do we do with discord's markdown
- when we save message contents, we should save both the raw discord markdown and a version with their custom stuff replaced. We do _not_ (yet) need a full markdown parse with HTML tags and stuff. (That arguably doesn't make sense for the handmade_discordmessagecontent record anyway.)
- when we create a snippet, we should store both markdown that makes sense to a user and the HTML rendered from that markdown. THIS MEANS: The markdown we save is the "clean" version of the Discord markdown.

View File

@ -41,7 +41,7 @@ account, regardless of whether we create snippets or not.
*/
type DiscordMessageContent struct {
MessageID string `db:"message_id"`
LastContent string `db:"last_content"`
LastContent string `db:"last_content"` // This should always be cleaned up with nice user IDs and stuff
DiscordID int `db:"discord_id"`
}