From 7d5590ee10be509db5a0287b20228dcab9c340ff Mon Sep 17 00:00:00 2001
From: Ben Visness
Date: Mon, 23 Aug 2021 22:26:27 -0500
Subject: [PATCH] Process Discord markdown for snippets

---
 src/discord/markdown.go      | 155 +++++++++++++++++++++++++++++++++++
 src/discord/markdown_test.go |  38 +++++++++
 src/discord/payloads.go      |  14 ++++
 src/discord/rest.go          | 106 +++++++++++++++++++++++
 src/discord/showcase.go      |  10 +--
 src/discord/todo.txt         |   4 +
 src/models/discord.go        |   2 +-
 7 files changed, 323 insertions(+), 6 deletions(-)
 create mode 100644 src/discord/markdown.go
 create mode 100644 src/discord/markdown_test.go

diff --git a/src/discord/markdown.go b/src/discord/markdown.go
new file mode 100644
index 0000000..313bc7d
--- /dev/null
+++ b/src/discord/markdown.go
@@ -0,0 +1,155 @@
+package discord
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"regexp"
+	"strings"
+	"sync"
+
+	"git.handmade.network/hmn/hmn/src/config"
+	"git.handmade.network/hmn/hmn/src/logging"
+)
+
+// Patterns for Discord's custom message markup. See
+// https://discord.com/developers/docs/reference#message-formatting
+var (
+	REMarkdownUser         = regexp.MustCompile(`<@([0-9]+)>`)
+	REMarkdownUserNickname = regexp.MustCompile(`<@!([0-9]+)>`)
+	REMarkdownChannel      = regexp.MustCompile(`<#([0-9]+)>`)
+	REMarkdownRole         = regexp.MustCompile(`<@&([0-9]+)>`)
+	REMarkdownCustomEmoji  = regexp.MustCompile(`<a?:(\w+):[0-9]+>`) // includes animated
+	REMarkdownTimestamp    = regexp.MustCompile(`<t:([0-9]+)(:([tTdDfFR]))?>`) // optional style suffix
+)
+
+// CleanUpMarkdown rewrites Discord-specific markup in original into plain,
+// readable text: user and role mentions become "@name", channel mentions
+// become "#name", custom emoji become ":name:", and timestamps are removed.
+// Names are looked up through the Discord API for the configured guild; when
+// a lookup fails, "Unknown User", "Unknown Channel", or "Unknown Role" is
+// substituted instead, so the function always returns text and never an error.
+func CleanUpMarkdown(ctx context.Context, original string) string {
+	userMatches := REMarkdownUser.FindAllStringSubmatch(original, -1)
+	userNicknameMatches := REMarkdownUserNickname.FindAllStringSubmatch(original, -1)
+	channelMatches := REMarkdownChannel.FindAllStringSubmatch(original, -1)
+	roleMatches := REMarkdownRole.FindAllStringSubmatch(original, -1)
+	customEmojiMatches := REMarkdownCustomEmoji.FindAllStringSubmatch(original, -1)
+	timestampMatches := REMarkdownTimestamp.FindAllStringSubmatch(original, -1)
+
+	userIdsToFetch := map[string]struct{}{}
+
+	for _, m := range userMatches {
+		userIdsToFetch[m[1]] = struct{}{}
+	}
+	for _, m := range userNicknameMatches {
+		userIdsToFetch[m[1]] = struct{}{}
+	}
+
+	// do the requests, gathering the resulting data
+	userNames := map[string]string{}
+	userNicknames := map[string]string{}
+	channelNames := map[string]string{}
+	roleNames := map[string]string{}
+	var wg sync.WaitGroup
+	var mutex sync.Mutex
+
+	for userId := range userIdsToFetch {
+		wg.Add(1)
+		go func(ctx context.Context, userId string) {
+			defer wg.Done()
+			member, err := GetGuildMember(ctx, config.Config.Discord.GuildID, userId)
+			if err != nil {
+				if errors.Is(err, NotFound) {
+					// not a problem
+				} else {
+					logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch guild member for markdown")
+				}
+				return
+			}
+			func() {
+				mutex.Lock()
+				defer mutex.Unlock()
+				if member.User != nil {
+					userNames[userId] = member.User.Username
+				}
+				if member.Nick != nil {
+					userNicknames[userId] = *member.Nick
+				}
+			}()
+		}(ctx, userId)
+	}
+
+	if len(channelMatches) > 0 {
+		wg.Add(1)
+		go func(ctx context.Context) {
+			defer wg.Done()
+			channels, err := GetGuildChannels(ctx, config.Config.Discord.GuildID)
+			if err != nil {
+				logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch channels for markdown")
+				return
+			}
+			for _, channel := range channels {
+				channelNames[channel.ID] = channel.Name
+			}
+		}(ctx)
+	}
+
+	if len(roleMatches) > 0 {
+		wg.Add(1)
+		go func(ctx context.Context) {
+			defer wg.Done()
+			roles, err := GetGuildRoles(ctx, config.Config.Discord.GuildID)
+			if err != nil {
+				logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch roles for markdown")
+				return
+			}
+			for _, role := range roles {
+				roleNames[role.ID] = role.Name
+			}
+		}(ctx)
+	}
+
+	wg.Wait()
+
+	// Replace all the everything
+	res := original
+	for _, m := range userMatches {
+		resultName := "Unknown User"
+		if name, ok := userNames[m[1]]; ok {
+			resultName = name
+		}
+		res = strings.Replace(res, m[0], fmt.Sprintf("@%s", resultName), 1)
+	}
+	for _, m := range userNicknameMatches {
+		resultName := "Unknown User"
+		if name, ok := userNicknames[m[1]]; ok {
+			resultName = name
+		} else if name, ok := userNames[m[1]]; ok {
+			resultName = name
+		}
+		res = strings.Replace(res, m[0], fmt.Sprintf("@%s", resultName), 1)
+	}
+	for _, m := range channelMatches {
+		resultName := "Unknown Channel"
+		if name, ok := channelNames[m[1]]; ok {
+			resultName = name
+		}
+		res = strings.Replace(res, m[0], fmt.Sprintf("#%s", resultName), 1)
+	}
+	for _, m := range roleMatches {
+		resultName := "Unknown Role"
+		if name, ok := roleNames[m[1]]; ok {
+			resultName = name
+		}
+		res = strings.Replace(res, m[0], fmt.Sprintf("@%s", resultName), 1)
+	}
+	for _, m := range customEmojiMatches {
+		res = strings.Replace(res, m[0], fmt.Sprintf(":%s:", m[1]), 1)
+	}
+	for _, m := range timestampMatches {
+		res = strings.Replace(res, m[0], "", 1) // TODO: Actual timestamp stuff? Is it worth it?
+	}
+
+	return res
+}
diff --git a/src/discord/markdown_test.go b/src/discord/markdown_test.go
new file mode 100644
index 0000000..6c55373
--- /dev/null
+++ b/src/discord/markdown_test.go
@@ -0,0 +1,38 @@
+package discord
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCleanUpMarkdown(t *testing.T) {
+	t.Skip("Skipping these tests because they are server-specific and make network requests. Feel free to re-enable, but don't commit :)")
+
+	const userBen = "<@!132715550571888640>"
+	const channelShowcaseTest = "<#759497527883202582>"
+	const roleHmnMember = "<@&876685379770646538>"
+
+	t.Run("normal behavior", func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel()
+
+		assert.Equal(t, "@Frogbot some stuff", CleanUpMarkdown(ctx, "<@!745051593728196732> some stuff"))
+		assert.Equal(t,
+			"users: @Unknown User @bvisness @bvisness, channels: #Unknown Channel #showcase-test #showcase-test, roles: @Unknown Role @HMN Member @HMN Member, :shakefist: also normal text",
+			CleanUpMarkdown(ctx, fmt.Sprintf("users: <@!000000> %s %s, channels: <#000000> %s %s, roles: <@&000000> %s %s, <a:shakefist:000000> also normal text", userBen, userBen, channelShowcaseTest, channelShowcaseTest, roleHmnMember, roleHmnMember)),
+		)
+	})
+	t.Run("context cancellation", func(t *testing.T) {
+		ctx, cancel := context.WithCancel(context.Background())
+		cancel() // immediately cancel
+
+		assert.Equal(t,
+			"@Unknown User #Unknown Channel @Unknown Role",
+			CleanUpMarkdown(ctx, fmt.Sprintf("%s %s %s", userBen, channelShowcaseTest, roleHmnMember)),
+		)
+	})
+}
diff --git a/src/discord/payloads.go b/src/discord/payloads.go
index 765608e..0b0f786 100644
--- a/src/discord/payloads.go
+++ b/src/discord/payloads.go
@@ -127,6 +127,13 @@ const (
 	ChannelTypeGuildStageVoice ChannelType = 13
 )
 
+// https://discord.com/developers/docs/topics/permissions#role-object
+type Role struct {
+	ID   string `json:"id"`
+	Name string `json:"name"`
+	// more fields not yet present
+}
+
 // https://discord.com/developers/docs/resources/channel#channel-object
 type Channel struct {
 	ID string `json:"id"`
@@ -285,6 +292,13 @@ func UserFromMap(m interface{}) User {
 	return u
 }
 
+// https://discord.com/developers/docs/resources/guild#guild-member-object
+type GuildMember struct {
+	User *User   `json:"user"`
+	Nick *string `json:"nick"`
+	// more fields not yet handled here
+}
+
 // https://discord.com/developers/docs/resources/channel#attachment-object
 type Attachment struct {
 	ID string `json:"id"`
diff --git a/src/discord/rest.go b/src/discord/rest.go
index e11dae5..8387e11 100644
--- a/src/discord/rest.go
+++ b/src/discord/rest.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -26,6 +27,9 @@ const (
 
 var UserAgent = fmt.Sprintf("%s (%s, %s)", BotName, UserAgentURL, UserAgentVersion)
 
+// NotFound is returned when Discord responds with 404 for the requested resource.
+var NotFound = errors.New("not found")
+
 var httpClient = &http.Client{}
 
 func buildUrl(path string) string {
@@ -83,6 +87,108 @@ func GetGatewayBot(ctx context.Context) (*GetGatewayBotResponse, error) {
 	return &result, nil
 }
 
+// GetGuildRoles fetches every role defined in the given guild.
+// https://discord.com/developers/docs/resources/guild#get-guild-roles
+func GetGuildRoles(ctx context.Context, guildID string) ([]Role, error) {
+	const name = "Get Guild Roles"
+
+	path := fmt.Sprintf("/guilds/%s/roles", guildID)
+	res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
+		return makeRequest(ctx, http.MethodGet, path, nil)
+	})
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode >= 400 {
+		logErrorResponse(ctx, name, res, "")
+		return nil, oops.New(nil, "received error from Discord")
+	}
+
+	bodyBytes, err := io.ReadAll(res.Body)
+	if err != nil {
+		return nil, oops.New(err, "failed to read Discord response body")
+	}
+
+	var roles []Role
+	err = json.Unmarshal(bodyBytes, &roles)
+	if err != nil {
+		return nil, oops.New(err, "failed to unmarshal Discord message")
+	}
+
+	return roles, nil
+}
+
+// GetGuildChannels fetches the channels of the given guild.
+// https://discord.com/developers/docs/resources/guild#get-guild-channels
+func GetGuildChannels(ctx context.Context, guildID string) ([]Channel, error) {
+	const name = "Get Guild Channels"
+
+	path := fmt.Sprintf("/guilds/%s/channels", guildID)
+	res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
+		return makeRequest(ctx, http.MethodGet, path, nil)
+	})
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode >= 400 {
+		logErrorResponse(ctx, name, res, "")
+		return nil, oops.New(nil, "received error from Discord")
+	}
+
+	bodyBytes, err := io.ReadAll(res.Body)
+	if err != nil {
+		return nil, oops.New(err, "failed to read Discord response body")
+	}
+
+	var channels []Channel
+	err = json.Unmarshal(bodyBytes, &channels)
+	if err != nil {
+		return nil, oops.New(err, "failed to unmarshal Discord message")
+	}
+
+	return channels, nil
+}
+
+// GetGuildMember fetches a single member of the given guild. It returns
+// NotFound if Discord reports that no such member exists.
+// https://discord.com/developers/docs/resources/guild#get-guild-member
+func GetGuildMember(ctx context.Context, guildID, userID string) (*GuildMember, error) {
+	const name = "Get Guild Member"
+
+	path := fmt.Sprintf("/guilds/%s/members/%s", guildID, userID)
+	res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
+		return makeRequest(ctx, http.MethodGet, path, nil)
+	})
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode == http.StatusNotFound {
+		return nil, NotFound
+	} else if res.StatusCode >= 400 {
+		logErrorResponse(ctx, name, res, "")
+		return nil, oops.New(nil, "received error from Discord")
+	}
+
+	bodyBytes, err := io.ReadAll(res.Body)
+	if err != nil {
+		return nil, oops.New(err, "failed to read Discord response body")
+	}
+
+	var msg GuildMember
+	err = json.Unmarshal(bodyBytes, &msg)
+	if err != nil {
+		return nil, oops.New(err, "failed to unmarshal Discord message")
+	}
+
+	return &msg, nil
+}
+
 type CreateMessageRequest struct {
 	Content string `json:"content"`
 }
diff --git a/src/discord/showcase.go b/src/discord/showcase.go
index d43359f..3f1f68a 100644
--- a/src/discord/showcase.go
+++ b/src/discord/showcase.go
@@ -16,6 +16,7 @@ import (
 	"git.handmade.network/hmn/hmn/src/logging"
 	"git.handmade.network/hmn/hmn/src/models"
 	"git.handmade.network/hmn/hmn/src/oops"
+	"git.handmade.network/hmn/hmn/src/parsing"
 	"github.com/google/uuid"
 	"github.com/jackc/pgx/v4"
 )
@@ -24,7 +25,6 @@ var reDiscordMessageLink = regexp.MustCompile(`https?://.+?(\s|$)`)
 
 var errNotEnoughInfo = errors.New("Discord didn't send enough info in this event for us to do this")
 
-// TODO: Can this function be called asynchronously?
 func (bot *botInstance) processShowcaseMsg(ctx context.Context, msg *Message) error {
 	switch msg.Type {
 	case MessageTypeDefault, MessageTypeReply, MessageTypeApplicationCommand:
@@ -224,7 +224,7 @@ func (bot *botInstance) saveMessageAndContents(
 			`,
 			newMsg.ID,
 			discordUser.ID,
-			msg.Content, // TODO: Add a method that can fill in mentions and stuff (https://discord.com/developers/docs/reference#message-formatting)
+			CleanUpMarkdown(ctx, msg.Content),
 		)
 	}
 
@@ -547,8 +547,8 @@ func (bot *botInstance) createMessageSnippet(ctx context.Context, tx pgx.Tx, msg
 	if existing.Snippet != nil {
 		// A snippet already exists - maybe update its content, then return it
 		if msg.OriginalHasFields("content") && !existing.Snippet.EditedOnWebsite {
-			contentMarkdown := msg.Content
-			contentHTML := contentMarkdown // TODO: Parse Markdown's HTML
+			contentMarkdown := existing.MessageContent.LastContent
+			contentHTML := parsing.ParseMarkdown(contentMarkdown, parsing.RealMarkdown)
 
 			_, err := tx.Exec(ctx,
 				`
@@ -587,7 +587,7 @@ func (bot *botInstance) createMessageSnippet(ctx context.Context, tx pgx.Tx, msg
 	}
 
 	contentMarkdown := existing.MessageContent.LastContent
-	contentHTML := contentMarkdown // TODO: Actually parse Discord's Markdown
+	contentHTML := parsing.ParseMarkdown(contentMarkdown, parsing.RealMarkdown)
 
 	// TODO(db): Insert
 	isnippet, err := db.QueryOne(ctx, tx, models.Snippet{},
diff --git a/src/discord/todo.txt b/src/discord/todo.txt
index 2d683ac..ba71d42 100644
--- a/src/discord/todo.txt
+++ b/src/discord/todo.txt
@@ -41,3 +41,7 @@ background stuff:
 	- look at every message ever in the channel
 	- do exactly what the real-time bot does on new messages
 		(although maybe don't do snippets depending on context)
+
+what the heck do we do with discord's markdown
+- when we save message contents, we should save both the raw discord markdown and a version with their custom stuff replaced. We do _not_ (yet) need a full markdown parse with HTML tags and stuff. (That arguably doesn't make sense for the handmade_discordmessagecontent record anyway.)
+- when we create a snippet, we should store both markdown that makes sense to a user and the rendered version of that HTML. THIS MEANS: The markdown we save is the "clean" version of the Discord markdown.
diff --git a/src/models/discord.go b/src/models/discord.go
index 4b2eeb7..33eda4a 100644
--- a/src/models/discord.go
+++ b/src/models/discord.go
@@ -41,7 +41,7 @@ account, regardless of whether we create snippets or not.
 */
 type DiscordMessageContent struct {
 	MessageID   string `db:"message_id"`
-	LastContent string `db:"last_content"`
+	LastContent string `db:"last_content"` // This should always be cleaned up with nice user IDs and stuff
 	DiscordID   int    `db:"discord_id"`
 }
 