Process Discord markdown for snippets
This commit is contained in:
parent
69ead1f347
commit
7d5590ee10
|
@ -0,0 +1,147 @@
|
|||
package discord
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"git.handmade.network/hmn/hmn/src/config"
|
||||
"git.handmade.network/hmn/hmn/src/logging"
|
||||
)
|
||||
|
||||
// Patterns for the Discord-specific formatting tokens that CleanUpMarkdown
// rewrites into plain text. Every pattern matches one complete token,
// angle brackets included.
//
// https://discord.com/developers/docs/reference#message-formatting
var (
	// REMarkdownUser matches a plain user mention, e.g. <@123>.
	// Group 1 is the user ID.
	REMarkdownUser = regexp.MustCompile(`<@([0-9]+)>`)

	// REMarkdownUserNickname matches a nickname-style user mention,
	// e.g. <@!123>. Group 1 is the user ID.
	REMarkdownUserNickname = regexp.MustCompile(`<@!([0-9]+)>`)

	// REMarkdownChannel matches a channel mention, e.g. <#123>.
	// Group 1 is the channel ID.
	REMarkdownChannel = regexp.MustCompile(`<#([0-9]+)>`)

	// REMarkdownRole matches a role mention, e.g. <@&123>.
	// Group 1 is the role ID.
	REMarkdownRole = regexp.MustCompile(`<@&([0-9]+)>`)

	// REMarkdownCustomEmoji matches a custom emoji, static (<:name:123>) or
	// animated (<a:name:123>). Group 1 is the emoji name.
	REMarkdownCustomEmoji = regexp.MustCompile(`<a?:(\w+):[0-9]+>`) // includes animated

	// REMarkdownTimestamp matches a timestamp token, e.g. <t:1618953630> or
	// <t:1618953630:R>. Group 1 is the numeric timestamp; group 3 is the
	// optional single-letter style flag.
	REMarkdownTimestamp = regexp.MustCompile(`<t:([0-9]+)(:([tTdDfFR]))?>`)
)
|
||||
|
||||
func CleanUpMarkdown(ctx context.Context, original string) string {
|
||||
userMatches := REMarkdownUser.FindAllStringSubmatch(original, -1)
|
||||
userNicknameMatches := REMarkdownUserNickname.FindAllStringSubmatch(original, -1)
|
||||
channelMatches := REMarkdownChannel.FindAllStringSubmatch(original, -1)
|
||||
roleMatches := REMarkdownRole.FindAllStringSubmatch(original, -1)
|
||||
customEmojiMatches := REMarkdownCustomEmoji.FindAllStringSubmatch(original, -1)
|
||||
timestampMatches := REMarkdownTimestamp.FindAllStringSubmatch(original, -1)
|
||||
|
||||
userIdsToFetch := map[string]struct{}{}
|
||||
|
||||
for _, m := range userMatches {
|
||||
userIdsToFetch[m[1]] = struct{}{}
|
||||
}
|
||||
for _, m := range userNicknameMatches {
|
||||
userIdsToFetch[m[1]] = struct{}{}
|
||||
}
|
||||
|
||||
// do the requests, gathering the resulting data
|
||||
userNames := map[string]string{}
|
||||
userNicknames := map[string]string{}
|
||||
channelNames := map[string]string{}
|
||||
roleNames := map[string]string{}
|
||||
var wg sync.WaitGroup
|
||||
var mutex sync.Mutex
|
||||
|
||||
for userId := range userIdsToFetch {
|
||||
wg.Add(1)
|
||||
go func(ctx context.Context, userId string) {
|
||||
defer wg.Done()
|
||||
member, err := GetGuildMember(ctx, config.Config.Discord.GuildID, userId)
|
||||
if err != nil {
|
||||
if errors.Is(err, NotFound) {
|
||||
// not a problem
|
||||
} else if err != nil {
|
||||
logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch guild member for markdown")
|
||||
}
|
||||
return
|
||||
}
|
||||
func() {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
if member.User != nil {
|
||||
userNames[userId] = member.User.Username
|
||||
}
|
||||
if member.Nick != nil {
|
||||
userNicknames[userId] = *member.Nick
|
||||
}
|
||||
}()
|
||||
}(ctx, userId)
|
||||
}
|
||||
|
||||
if len(channelMatches) > 0 {
|
||||
wg.Add(1)
|
||||
go func(ctx context.Context) {
|
||||
defer wg.Done()
|
||||
channels, err := GetGuildChannels(ctx, config.Config.Discord.GuildID)
|
||||
if err != nil {
|
||||
logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch channels for markdown")
|
||||
return
|
||||
}
|
||||
for _, channel := range channels {
|
||||
channelNames[channel.ID] = channel.Name
|
||||
}
|
||||
}(ctx)
|
||||
}
|
||||
|
||||
if len(roleMatches) > 0 {
|
||||
wg.Add(1)
|
||||
go func(ctx context.Context) {
|
||||
defer wg.Done()
|
||||
roles, err := GetGuildRoles(ctx, config.Config.Discord.GuildID)
|
||||
if err != nil {
|
||||
logging.ExtractLogger(ctx).Warn().Err(err).Msg("failed to fetch roles for markdown")
|
||||
return
|
||||
}
|
||||
for _, role := range roles {
|
||||
roleNames[role.ID] = role.Name
|
||||
}
|
||||
}(ctx)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Replace all the everything
|
||||
res := original
|
||||
for _, m := range userMatches {
|
||||
resultName := "Unknown User"
|
||||
if name, ok := userNames[m[1]]; ok {
|
||||
resultName = name
|
||||
}
|
||||
res = strings.Replace(res, m[0], fmt.Sprintf("@%s", resultName), 1)
|
||||
}
|
||||
for _, m := range userNicknameMatches {
|
||||
resultName := "Unknown User"
|
||||
if name, ok := userNicknames[m[1]]; ok {
|
||||
resultName = name
|
||||
} else if name, ok := userNames[m[1]]; ok {
|
||||
resultName = name
|
||||
}
|
||||
res = strings.Replace(res, m[0], fmt.Sprintf("@%s", resultName), 1)
|
||||
}
|
||||
for _, m := range channelMatches {
|
||||
resultName := "Unknown Channel"
|
||||
if name, ok := channelNames[m[1]]; ok {
|
||||
resultName = name
|
||||
}
|
||||
res = strings.Replace(res, m[0], fmt.Sprintf("#%s", resultName), 1)
|
||||
}
|
||||
for _, m := range roleMatches {
|
||||
resultName := "Unknown Role"
|
||||
if name, ok := roleNames[m[1]]; ok {
|
||||
resultName = name
|
||||
}
|
||||
res = strings.Replace(res, m[0], fmt.Sprintf("@%s", resultName), 1)
|
||||
}
|
||||
for _, m := range customEmojiMatches {
|
||||
res = strings.Replace(res, m[0], fmt.Sprintf(":%s:", m[1]), 1)
|
||||
}
|
||||
for _, m := range timestampMatches {
|
||||
res = strings.Replace(res, m[0], "<timestamp>", 1) // TODO: Actual timestamp stuff? Is it worth it?
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
package discord
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestCleanUpMarkdown(t *testing.T) {
|
||||
t.Skip("Skipping these tests because they are server-specific and make network requests. Feel free to re-enable, but don't commit :)")
|
||||
|
||||
const userBen = "<@!132715550571888640>"
|
||||
const channelShowcaseTest = "<#759497527883202582>"
|
||||
const roleHmnMember = "<@&876685379770646538>"
|
||||
|
||||
t.Run("normal behavior", func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
assert.Equal(t, "@Frogbot some stuff", CleanUpMarkdown(ctx, "<@!745051593728196732> some stuff"))
|
||||
assert.Equal(t,
|
||||
"users: @Unknown User @bvisness @bvisness, channels: #Unknown Channel #showcase-test #showcase-test, roles: @Unknown Role @HMN Member @HMN Member, :shakefist: also normal text",
|
||||
CleanUpMarkdown(ctx, fmt.Sprintf("users: <@!000000> %s %s, channels: <#000000> %s %s, roles: <@&000000> %s %s, <a:shakefist:798333915973943307> also normal text", userBen, userBen, channelShowcaseTest, channelShowcaseTest, roleHmnMember, roleHmnMember)),
|
||||
)
|
||||
})
|
||||
t.Run("context cancellation", func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // immediately cancel
|
||||
|
||||
assert.Equal(t,
|
||||
"@Unknown User #Unknown Channel @Unknown Role",
|
||||
CleanUpMarkdown(ctx, fmt.Sprintf("%s %s %s", userBen, channelShowcaseTest, roleHmnMember)),
|
||||
)
|
||||
})
|
||||
}
|
|
@ -127,6 +127,13 @@ const (
|
|||
ChannelTypeGuildStageVoice ChannelType = 13
|
||||
)
|
||||
|
||||
// Role holds the fields of a Discord role object that this package decodes.
//
// https://discord.com/developers/docs/topics/permissions#role-object
type Role struct {
	// ID is decoded from the "id" JSON field.
	ID string `json:"id"`
	// Name is decoded from the "name" JSON field.
	Name string `json:"name"`
	// more fields not yet present
}
|
||||
|
||||
// https://discord.com/developers/docs/resources/channel#channel-object
|
||||
type Channel struct {
|
||||
ID string `json:"id"`
|
||||
|
@ -285,6 +292,13 @@ func UserFromMap(m interface{}) User {
|
|||
return u
|
||||
}
|
||||
|
||||
// https://discord.com/developers/docs/resources/guild#guild-member-object
|
||||
type GuildMember struct {
|
||||
User *User `json:"user"`
|
||||
Nick *string `json:"nick"`
|
||||
// more fields not yet handled here
|
||||
}
|
||||
|
||||
// https://discord.com/developers/docs/resources/channel#attachment-object
|
||||
type Attachment struct {
|
||||
ID string `json:"id"`
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
|
@ -26,6 +27,8 @@ const (
|
|||
|
||||
var UserAgent = fmt.Sprintf("%s (%s, %s)", BotName, UserAgentURL, UserAgentVersion)
|
||||
|
||||
var NotFound = errors.New("not found")
|
||||
|
||||
var httpClient = &http.Client{}
|
||||
|
||||
func buildUrl(path string) string {
|
||||
|
@ -83,6 +86,101 @@ func GetGatewayBot(ctx context.Context) (*GetGatewayBotResponse, error) {
|
|||
return &result, nil
|
||||
}
|
||||
|
||||
func GetGuildRoles(ctx context.Context, guildID string) ([]Role, error) {
|
||||
const name = "Get Guild Roles"
|
||||
|
||||
path := fmt.Sprintf("/guilds/%s/roles", guildID)
|
||||
res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
|
||||
return makeRequest(ctx, http.MethodGet, path, nil)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode >= 400 {
|
||||
logErrorResponse(ctx, name, res, "")
|
||||
return nil, oops.New(nil, "received error from Discord")
|
||||
}
|
||||
|
||||
bodyBytes, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
var roles []Role
|
||||
err = json.Unmarshal(bodyBytes, &roles)
|
||||
if err != nil {
|
||||
return nil, oops.New(err, "failed to unmarshal Discord message")
|
||||
}
|
||||
|
||||
return roles, nil
|
||||
}
|
||||
|
||||
func GetGuildChannels(ctx context.Context, guildID string) ([]Channel, error) {
|
||||
const name = "Get Guild Channels"
|
||||
|
||||
path := fmt.Sprintf("/guilds/%s/channels", guildID)
|
||||
res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
|
||||
return makeRequest(ctx, http.MethodGet, path, nil)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode >= 400 {
|
||||
logErrorResponse(ctx, name, res, "")
|
||||
return nil, oops.New(nil, "received error from Discord")
|
||||
}
|
||||
|
||||
bodyBytes, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
var channels []Channel
|
||||
err = json.Unmarshal(bodyBytes, &channels)
|
||||
if err != nil {
|
||||
return nil, oops.New(err, "failed to unmarshal Discord message")
|
||||
}
|
||||
|
||||
return channels, nil
|
||||
}
|
||||
|
||||
func GetGuildMember(ctx context.Context, guildID, userID string) (*GuildMember, error) {
|
||||
const name = "Get Guild Member"
|
||||
|
||||
path := fmt.Sprintf("/guilds/%s/members/%s", guildID, userID)
|
||||
res, err := doWithRateLimiting(ctx, name, func(ctx context.Context) *http.Request {
|
||||
return makeRequest(ctx, http.MethodGet, path, nil)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode == http.StatusNotFound {
|
||||
return nil, NotFound
|
||||
} else if res.StatusCode >= 400 {
|
||||
logErrorResponse(ctx, name, res, "")
|
||||
return nil, oops.New(nil, "received error from Discord")
|
||||
}
|
||||
|
||||
bodyBytes, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
var msg GuildMember
|
||||
err = json.Unmarshal(bodyBytes, &msg)
|
||||
if err != nil {
|
||||
return nil, oops.New(err, "failed to unmarshal Discord message")
|
||||
}
|
||||
|
||||
return &msg, nil
|
||||
}
|
||||
|
||||
// CreateMessageRequest is a JSON payload with a single "content" field.
// NOTE(review): presumably the request body for Discord's Create Message
// endpoint - confirm against the function that sends it.
type CreateMessageRequest struct {
	// Content is encoded as the "content" JSON field.
	Content string `json:"content"`
}
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"git.handmade.network/hmn/hmn/src/logging"
|
||||
"git.handmade.network/hmn/hmn/src/models"
|
||||
"git.handmade.network/hmn/hmn/src/oops"
|
||||
"git.handmade.network/hmn/hmn/src/parsing"
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v4"
|
||||
)
|
||||
|
@ -24,7 +25,6 @@ var reDiscordMessageLink = regexp.MustCompile(`https?://.+?(\s|$)`)
|
|||
|
||||
// errNotEnoughInfo is a sentinel error for the case where a Discord event
// lacks the data needed to carry out the requested operation.
var errNotEnoughInfo = errors.New("Discord didn't send enough info in this event for us to do this")
|
||||
|
||||
// TODO: Can this function be called asynchronously?
|
||||
func (bot *botInstance) processShowcaseMsg(ctx context.Context, msg *Message) error {
|
||||
switch msg.Type {
|
||||
case MessageTypeDefault, MessageTypeReply, MessageTypeApplicationCommand:
|
||||
|
@ -224,7 +224,7 @@ func (bot *botInstance) saveMessageAndContents(
|
|||
`,
|
||||
newMsg.ID,
|
||||
discordUser.ID,
|
||||
msg.Content, // TODO: Add a method that can fill in mentions and stuff (https://discord.com/developers/docs/reference#message-formatting)
|
||||
CleanUpMarkdown(ctx, msg.Content),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -547,8 +547,8 @@ func (bot *botInstance) createMessageSnippet(ctx context.Context, tx pgx.Tx, msg
|
|||
if existing.Snippet != nil {
|
||||
// A snippet already exists - maybe update its content, then return it
|
||||
if msg.OriginalHasFields("content") && !existing.Snippet.EditedOnWebsite {
|
||||
contentMarkdown := msg.Content
|
||||
contentHTML := contentMarkdown // TODO: Parse Markdown's HTML
|
||||
contentMarkdown := existing.MessageContent.LastContent
|
||||
contentHTML := parsing.ParseMarkdown(contentMarkdown, parsing.RealMarkdown)
|
||||
|
||||
_, err := tx.Exec(ctx,
|
||||
`
|
||||
|
@ -587,7 +587,7 @@ func (bot *botInstance) createMessageSnippet(ctx context.Context, tx pgx.Tx, msg
|
|||
}
|
||||
|
||||
contentMarkdown := existing.MessageContent.LastContent
|
||||
contentHTML := contentMarkdown // TODO: Actually parse Discord's Markdown
|
||||
contentHTML := parsing.ParseMarkdown(contentMarkdown, parsing.RealMarkdown)
|
||||
|
||||
// TODO(db): Insert
|
||||
isnippet, err := db.QueryOne(ctx, tx, models.Snippet{},
|
||||
|
|
|
@ -41,3 +41,7 @@ background stuff:
|
|||
- look at every message ever in the channel
|
||||
- do exactly what the real-time bot does on new messages (although maybe don't do snippets depending on context)
|
||||
|
||||
|
||||
what the heck do we do with discord's markdown
|
||||
- when we save message contents, we should save both the raw discord markdown and a version with their custom stuff replaced. We do _not_ (yet) need a full markdown parse with HTML tags and stuff. (That arguably doesn't make sense for the handmade_discordmessagecontent record anyway.)
|
||||
- when we create a snippet, we should store both markdown that makes sense to a user and the HTML rendered from that markdown. THIS MEANS: The markdown we save is the "clean" version of the Discord markdown.
|
||||
|
|
|
@ -41,7 +41,7 @@ account, regardless of whether we create snippets or not.
|
|||
*/
|
||||
type DiscordMessageContent struct {
	// MessageID maps to the message_id column.
	MessageID string `db:"message_id"`
	// LastContent maps to the last_content column. It holds the cleaned-up
	// text, not the raw Discord markdown. Only one LastContent field may
	// exist: the stale declaration without the note has been dropped.
	LastContent string `db:"last_content"` // This should always be cleaned up with nice user IDs and stuff
	// DiscordID maps to the discord_id column.
	DiscordID int `db:"discord_id"`
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue