2021-08-27 03:59:12 +00:00
|
|
|
package discord
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"git.handmade.network/hmn/hmn/src/config"
|
|
|
|
"git.handmade.network/hmn/hmn/src/db"
|
|
|
|
"git.handmade.network/hmn/hmn/src/logging"
|
|
|
|
"git.handmade.network/hmn/hmn/src/models"
|
|
|
|
"github.com/jackc/pgx/v4/pgxpool"
|
|
|
|
)
|
|
|
|
|
|
|
|
func RunHistoryWatcher(ctx context.Context, dbConn *pgxpool.Pool) <-chan struct{} {
|
|
|
|
log := logging.ExtractLogger(ctx).With().Str("discord goroutine", "history watcher").Logger()
|
|
|
|
ctx = logging.AttachLoggerToContext(&log, ctx)
|
|
|
|
|
2021-09-06 00:43:49 +00:00
|
|
|
if config.Config.Discord.BotToken == "" {
|
|
|
|
log.Warn().Msg("No Discord bot token was provided, so the Discord history bot cannot run.")
|
|
|
|
done := make(chan struct{}, 1)
|
|
|
|
done <- struct{}{}
|
|
|
|
return done
|
|
|
|
}
|
2021-08-27 03:59:12 +00:00
|
|
|
|
2021-09-06 00:43:49 +00:00
|
|
|
done := make(chan struct{})
|
2021-08-27 03:59:12 +00:00
|
|
|
go func() {
|
|
|
|
defer func() {
|
|
|
|
log.Debug().Msg("shut down Discord history watcher")
|
|
|
|
done <- struct{}{}
|
|
|
|
}()
|
|
|
|
|
|
|
|
newUserTicker := time.NewTicker(5 * time.Second)
|
2022-01-31 06:46:43 +00:00
|
|
|
|
2022-02-07 12:21:40 +00:00
|
|
|
backfillFirstRun := make(chan struct{}, 1)
|
|
|
|
backfillFirstRun <- struct{}{}
|
|
|
|
backfillTicker := time.NewTicker(1 * time.Hour)
|
|
|
|
|
|
|
|
lastBackfillTime := time.Now().Add(-3 * time.Hour)
|
|
|
|
|
|
|
|
runBackfill := func() {
|
|
|
|
log.Info().Msg("Running backfill")
|
|
|
|
// Run a backfill to patch up places where the Discord bot missed (does create snippets)
|
|
|
|
now := time.Now()
|
|
|
|
done := Scrape(ctx, dbConn,
|
|
|
|
config.Config.Discord.ShowcaseChannelID,
|
|
|
|
lastBackfillTime,
|
|
|
|
true,
|
|
|
|
)
|
|
|
|
if done {
|
|
|
|
lastBackfillTime = now
|
|
|
|
}
|
|
|
|
}
|
2021-08-27 03:59:12 +00:00
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case <-newUserTicker.C:
|
|
|
|
// Get content for messages when a user links their account (but do not create snippets)
|
|
|
|
fetchMissingContent(ctx, dbConn)
|
2022-02-07 12:21:40 +00:00
|
|
|
case <-backfillFirstRun:
|
|
|
|
runBackfill()
|
2021-08-27 03:59:12 +00:00
|
|
|
case <-backfillTicker.C:
|
2022-02-07 12:21:40 +00:00
|
|
|
runBackfill()
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return done
|
|
|
|
}
|
|
|
|
|
|
|
|
func fetchMissingContent(ctx context.Context, dbConn *pgxpool.Pool) {
|
|
|
|
log := logging.ExtractLogger(ctx)
|
|
|
|
|
|
|
|
type query struct {
|
|
|
|
Message models.DiscordMessage `db:"msg"`
|
|
|
|
}
|
2021-12-15 01:36:37 +00:00
|
|
|
imessagesWithoutContent, err := db.Query(ctx, dbConn, query{},
|
2021-08-27 03:59:12 +00:00
|
|
|
`
|
|
|
|
SELECT $columns
|
|
|
|
FROM
|
|
|
|
handmade_discordmessage AS msg
|
|
|
|
JOIN handmade_discorduser AS duser ON msg.user_id = duser.userid -- only fetch messages for linked discord users
|
|
|
|
LEFT JOIN handmade_discordmessagecontent AS c ON c.message_id = msg.id
|
|
|
|
WHERE
|
|
|
|
c.last_content IS NULL
|
|
|
|
AND msg.guild_id = $1
|
2021-08-27 17:58:52 +00:00
|
|
|
ORDER BY msg.sent_at DESC
|
2021-08-27 03:59:12 +00:00
|
|
|
`,
|
|
|
|
config.Config.Discord.GuildID,
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to check for messages without content")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(imessagesWithoutContent) > 0 {
|
|
|
|
log.Info().Msgf("There are %d Discord messages without content, fetching their content now...", len(imessagesWithoutContent))
|
|
|
|
msgloop:
|
|
|
|
for _, imsg := range imessagesWithoutContent {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Info().Msg("Scrape was canceled")
|
|
|
|
break msgloop
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
msg := imsg.(*query).Message
|
|
|
|
|
|
|
|
discordMsg, err := GetChannelMessage(ctx, msg.ChannelID, msg.ID)
|
|
|
|
if errors.Is(err, NotFound) {
|
|
|
|
// This message has apparently been deleted; delete it from our database
|
2022-01-31 06:46:43 +00:00
|
|
|
interned, err := FetchInternedMessage(ctx, dbConn, msg.ID)
|
|
|
|
if err != nil {
|
2022-02-07 12:21:40 +00:00
|
|
|
if !errors.Is(err, db.NotFound) {
|
|
|
|
log.Error().Str("Message ID", msg.ID).Msg("couldn't find interned message")
|
|
|
|
} else {
|
|
|
|
log.Error().Err(err).Msg("failed to fetch interned message")
|
|
|
|
}
|
2022-01-31 06:46:43 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
err = DeleteInternedMessage(ctx, dbConn, interned)
|
2021-08-27 03:59:12 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to delete missing message")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
log.Info().Str("msg id", msg.ID).Msg("deleted missing Discord message")
|
|
|
|
continue
|
|
|
|
} else if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to get message")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info().Str("msg", discordMsg.ShortString()).Msg("fetched message for content")
|
|
|
|
|
2022-01-31 06:46:43 +00:00
|
|
|
err = HandleInternedMessage(ctx, dbConn, discordMsg, false, false)
|
2021-08-27 03:59:12 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to save content for message")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
log.Info().Msgf("Done fetching missing content")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-07 12:21:40 +00:00
|
|
|
func Scrape(ctx context.Context, dbConn *pgxpool.Pool, channelID string, earliestMessageTime time.Time, createSnippets bool) bool {
|
2021-08-27 03:59:12 +00:00
|
|
|
log := logging.ExtractLogger(ctx)
|
|
|
|
|
|
|
|
log.Info().Msg("Starting scrape")
|
|
|
|
defer log.Info().Msg("Done with scrape!")
|
|
|
|
|
|
|
|
before := ""
|
|
|
|
for {
|
|
|
|
msgs, err := GetChannelMessages(ctx, channelID, GetChannelMessagesInput{
|
|
|
|
Limit: 100,
|
|
|
|
Before: before,
|
|
|
|
})
|
|
|
|
if err != nil {
|
2021-08-28 17:07:45 +00:00
|
|
|
logging.Error().Err(err).Msg("failed to get messages while scraping")
|
2022-02-07 12:21:40 +00:00
|
|
|
return false
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(msgs) == 0 {
|
|
|
|
logging.Debug().Msg("out of messages, stopping scrape")
|
2022-02-07 12:21:40 +00:00
|
|
|
return true
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, msg := range msgs {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Info().Msg("Scrape was canceled")
|
2022-02-07 12:21:40 +00:00
|
|
|
return false
|
2021-08-27 03:59:12 +00:00
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info().Str("msg", msg.ShortString()).Msg("")
|
|
|
|
|
|
|
|
if !earliestMessageTime.IsZero() && msg.Time().Before(earliestMessageTime) {
|
|
|
|
logging.ExtractLogger(ctx).Info().Time("earliest", earliestMessageTime).Msg("Saw a message before the specified earliest time; exiting")
|
2022-02-07 12:21:40 +00:00
|
|
|
return true
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
2022-01-31 06:46:43 +00:00
|
|
|
err := HandleIncomingMessage(ctx, dbConn, &msg, createSnippets)
|
|
|
|
|
2021-08-27 03:59:12 +00:00
|
|
|
if err != nil {
|
|
|
|
errLog := logging.ExtractLogger(ctx).Error()
|
|
|
|
errLog.Err(err).Msg("failed to process Discord message")
|
|
|
|
}
|
|
|
|
|
|
|
|
before = msg.ID
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|