2021-08-27 03:59:12 +00:00
|
|
|
package discord
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"git.handmade.network/hmn/hmn/src/config"
|
|
|
|
"git.handmade.network/hmn/hmn/src/db"
|
2022-05-14 05:33:00 +00:00
|
|
|
"git.handmade.network/hmn/hmn/src/jobs"
|
2021-08-27 03:59:12 +00:00
|
|
|
"git.handmade.network/hmn/hmn/src/logging"
|
|
|
|
"git.handmade.network/hmn/hmn/src/models"
|
2022-06-15 21:33:57 +00:00
|
|
|
"git.handmade.network/hmn/hmn/src/utils"
|
2023-01-02 21:52:41 +00:00
|
|
|
"github.com/jackc/pgx/v5/pgxpool"
|
2021-08-27 03:59:12 +00:00
|
|
|
)
|
|
|
|
|
2022-05-14 05:33:00 +00:00
|
|
|
func RunHistoryWatcher(ctx context.Context, dbConn *pgxpool.Pool) jobs.Job {
|
2021-08-27 03:59:12 +00:00
|
|
|
log := logging.ExtractLogger(ctx).With().Str("discord goroutine", "history watcher").Logger()
|
|
|
|
ctx = logging.AttachLoggerToContext(&log, ctx)
|
|
|
|
|
2021-09-06 00:43:49 +00:00
|
|
|
if config.Config.Discord.BotToken == "" {
|
|
|
|
log.Warn().Msg("No Discord bot token was provided, so the Discord history bot cannot run.")
|
2022-05-14 05:33:00 +00:00
|
|
|
return jobs.Noop()
|
2021-09-06 00:43:49 +00:00
|
|
|
}
|
2021-08-27 03:59:12 +00:00
|
|
|
|
2022-05-14 05:33:00 +00:00
|
|
|
job := jobs.New()
|
2021-08-27 03:59:12 +00:00
|
|
|
go func() {
|
|
|
|
defer func() {
|
|
|
|
log.Debug().Msg("shut down Discord history watcher")
|
2022-05-14 05:33:00 +00:00
|
|
|
job.Done()
|
2021-08-27 03:59:12 +00:00
|
|
|
}()
|
|
|
|
|
|
|
|
newUserTicker := time.NewTicker(5 * time.Second)
|
2022-01-31 06:46:43 +00:00
|
|
|
|
2022-02-07 12:21:40 +00:00
|
|
|
backfillFirstRun := make(chan struct{}, 1)
|
|
|
|
backfillFirstRun <- struct{}{}
|
|
|
|
backfillTicker := time.NewTicker(1 * time.Hour)
|
|
|
|
|
|
|
|
lastBackfillTime := time.Now().Add(-3 * time.Hour)
|
|
|
|
|
|
|
|
runBackfill := func() {
|
|
|
|
log.Info().Msg("Running backfill")
|
|
|
|
// Run a backfill to patch up places where the Discord bot missed (does create snippets)
|
|
|
|
now := time.Now()
|
|
|
|
done := Scrape(ctx, dbConn,
|
|
|
|
config.Config.Discord.ShowcaseChannelID,
|
|
|
|
lastBackfillTime,
|
|
|
|
true,
|
|
|
|
)
|
|
|
|
if done {
|
|
|
|
lastBackfillTime = now
|
|
|
|
}
|
|
|
|
}
|
2021-08-27 03:59:12 +00:00
|
|
|
|
|
|
|
for {
|
2022-06-15 21:33:57 +00:00
|
|
|
done, err := func() (done bool, err error) {
|
|
|
|
defer utils.RecoverPanicAsError(&err)
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return true, nil
|
|
|
|
case <-newUserTicker.C:
|
|
|
|
// Get content for messages when a user links their account (but do not create snippets)
|
|
|
|
fetchMissingContent(ctx, dbConn)
|
|
|
|
case <-backfillFirstRun:
|
|
|
|
runBackfill()
|
|
|
|
case <-backfillTicker.C:
|
|
|
|
runBackfill()
|
|
|
|
}
|
|
|
|
return false, nil
|
|
|
|
}()
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("Panicked in RunHistoryWatcher")
|
|
|
|
} else if done {
|
2021-08-27 03:59:12 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2022-05-14 05:33:00 +00:00
|
|
|
return job
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func fetchMissingContent(ctx context.Context, dbConn *pgxpool.Pool) {
|
|
|
|
log := logging.ExtractLogger(ctx)
|
|
|
|
|
2022-04-16 17:49:29 +00:00
|
|
|
messagesWithoutContent, err := db.Query[models.DiscordMessage](ctx, dbConn,
|
2021-08-27 03:59:12 +00:00
|
|
|
`
|
2022-04-16 17:49:29 +00:00
|
|
|
SELECT $columns{msg}
|
2021-08-27 03:59:12 +00:00
|
|
|
FROM
|
2022-05-07 13:11:05 +00:00
|
|
|
discord_message AS msg
|
|
|
|
JOIN discord_user AS duser ON msg.user_id = duser.userid -- only fetch messages for linked discord users
|
|
|
|
LEFT JOIN discord_message_content AS c ON c.message_id = msg.id
|
2021-08-27 03:59:12 +00:00
|
|
|
WHERE
|
|
|
|
c.last_content IS NULL
|
|
|
|
AND msg.guild_id = $1
|
2021-08-27 17:58:52 +00:00
|
|
|
ORDER BY msg.sent_at DESC
|
2021-08-27 03:59:12 +00:00
|
|
|
`,
|
|
|
|
config.Config.Discord.GuildID,
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to check for messages without content")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2022-04-16 17:49:29 +00:00
|
|
|
if len(messagesWithoutContent) > 0 {
|
|
|
|
log.Info().Msgf("There are %d Discord messages without content, fetching their content now...", len(messagesWithoutContent))
|
2021-08-27 03:59:12 +00:00
|
|
|
msgloop:
|
2022-04-16 17:49:29 +00:00
|
|
|
for _, msg := range messagesWithoutContent {
|
2021-08-27 03:59:12 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Info().Msg("Scrape was canceled")
|
|
|
|
break msgloop
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
discordMsg, err := GetChannelMessage(ctx, msg.ChannelID, msg.ID)
|
|
|
|
if errors.Is(err, NotFound) {
|
|
|
|
// This message has apparently been deleted; delete it from our database
|
2022-01-31 06:46:43 +00:00
|
|
|
interned, err := FetchInternedMessage(ctx, dbConn, msg.ID)
|
|
|
|
if err != nil {
|
2022-02-07 12:21:40 +00:00
|
|
|
if !errors.Is(err, db.NotFound) {
|
|
|
|
log.Error().Str("Message ID", msg.ID).Msg("couldn't find interned message")
|
|
|
|
} else {
|
|
|
|
log.Error().Err(err).Msg("failed to fetch interned message")
|
|
|
|
}
|
2022-01-31 06:46:43 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
err = DeleteInternedMessage(ctx, dbConn, interned)
|
2021-08-27 03:59:12 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to delete missing message")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
log.Info().Str("msg id", msg.ID).Msg("deleted missing Discord message")
|
|
|
|
continue
|
|
|
|
} else if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to get message")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info().Str("msg", discordMsg.ShortString()).Msg("fetched message for content")
|
|
|
|
|
2022-01-31 06:46:43 +00:00
|
|
|
err = HandleInternedMessage(ctx, dbConn, discordMsg, false, false)
|
2021-08-27 03:59:12 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("failed to save content for message")
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
log.Info().Msgf("Done fetching missing content")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-07 12:21:40 +00:00
|
|
|
func Scrape(ctx context.Context, dbConn *pgxpool.Pool, channelID string, earliestMessageTime time.Time, createSnippets bool) bool {
|
2021-08-27 03:59:12 +00:00
|
|
|
log := logging.ExtractLogger(ctx)
|
|
|
|
|
|
|
|
log.Info().Msg("Starting scrape")
|
|
|
|
defer log.Info().Msg("Done with scrape!")
|
|
|
|
|
|
|
|
before := ""
|
|
|
|
for {
|
|
|
|
msgs, err := GetChannelMessages(ctx, channelID, GetChannelMessagesInput{
|
|
|
|
Limit: 100,
|
|
|
|
Before: before,
|
|
|
|
})
|
|
|
|
if err != nil {
|
2021-08-28 17:07:45 +00:00
|
|
|
logging.Error().Err(err).Msg("failed to get messages while scraping")
|
2022-02-07 12:21:40 +00:00
|
|
|
return false
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(msgs) == 0 {
|
|
|
|
logging.Debug().Msg("out of messages, stopping scrape")
|
2022-02-07 12:21:40 +00:00
|
|
|
return true
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, msg := range msgs {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Info().Msg("Scrape was canceled")
|
2022-02-07 12:21:40 +00:00
|
|
|
return false
|
2021-08-27 03:59:12 +00:00
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Info().Str("msg", msg.ShortString()).Msg("")
|
|
|
|
|
|
|
|
if !earliestMessageTime.IsZero() && msg.Time().Before(earliestMessageTime) {
|
|
|
|
logging.ExtractLogger(ctx).Info().Time("earliest", earliestMessageTime).Msg("Saw a message before the specified earliest time; exiting")
|
2022-02-07 12:21:40 +00:00
|
|
|
return true
|
2021-08-27 03:59:12 +00:00
|
|
|
}
|
|
|
|
|
2022-01-31 06:46:43 +00:00
|
|
|
err := HandleIncomingMessage(ctx, dbConn, &msg, createSnippets)
|
|
|
|
|
2021-08-27 03:59:12 +00:00
|
|
|
if err != nil {
|
|
|
|
errLog := logging.ExtractLogger(ctx).Error()
|
|
|
|
errLog.Err(err).Msg("failed to process Discord message")
|
|
|
|
}
|
|
|
|
|
|
|
|
before = msg.ID
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|