Work on failed outbound requests and move type migrators

- DB type migrators are now in separate file, in preparation for full
  custom sql migration statements
- Start work on handling failed outbound requests stored in the db
This commit is contained in:
Melody Becker 2025-07-03 13:57:30 +02:00
parent 81a01fbf8b
commit 7ac4c628b8
Signed by: mstar
SSH key fingerprint: SHA256:9VAo09aaVNTWKzPW7Hq2LW+ox9OdwmTSHRoD4mlz1yI
13 changed files with 372 additions and 145 deletions

View file

@ -0,0 +1,30 @@
package cleaners
import (
"time"
"github.com/rs/zerolog/log"
"git.mstar.dev/mstar/linstrom/storage-new/dbgen"
)
const maxServerAge = time.Hour * 24 * 30 // One month
func init() {
cleanerBuilders = append(cleanerBuilders, buildKillDeadServers)
}
// Marks all servers where the last interaction time is older than maxServerAge
func tickKillDeadServers(now time.Time) {
_, err := dbgen.RemoteServer.Where(dbgen.RemoteServer.LastInteraction.Lt(now.Add(-maxServerAge)), dbgen.RemoteServer.IsSelf.Is(false)).
UpdateColumn(dbgen.RemoteServer.IsDead, true)
if err != nil {
log.Error().
Err(err).
Msg("Failed to mark servers without interaction for over a 30 days as dead")
}
}
func buildKillDeadServers() (onTick func(time.Time), name string, tickSpeed time.Duration) {
return tickKillDeadServers, "kill-dead-servers", time.Hour
}

View file

@ -0,0 +1,113 @@
package cleaners
import (
"net/http"
"strings"
"sync"
"time"
"github.com/rs/zerolog/log"
"golang.org/x/sync/errgroup"
"gorm.io/gen"
"git.mstar.dev/mstar/linstrom/storage-new/dbgen"
"git.mstar.dev/mstar/linstrom/storage-new/models"
webshared "git.mstar.dev/mstar/linstrom/web/shared"
)
const maxFailedRequestsBeforeDeath = 10
var (
reqErrStrUnknownHost = "no such host"
)
func init() {
cleanerBuilders = append(cleanerBuilders, buildRetryRequests)
}
func TickRetryRequests(now time.Time) {
batchResults := []*models.FailedOutboundRequest{}
fo := dbgen.FailedOutboundRequest
idsToDelete := []uint64{}
var idsDeleteLock sync.Mutex
err := fo.Preload(fo.TargetServer, fo.ActingUser).
// Join with server data to exclude dead servers
LeftJoin(dbgen.RemoteServer, dbgen.RemoteServer.ID.EqCol(fo.TargetServerId)).
Where(dbgen.RemoteServer.IsDead.Is(false)).
Order(fo.Id.Asc()).
FindInBatches(&batchResults, 50, func(tx gen.Dao, batch int) error {
var g errgroup.Group
for _, failedRequest := range batchResults {
g.Go(func() (err error) {
defer func() {
failedRequest.NrOfAttempts += 1
err = dbgen.FailedOutboundRequest.Save(failedRequest)
if err != nil {
return
}
if failedRequest.NrOfAttempts >= maxFailedRequestsBeforeDeath {
_, err = dbgen.RemoteServer.Where(dbgen.RemoteServer.ID.Eq(failedRequest.TargetServerId)).
UpdateColumn(dbgen.RemoteServer.IsDead, true)
}
}()
var res *http.Response
res, _, err = webshared.RequestSigned(
"POST",
failedRequest.Target,
failedRequest.RawData,
failedRequest.ActingUser,
)
if err != nil {
failedRequest.NrOfAttempts += 1
errString := err.Error()
// FIXME: Use the actual error types instead of error string
// Using substring matching is awful and probably not reliable. Using error type is likely more reliable
if strings.Contains(errString, reqErrStrUnknownHost) {
failedRequest.LastFailureReason = string(
models.RequestFailureRequestError,
)
} else {
failedRequest.LastFailureReason = string(models.RequestFailureRequestError)
}
return
}
if res.StatusCode < 400 {
idsDeleteLock.Lock()
idsToDelete = append(idsToDelete, failedRequest.Id)
idsDeleteLock.Unlock()
// Defer func will always add one (to make the expected failure case easier)
// Sub one here to prevent a potential server kill if it was at maxFailedRequestsBeforeDeath-1 failed requests before
failedRequest.NrOfAttempts -= 1
return nil
}
switch res.StatusCode {
case http.StatusInternalServerError:
failedRequest.LastFailureReason = string(models.RequestFailureInternalError)
case http.StatusForbidden:
failedRequest.LastFailureReason = string(models.RequestFailureRejected)
case http.StatusTooManyRequests:
// TODO: Check Timeout headers and write apropriate message
}
return nil
})
}
return nil
})
if err != nil {
log.Error().Err(err).Msg("Failed to batch-process all failed outbound requests")
}
_, err = fo.Where(fo.Id.In(idsToDelete...)).Delete()
if err != nil {
log.Error().Err(err).Msg("Failed to batch-delete all successful retries")
}
err = dbgen.FailedOutboundRequest.KillServers(maxFailedRequestsBeforeDeath)
if err != nil {
log.Error().Err(err).Msg("Failed to kill all servers with too many failed requests")
}
}
func buildRetryRequests() (onTick func(time.Time), name string, tickSpeed time.Duration) {
return TickRetryRequests, "retry-requests", time.Hour
}