- DB type migrators are now in a separate file, in preparation for full custom SQL migration statements - Start work on handling failed outbound requests stored in the DB
113 lines
3.6 KiB
Go
113 lines
3.6 KiB
Go
package cleaners
|
|
|
|
import (
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
"golang.org/x/sync/errgroup"
|
|
"gorm.io/gen"
|
|
|
|
"git.mstar.dev/mstar/linstrom/storage-new/dbgen"
|
|
"git.mstar.dev/mstar/linstrom/storage-new/models"
|
|
webshared "git.mstar.dev/mstar/linstrom/web/shared"
|
|
)
|
|
|
|
const (
	// maxFailedRequestsBeforeDeath is the attempt count at which
	// TickRetryRequests flags the target server of a failed outbound request
	// as dead. The same value is handed to KillServers at the end of a tick.
	maxFailedRequestsBeforeDeath = 10
)
|
|
|
|
var (
	// reqErrStrUnknownHost is the substring looked for in the message of a
	// request error to recognise a failed host lookup.
	reqErrStrUnknownHost = "no such host"
)
|
|
|
|
func init() {
|
|
cleanerBuilders = append(cleanerBuilders, buildRetryRequests)
|
|
}
|
|
|
|
func TickRetryRequests(now time.Time) {
|
|
batchResults := []*models.FailedOutboundRequest{}
|
|
fo := dbgen.FailedOutboundRequest
|
|
idsToDelete := []uint64{}
|
|
var idsDeleteLock sync.Mutex
|
|
err := fo.Preload(fo.TargetServer, fo.ActingUser).
|
|
// Join with server data to exclude dead servers
|
|
LeftJoin(dbgen.RemoteServer, dbgen.RemoteServer.ID.EqCol(fo.TargetServerId)).
|
|
Where(dbgen.RemoteServer.IsDead.Is(false)).
|
|
Order(fo.Id.Asc()).
|
|
FindInBatches(&batchResults, 50, func(tx gen.Dao, batch int) error {
|
|
var g errgroup.Group
|
|
for _, failedRequest := range batchResults {
|
|
g.Go(func() (err error) {
|
|
defer func() {
|
|
failedRequest.NrOfAttempts += 1
|
|
err = dbgen.FailedOutboundRequest.Save(failedRequest)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if failedRequest.NrOfAttempts >= maxFailedRequestsBeforeDeath {
|
|
_, err = dbgen.RemoteServer.Where(dbgen.RemoteServer.ID.Eq(failedRequest.TargetServerId)).
|
|
UpdateColumn(dbgen.RemoteServer.IsDead, true)
|
|
}
|
|
}()
|
|
var res *http.Response
|
|
res, _, err = webshared.RequestSigned(
|
|
"POST",
|
|
failedRequest.Target,
|
|
failedRequest.RawData,
|
|
failedRequest.ActingUser,
|
|
)
|
|
if err != nil {
|
|
failedRequest.NrOfAttempts += 1
|
|
errString := err.Error()
|
|
// FIXME: Use the actual error types instead of error string
|
|
// Using substring matching is awful and probably not reliable. Using error type is likely more reliable
|
|
if strings.Contains(errString, reqErrStrUnknownHost) {
|
|
failedRequest.LastFailureReason = string(
|
|
models.RequestFailureRequestError,
|
|
)
|
|
} else {
|
|
failedRequest.LastFailureReason = string(models.RequestFailureRequestError)
|
|
}
|
|
return
|
|
}
|
|
if res.StatusCode < 400 {
|
|
idsDeleteLock.Lock()
|
|
idsToDelete = append(idsToDelete, failedRequest.Id)
|
|
idsDeleteLock.Unlock()
|
|
// Defer func will always add one (to make the expected failure case easier)
|
|
// Sub one here to prevent a potential server kill if it was at maxFailedRequestsBeforeDeath-1 failed requests before
|
|
failedRequest.NrOfAttempts -= 1
|
|
return nil
|
|
}
|
|
|
|
switch res.StatusCode {
|
|
case http.StatusInternalServerError:
|
|
failedRequest.LastFailureReason = string(models.RequestFailureInternalError)
|
|
case http.StatusForbidden:
|
|
failedRequest.LastFailureReason = string(models.RequestFailureRejected)
|
|
case http.StatusTooManyRequests:
|
|
// TODO: Check Timeout headers and write apropriate message
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to batch-process all failed outbound requests")
|
|
}
|
|
_, err = fo.Where(fo.Id.In(idsToDelete...)).Delete()
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to batch-delete all successful retries")
|
|
}
|
|
err = dbgen.FailedOutboundRequest.KillServers(maxFailedRequestsBeforeDeath)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("Failed to kill all servers with too many failed requests")
|
|
}
|
|
}
|
|
|
|
func buildRetryRequests() (onTick func(time.Time), name string, tickSpeed time.Duration) {
|
|
return TickRetryRequests, "retry-requests", time.Hour
|
|
}
|