upgrade: Split in two steps to ensure a stable retry (#15396)

Currently, if one server in a distributed setup fails to upgrade
for any reason, it is not possible to retry the upgrade unless the
nodes are restarted.

To fix this, split the upgrade process into two steps:

- Download the new binary on all servers
- If successful, overwrite the old binary with the new one
This commit is contained in:
Anis Elleuch
2022-07-26 01:49:47 +01:00
committed by GitHub
parent 4c6498d726
commit e4b51235f8
8 changed files with 116 additions and 34 deletions

View File

@@ -518,7 +518,7 @@ func getUpdateReaderFromURL(u *url.URL, transport http.RoundTripper, mode string
var updateInProgress uint32
func doUpdate(u *url.URL, lrTime time.Time, sha256Sum []byte, releaseInfo string, mode string) (err error) {
func downloadBinary(u *url.URL, sha256Sum []byte, releaseInfo string, mode string) (err error) {
if !atomic.CompareAndSwapUint32(&updateInProgress, 0, 1) {
return errors.New("update already in progress")
}
@@ -565,7 +565,35 @@ func doUpdate(u *url.URL, lrTime time.Time, sha256Sum []byte, releaseInfo string
opts.Verifier = v
}
if err = selfupdate.Apply(reader, opts); err != nil {
if err = selfupdate.PrepareAndCheckBinary(reader, opts); err != nil {
var pathErr *os.PathError
if errors.As(err, &pathErr) {
return AdminError{
Code: AdminUpdateApplyFailure,
Message: fmt.Sprintf("Unable to update the binary at %s: %v",
filepath.Dir(pathErr.Path), pathErr.Err),
StatusCode: http.StatusForbidden,
}
}
return AdminError{
Code: AdminUpdateApplyFailure,
Message: err.Error(),
StatusCode: http.StatusInternalServerError,
}
}
return nil
}
func commitBinary() (err error) {
if !atomic.CompareAndSwapUint32(&updateInProgress, 0, 1) {
return errors.New("update already in progress")
}
defer atomic.StoreUint32(&updateInProgress, 0)
opts := selfupdate.Options{}
if err = selfupdate.CommitBinary(opts); err != nil {
if rerr := selfupdate.RollbackError(err); rerr != nil {
return AdminError{
Code: AdminUpdateApplyFailure,