add kes retries upto two times with jitter backoff (#9527)

KES calls are not retried and under certain situations
when KES is under high load, the request should be
retried automatically.
This commit is contained in:
Harshavardhana 2020-05-06 11:44:06 -07:00 committed by GitHub
parent 8eb99d3a87
commit a2ccba69e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 139 additions and 41 deletions

View File

@ -1,4 +1,4 @@
// MinIO Cloud Storage, (C) 2019 MinIO, Inc.
// MinIO Cloud Storage, (C) 2019-2020 MinIO, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -19,6 +19,7 @@ import (
"crypto/tls"
"crypto/x509"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
@ -27,6 +28,10 @@ import (
"os"
"path/filepath"
"strings"
"time"
xhttp "github.com/minio/minio/cmd/http"
xnet "github.com/minio/minio/pkg/net"
)
// KesConfig contains the configuration required
@ -209,6 +214,18 @@ type kesClient struct {
httpClient http.Client
}
// Response KES response struct
type response struct {
Plaintext []byte `json:"plaintext"`
Ciphertext []byte `json:"ciphertext,omitempty"`
}
// Request KES request struct
type request struct {
Ciphertext []byte `json:"ciphertext,omitempty"`
Context []byte `json:"context"`
}
// GenerateDataKey requests a new data key from the KES server.
// On success, the KES server will respond with the plaintext key
// and the ciphertext key as the plaintext key encrypted with
@ -218,10 +235,7 @@ type kesClient struct {
// such that you have to provide the same context when decrypting
// the data key.
func (c *kesClient) GenerateDataKey(name string, context []byte) ([]byte, []byte, error) {
type Request struct {
Context []byte `json:"context"`
}
body, err := json.Marshal(Request{
body, err := json.Marshal(request{
Context: context,
})
if err != nil {
@ -229,25 +243,57 @@ func (c *kesClient) GenerateDataKey(name string, context []byte) ([]byte, []byte
}
url := fmt.Sprintf("%s/v1/key/generate/%s", c.addr, url.PathEscape(name))
resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(body))
const limit = 1 << 20 // A plaintext/ciphertext key pair will never be larger than 1 MB
resp, err := c.postRetry(url, bytes.NewReader(body), limit)
if err != nil {
return nil, nil, err
}
if resp.StatusCode != http.StatusOK {
return nil, nil, c.parseErrorResponse(resp)
}
defer resp.Body.Close()
type Response struct {
Plaintext []byte `json:"plaintext"`
Ciphertext []byte `json:"ciphertext"`
return resp.Plaintext, resp.Ciphertext, nil
}
const limit = 1 << 20 // A plaintext/ciphertext key pair will never be larger than 1 MB
var response Response
if err = json.NewDecoder(io.LimitReader(resp.Body, limit)).Decode(&response); err != nil {
return nil, nil, err
func (c *kesClient) post(url string, body io.Reader, limit int64) (*response, error) {
resp, err := c.httpClient.Post(url, "application/json", body)
if err != nil {
return nil, err
}
// Drain the entire body to make sure we have re-use connections
defer xhttp.DrainBody(resp.Body)
if resp.StatusCode != http.StatusOK {
return nil, c.parseErrorResponse(resp)
}
response := &response{}
if err = json.NewDecoder(io.LimitReader(resp.Body, limit)).Decode(response); err != nil {
return nil, err
}
return response, nil
}
func (c *kesClient) postRetry(url string, body io.ReadSeeker, limit int64) (*response, error) {
for i := 0; ; i++ {
body.Seek(0, io.SeekStart) // seek to the beginning of the body.
response, err := c.post(url, body, limit)
if err == nil {
return response, nil
}
if !xnet.IsNetworkOrHostDown(err) && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
return nil, err
}
// retriable network errors.
remain := retryMax - i
if remain <= 0 {
return response, err
}
<-time.After(LinearJitterBackoff(retryWaitMin, retryWaitMax, i))
}
return response.Plaintext, response.Ciphertext, nil
}
// GenerateDataKey decrypts an encrypted data key with the key
@ -257,11 +303,7 @@ func (c *kesClient) GenerateDataKey(name string, context []byte) ([]byte, []byte
// The optional context must match the value you provided when
// generating the data key.
func (c *kesClient) DecryptDataKey(name string, ciphertext, context []byte) ([]byte, error) {
type Request struct {
Ciphertext []byte `json:"ciphertext"`
Context []byte `json:"context"`
}
body, err := json.Marshal(Request{
body, err := json.Marshal(request{
Ciphertext: ciphertext,
Context: context,
})
@ -270,37 +312,26 @@ func (c *kesClient) DecryptDataKey(name string, ciphertext, context []byte) ([]b
}
url := fmt.Sprintf("%s/v1/key/decrypt/%s", c.addr, url.PathEscape(name))
resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(body))
const limit = 32 * 1024 // A data key will never be larger than 32 KB
resp, err := c.postRetry(url, bytes.NewReader(body), limit)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
return nil, c.parseErrorResponse(resp)
}
defer resp.Body.Close()
type Response struct {
Plaintext []byte `json:"plaintext"`
}
const limit = 32 * 1024 // A data key will never be larger than 32 KB
var response Response
if err = json.NewDecoder(io.LimitReader(resp.Body, limit)).Decode(&response); err != nil {
return nil, err
}
return response.Plaintext, nil
return resp.Plaintext, nil
}
func (c *kesClient) parseErrorResponse(resp *http.Response) error {
if resp.Body == nil {
return nil
return Errorf("%s: no body", http.StatusText(resp.StatusCode))
}
defer resp.Body.Close()
const limit = 32 * 1024 // A (valid) error response will not be greater than 32 KB
var errMsg strings.Builder
if _, err := io.Copy(&errMsg, io.LimitReader(resp.Body, limit)); err != nil {
return err
return Errorf("%s: %s", http.StatusText(resp.StatusCode), err)
}
return Errorf("%s: %s", http.StatusText(resp.StatusCode), errMsg.String())
}

65
cmd/crypto/retry.go Normal file
View File

@ -0,0 +1,65 @@
// MinIO Cloud Storage, (C) 2020 MinIO, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package crypto
import (
"math/rand"
"time"
)
// default retry configuration
const (
retryWaitMin = 500 * time.Millisecond // minimum retry limit.
retryWaitMax = 3 * time.Second // 3 secs worth of max retry.
retryMax = 2
)
// LinearJitterBackoff provides the time.Duration for a caller to
// perform linear backoff based on the attempt number and with jitter to
// prevent a thundering herd.
//
// min and max here are *not* absolute values. The number to be multiplied by
// the attempt number will be chosen at random from between them, thus they are
// bounding the jitter.
//
// For instance:
// * To get strictly linear backoff of one second increasing each retry, set
// both to one second (1s, 2s, 3s, 4s, ...)
// * To get a small amount of jitter centered around one second increasing each
// retry, set to around one second, such as a min of 800ms and max of 1200ms
// (892ms, 2102ms, 2945ms, 4312ms, ...)
// * To get extreme jitter, set to a very wide spread, such as a min of 100ms
// and a max of 20s (15382ms, 292ms, 51321ms, 35234ms, ...)
func LinearJitterBackoff(min, max time.Duration, attemptNum int) time.Duration {
// attemptNum always starts at zero but we want to start at 1 for multiplication
attemptNum++
if max <= min {
// Unclear what to do here, or they are the same, so return min *
// attemptNum
return min * time.Duration(attemptNum)
}
// Seed rand; doing this every time is fine
rand := rand.New(rand.NewSource(int64(time.Now().Nanosecond())))
// Pick a random number that lies somewhere between the min and max and
// multiply by the attemptNum. attemptNum starts at zero so we always
// increment here. We first get a random percentage, then apply that to the
// difference between min and max, and add to min.
jitter := rand.Float64() * float64(max-min)
jitterMin := int64(jitter) + int64(min)
return time.Duration(jitterMin * int64(attemptNum))
}

2
go.sum
View File

@ -233,6 +233,8 @@ github.com/minio/lsync v1.0.1/go.mod h1:tCFzfo0dlvdGl70IT4IAK/5Wtgb0/BrTmo/jE8pA
github.com/minio/minio-go/v6 v6.0.53/go.mod h1:DIvC/IApeHX8q1BAMVCXSXwpmrmM+I+iBvhvztQorfI=
github.com/minio/minio-go/v6 v6.0.55-0.20200424204115-7506d2996b22 h1:nZEve4vdUhwHBoV18zRvPDgjL6NYyDJE5QJvz3l9bRs=
github.com/minio/minio-go/v6 v6.0.55-0.20200424204115-7506d2996b22/go.mod h1:KQMM+/44DSlSGSQWSfRrAZ12FVMmpWNuX37i2AX0jfI=
github.com/minio/minio-go/v6 v6.0.55-0.20200425081427-89eebdef2af0 h1:PdHKpM9h2vqCDr1AjJdK8e/6tRdOSjUNzIqeNmxu7ak=
github.com/minio/minio-go/v6 v6.0.55-0.20200425081427-89eebdef2af0/go.mod h1:KQMM+/44DSlSGSQWSfRrAZ12FVMmpWNuX37i2AX0jfI=
github.com/minio/parquet-go v0.0.0-20200414234858-838cfa8aae61 h1:pUSI/WKPdd77gcuoJkSzhJ4wdS8OMDOsOu99MtpXEQA=
github.com/minio/parquet-go v0.0.0-20200414234858-838cfa8aae61/go.mod h1:4trzEJ7N1nBTd5Tt7OCZT5SEin+WiAXpdJ/WgPkESA8=
github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU=