kms: add support for MinKMS and remove some unused/broken code (#19368)

This commit adds support for MinKMS. Now, there are three KMS
implementations in `internal/kms`: Builtin, MinIO KES and MinIO KMS.

Adding another KMS integration required some cleanup. In particular:
 - Various KMS APIs that haven't been and are not used have been
   removed. A lot of the code was broken anyway.
 - Metrics are now monitored by the `kms.KMS` itself. For basic
   metrics this is simpler than collecting metrics for external
   servers. In particular, each KES server returns its own metrics
   and no cluster-level view.
 - The builtin KMS now uses the same en/decryption implemented by
   MinKMS and KES. It still supports decryption of the previous
   ciphertext format. It's backwards compatible.
 - Data encryption keys now include a master key version since MinKMS
   supports multiple versions (~4 billion in total and 10000 concurrent)
   per key name.

Signed-off-by: Andreas Auernhammer <github@aead.dev>
This commit is contained in:
Andreas Auernhammer
2024-05-08 01:55:37 +02:00
committed by GitHub
parent 981497799a
commit 8b660e18f2
36 changed files with 1794 additions and 1808 deletions

View File

@@ -18,239 +18,116 @@
package kms
import (
"bytes"
"context"
"crypto/subtle"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"strings"
"net/http"
"sync"
"time"
"github.com/minio/pkg/v2/env"
"github.com/minio/kms-go/kes"
"github.com/minio/pkg/v2/certs"
"github.com/minio/madmin-go/v3"
)
const (
tlsClientSessionCacheSize = 100
)
// Config contains various KMS-related configuration
// parameters - like KMS endpoints or authentication
// credentials.
type Config struct {
// Endpoints contains a list of KMS server
// HTTP endpoints.
Endpoints []string
// DefaultKeyID is the key ID used when
// no explicit key ID is specified for
// a cryptographic operation.
DefaultKeyID string
// APIKey is an credential provided by env. var.
// to authenticate to a KES server. Either an
// API key or a client certificate must be specified.
APIKey kes.APIKey
// Certificate is the client TLS certificate
// to authenticate to KMS via mTLS.
Certificate *certs.Certificate
// ReloadCertEvents is an event channel that receives
// the reloaded client certificate.
ReloadCertEvents <-chan tls.Certificate
// RootCAs is a set of root CA certificates
// to verify the KMS server TLS certificate.
RootCAs *x509.CertPool
}
// NewWithConfig returns a new KMS using the given
// configuration.
func NewWithConfig(config Config, logger Logger) (KMS, error) {
if len(config.Endpoints) == 0 {
return nil, errors.New("kms: no server endpoints")
}
endpoints := make([]string, len(config.Endpoints)) // Copy => avoid being affect by any changes to the original slice
copy(endpoints, config.Endpoints)
var client *kes.Client
if config.APIKey != nil {
cert, err := kes.GenerateCertificate(config.APIKey)
if err != nil {
return nil, err
}
client = kes.NewClientWithConfig("", &tls.Config{
MinVersion: tls.VersionTLS12,
Certificates: []tls.Certificate{cert},
RootCAs: config.RootCAs,
ClientSessionCache: tls.NewLRUClientSessionCache(tlsClientSessionCacheSize),
})
} else {
client = kes.NewClientWithConfig("", &tls.Config{
MinVersion: tls.VersionTLS12,
Certificates: []tls.Certificate{config.Certificate.Get()},
RootCAs: config.RootCAs,
ClientSessionCache: tls.NewLRUClientSessionCache(tlsClientSessionCacheSize),
})
}
client.Endpoints = endpoints
c := &kesClient{
client: client,
defaultKeyID: config.DefaultKeyID,
}
go func() {
if config.Certificate == nil || config.ReloadCertEvents == nil {
return
}
var prevCertificate tls.Certificate
for {
certificate, ok := <-config.ReloadCertEvents
if !ok {
return
}
sameCert := len(certificate.Certificate) == len(prevCertificate.Certificate)
for i, b := range certificate.Certificate {
if !sameCert {
break
}
sameCert = sameCert && bytes.Equal(b, prevCertificate.Certificate[i])
}
// Do not reload if its the same cert as before.
if !sameCert {
client := kes.NewClientWithConfig("", &tls.Config{
MinVersion: tls.VersionTLS12,
Certificates: []tls.Certificate{certificate},
RootCAs: config.RootCAs,
ClientSessionCache: tls.NewLRUClientSessionCache(tlsClientSessionCacheSize),
})
client.Endpoints = endpoints
c.lock.Lock()
c.client = client
c.lock.Unlock()
prevCertificate = certificate
}
}
}()
go c.refreshKMSMasterKeyCache(logger)
return c, nil
}
// Request KES keep an up-to-date copy of the KMS master key to allow minio to start up even if KMS is down. The
// cached key may still be evicted if the period of this function is longer than that of KES .cache.expiry.unused
func (c *kesClient) refreshKMSMasterKeyCache(logger Logger) {
ctx := context.Background()
defaultCacheDuration := 10 * time.Second
cacheDuration, err := env.GetDuration(EnvKESKeyCacheInterval, defaultCacheDuration)
if err != nil {
logger.LogOnceIf(ctx, fmt.Errorf("%s, using default of 10s", err.Error()), "refresh-kms-master-key")
cacheDuration = defaultCacheDuration
}
if cacheDuration < time.Second {
logger.LogOnceIf(ctx, errors.New("cache duration is less than 1s, using default of 10s"), "refresh-kms-master-key")
cacheDuration = defaultCacheDuration
}
timer := time.NewTimer(cacheDuration)
defer timer.Stop()
for {
select {
case <-ctx.Done():
return
case <-timer.C:
c.RefreshKey(ctx, logger)
// Reset for the next interval
timer.Reset(cacheDuration)
}
}
}
type kesClient struct {
lock sync.RWMutex
type kesConn struct {
defaultKeyID string
client *kes.Client
}
var ( // compiler checks
_ KMS = (*kesClient)(nil)
_ KeyManager = (*kesClient)(nil)
_ IdentityManager = (*kesClient)(nil)
_ PolicyManager = (*kesClient)(nil)
)
// Stat returns the current KES status containing a
// list of KES endpoints and the default key ID.
func (c *kesClient) Stat(ctx context.Context) (Status, error) {
c.lock.RLock()
defer c.lock.RUnlock()
st, err := c.client.Status(ctx)
if err != nil {
return Status{}, err
}
endpoints := make([]string, len(c.client.Endpoints))
copy(endpoints, c.client.Endpoints)
return Status{
Name: "KES",
Endpoints: endpoints,
DefaultKey: c.defaultKeyID,
Details: st,
}, nil
}
// IsLocal returns true if the KMS is a local implementation
func (c *kesClient) IsLocal() bool {
return env.IsSet(EnvKMSSecretKey)
}
// List returns an array of local KMS Names
func (c *kesClient) List() []kes.KeyInfo {
var kmsSecret []kes.KeyInfo
envKMSSecretKey := env.Get(EnvKMSSecretKey, "")
values := strings.SplitN(envKMSSecretKey, ":", 2)
if len(values) == 2 {
kmsSecret = []kes.KeyInfo{
{
Name: values[0],
},
}
}
return kmsSecret
}
// Metrics retrieves server metrics in the Prometheus exposition format.
func (c *kesClient) Metrics(ctx context.Context) (kes.Metric, error) {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.Metrics(ctx)
}
// Version retrieves version information
func (c *kesClient) Version(ctx context.Context) (string, error) {
c.lock.RLock()
defer c.lock.RUnlock()
func (c *kesConn) Version(ctx context.Context) (string, error) {
return c.client.Version(ctx)
}
// APIs retrieves a list of supported API endpoints
func (c *kesClient) APIs(ctx context.Context) ([]kes.API, error) {
c.lock.RLock()
defer c.lock.RUnlock()
func (c *kesConn) APIs(ctx context.Context) ([]madmin.KMSAPI, error) {
APIs, err := c.client.APIs(ctx)
if err != nil {
if errors.Is(err, kes.ErrNotAllowed) {
return nil, ErrPermission
}
return nil, Error{
Code: http.StatusInternalServerError,
APICode: "kms:InternalError",
Err: "failed to list KMS APIs",
Cause: err,
}
}
return c.client.APIs(ctx)
list := make([]madmin.KMSAPI, 0, len(APIs))
for _, api := range APIs {
list = append(list, madmin.KMSAPI{
Method: api.Method,
Path: api.Path,
MaxBody: api.MaxBody,
Timeout: int64(api.Timeout.Truncate(time.Second).Seconds()),
})
}
return list, nil
}
// Stat returns the current KES status containing a
// list of KES endpoints and the default key ID.
func (c *kesConn) Status(ctx context.Context) (map[string]madmin.ItemState, error) {
if len(c.client.Endpoints) == 1 {
if _, err := c.client.Status(ctx); err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
return nil, err
}
if errors.Is(err, kes.ErrNotAllowed) {
return nil, ErrPermission
}
return map[string]madmin.ItemState{
c.client.Endpoints[0]: madmin.ItemOffline,
}, nil
}
return map[string]madmin.ItemState{
c.client.Endpoints[0]: madmin.ItemOnline,
}, nil
}
type Result struct {
Endpoint string
ItemState madmin.ItemState
}
var wg sync.WaitGroup
results := make([]Result, len(c.client.Endpoints))
for i := range c.client.Endpoints {
wg.Add(1)
go func(i int) {
defer wg.Done()
client := kes.Client{
Endpoints: []string{c.client.Endpoints[i]},
HTTPClient: c.client.HTTPClient,
}
var item madmin.ItemState
if _, err := client.Status(ctx); err == nil {
item = madmin.ItemOnline
} else {
item = madmin.ItemOffline
}
results[i] = Result{
Endpoint: c.client.Endpoints[i],
ItemState: item,
}
}(i)
}
wg.Wait()
status := make(map[string]madmin.ItemState, len(results))
for _, r := range results {
if r.ItemState == madmin.ItemOnline {
status[r.Endpoint] = madmin.ItemOnline
} else {
status[r.Endpoint] = madmin.ItemOffline
}
}
return status, nil
}
func (c *kesConn) ListKeyNames(ctx context.Context, req *ListRequest) ([]string, string, error) {
return c.client.ListKeys(ctx, req.Prefix, req.Limit)
}
// CreateKey tries to create a new key at the KMS with the
@@ -258,32 +135,34 @@ func (c *kesClient) APIs(ctx context.Context) ([]kes.API, error) {
//
// If the a key with the same keyID already exists then
// CreateKey returns kes.ErrKeyExists.
func (c *kesClient) CreateKey(ctx context.Context, keyID string) error {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.CreateKey(ctx, keyID)
func (c *kesConn) CreateKey(ctx context.Context, req *CreateKeyRequest) error {
if err := c.client.CreateKey(ctx, req.Name); err != nil {
if errors.Is(err, kes.ErrKeyExists) {
return ErrKeyExists
}
if errors.Is(err, kes.ErrNotAllowed) {
return ErrPermission
}
return errKeyCreationFailed(err)
}
return nil
}
// DeleteKey deletes a key at the KMS with the given key ID.
// Please note that is a dangerous operation.
// Once a key has been deleted all data that has been encrypted with it cannot be decrypted
// anymore, and therefore, is lost.
func (c *kesClient) DeleteKey(ctx context.Context, keyID string) error {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.DeleteKey(ctx, keyID)
}
// ListKeys returns an iterator over all key names.
func (c *kesClient) ListKeys(ctx context.Context) (*kes.ListIter[string], error) {
c.lock.RLock()
defer c.lock.RUnlock()
return &kes.ListIter[string]{
NextFunc: c.client.ListKeys,
}, nil
func (c *kesConn) DeleteKey(ctx context.Context, req *DeleteKeyRequest) error {
if err := c.client.DeleteKey(ctx, req.Name); err != nil {
if errors.Is(err, kes.ErrKeyNotFound) {
return ErrKeyNotFound
}
if errors.Is(err, kes.ErrNotAllowed) {
return ErrPermission
}
return errKeyDeletionFailed(err)
}
return nil
}
// GenerateKey generates a new data encryption key using
@@ -294,34 +173,36 @@ func (c *kesClient) ListKeys(ctx context.Context) (*kes.ListIter[string], error)
// The context is associated and tied to the generated DEK.
// The same context must be provided when the generated
// key should be decrypted.
func (c *kesClient) GenerateKey(ctx context.Context, keyID string, cryptoCtx Context) (DEK, error) {
c.lock.RLock()
defer c.lock.RUnlock()
if keyID == "" {
keyID = c.defaultKeyID
}
ctxBytes, err := cryptoCtx.MarshalText()
func (c *kesConn) GenerateKey(ctx context.Context, req *GenerateKeyRequest) (DEK, error) {
aad, err := req.AssociatedData.MarshalText()
if err != nil {
return DEK{}, err
}
dek, err := c.client.GenerateKey(ctx, keyID, ctxBytes)
name := req.Name
if name == "" {
name = c.defaultKeyID
}
dek, err := c.client.GenerateKey(ctx, name, aad)
if err != nil {
return DEK{}, err
if errors.Is(err, kes.ErrKeyNotFound) {
return DEK{}, ErrKeyNotFound
}
if errors.Is(err, kes.ErrNotAllowed) {
return DEK{}, ErrPermission
}
return DEK{}, errKeyGenerationFailed(err)
}
return DEK{
KeyID: keyID,
KeyID: name,
Plaintext: dek.Plaintext,
Ciphertext: dek.Ciphertext,
}, nil
}
// ImportKey imports a cryptographic key into the KMS.
func (c *kesClient) ImportKey(ctx context.Context, keyID string, bytes []byte) error {
c.lock.RLock()
defer c.lock.RUnlock()
func (c *kesConn) ImportKey(ctx context.Context, keyID string, bytes []byte) error {
return c.client.ImportKey(ctx, keyID, &kes.ImportKeyRequest{
Key: bytes,
})
@@ -329,10 +210,7 @@ func (c *kesClient) ImportKey(ctx context.Context, keyID string, bytes []byte) e
// EncryptKey Encrypts and authenticates a (small) plaintext with the cryptographic key
// The plaintext must not exceed 1 MB
func (c *kesClient) EncryptKey(keyID string, plaintext []byte, ctx Context) ([]byte, error) {
c.lock.RLock()
defer c.lock.RUnlock()
func (c *kesConn) EncryptKey(keyID string, plaintext []byte, ctx Context) ([]byte, error) {
ctxBytes, err := ctx.MarshalText()
if err != nil {
return nil, err
@@ -343,184 +221,42 @@ func (c *kesClient) EncryptKey(keyID string, plaintext []byte, ctx Context) ([]b
// DecryptKey decrypts the ciphertext with the key at the KES
// server referenced by the key ID. The context must match the
// context value used to generate the ciphertext.
func (c *kesClient) DecryptKey(keyID string, ciphertext []byte, ctx Context) ([]byte, error) {
c.lock.RLock()
defer c.lock.RUnlock()
ctxBytes, err := ctx.MarshalText()
func (c *kesConn) Decrypt(ctx context.Context, req *DecryptRequest) ([]byte, error) {
aad, err := req.AssociatedData.MarshalText()
if err != nil {
return nil, err
}
return c.client.Decrypt(context.Background(), keyID, ciphertext, ctxBytes)
}
func (c *kesClient) DecryptAll(ctx context.Context, keyID string, ciphertexts [][]byte, contexts []Context) ([][]byte, error) {
c.lock.RLock()
defer c.lock.RUnlock()
plaintexts := make([][]byte, 0, len(ciphertexts))
for i := range ciphertexts {
ctxBytes, err := contexts[i].MarshalText()
if err != nil {
return nil, err
plaintext, err := c.client.Decrypt(context.Background(), req.Name, req.Ciphertext, aad)
if err != nil {
if errors.Is(err, kes.ErrKeyNotFound) {
return nil, ErrKeyNotFound
}
plaintext, err := c.client.Decrypt(ctx, keyID, ciphertexts[i], ctxBytes)
if err != nil {
return nil, err
if errors.Is(err, kes.ErrDecrypt) {
return nil, ErrDecrypt
}
plaintexts = append(plaintexts, plaintext)
if errors.Is(err, kes.ErrNotAllowed) {
return nil, ErrPermission
}
return nil, errDecryptionFailed(err)
}
return plaintexts, nil
return plaintext, nil
}
// HMAC generates the HMAC checksum of the given msg using the key
// with the given keyID at the KMS.
func (c *kesClient) HMAC(ctx context.Context, keyID string, msg []byte) ([]byte, error) {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.HMAC(context.Background(), keyID, msg)
}
// DescribePolicy describes a policy by returning its metadata.
// e.g. who created the policy at which point in time.
func (c *kesClient) DescribePolicy(ctx context.Context, policy string) (*kes.PolicyInfo, error) {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.DescribePolicy(ctx, policy)
}
// ListPolicies returns an iterator over all policy names.
func (c *kesClient) ListPolicies(ctx context.Context) (*kes.ListIter[string], error) {
c.lock.RLock()
defer c.lock.RUnlock()
return &kes.ListIter[string]{
NextFunc: c.client.ListPolicies,
}, nil
}
// GetPolicy gets a policy from KMS.
func (c *kesClient) GetPolicy(ctx context.Context, policy string) (*kes.Policy, error) {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.GetPolicy(ctx, policy)
}
// DescribeIdentity describes an identity by returning its metadata.
// e.g. which policy is currently assigned and whether its an admin identity.
func (c *kesClient) DescribeIdentity(ctx context.Context, identity string) (*kes.IdentityInfo, error) {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.DescribeIdentity(ctx, kes.Identity(identity))
}
// DescribeSelfIdentity describes the identity issuing the request.
// It infers the identity from the TLS client certificate used to authenticate.
// It returns the identity and policy information for the client identity.
func (c *kesClient) DescribeSelfIdentity(ctx context.Context) (*kes.IdentityInfo, *kes.Policy, error) {
c.lock.RLock()
defer c.lock.RUnlock()
return c.client.DescribeSelf(ctx)
}
// ListIdentities returns an iterator over all identities.
func (c *kesClient) ListIdentities(ctx context.Context) (*kes.ListIter[kes.Identity], error) {
c.lock.RLock()
defer c.lock.RUnlock()
return &kes.ListIter[kes.Identity]{
NextFunc: c.client.ListIdentities,
}, nil
}
// Verify verifies all KMS endpoints and returns details
func (c *kesClient) Verify(ctx context.Context) []VerifyResult {
c.lock.RLock()
defer c.lock.RUnlock()
results := []VerifyResult{}
kmsContext := Context{"MinIO admin API": "ServerInfoHandler"} // Context for a test key operation
for _, endpoint := range c.client.Endpoints {
client := kes.Client{
Endpoints: []string{endpoint},
HTTPClient: c.client.HTTPClient,
// MAC generates the checksum of the given req.Message using the key
// with the req.Name at the KMS.
func (c *kesConn) MAC(ctx context.Context, req *MACRequest) ([]byte, error) {
mac, err := c.client.HMAC(context.Background(), req.Name, req.Message)
if err != nil {
if errors.Is(err, kes.ErrKeyNotFound) {
return nil, ErrKeyNotFound
}
// 1. Get stats for the KES instance
state, err := client.Status(ctx)
if err != nil {
results = append(results, VerifyResult{Status: "offline", Endpoint: endpoint})
continue
if errors.Is(err, kes.ErrNotAllowed) {
return nil, ErrPermission
}
// 2. Generate a new key using the KMS.
kmsCtx, err := kmsContext.MarshalText()
if err != nil {
results = append(results, VerifyResult{Status: "offline", Endpoint: endpoint})
continue
}
result := VerifyResult{Status: "online", Endpoint: endpoint, Version: state.Version}
key, err := client.GenerateKey(ctx, env.Get(EnvKESKeyName, ""), kmsCtx)
if err != nil {
result.Encrypt = fmt.Sprintf("Encryption failed: %v", err)
} else {
result.Encrypt = "success"
}
// 3. Verify that we can indeed decrypt the (encrypted) key
decryptedKey, err := client.Decrypt(ctx, env.Get(EnvKESKeyName, ""), key.Ciphertext, kmsCtx)
switch {
case err != nil:
result.Decrypt = fmt.Sprintf("Decryption failed: %v", err)
case subtle.ConstantTimeCompare(key.Plaintext, decryptedKey) != 1:
result.Decrypt = "Decryption failed: decrypted key does not match generated key"
default:
result.Decrypt = "success"
}
results = append(results, result)
}
return results
}
// Logger interface permits access to module specific logging, in this case, for KMS
type Logger interface {
LogOnceIf(ctx context.Context, err error, id string, errKind ...interface{})
LogIf(ctx context.Context, err error, errKind ...interface{})
}
// RefreshKey checks the validity of the KMS Master Key
func (c *kesClient) RefreshKey(ctx context.Context, logger Logger) bool {
c.lock.RLock()
defer c.lock.RUnlock()
validKey := false
kmsContext := Context{"MinIO admin API": "ServerInfoHandler"} // Context for a test key operation
for _, endpoint := range c.client.Endpoints {
client := kes.Client{
Endpoints: []string{endpoint},
HTTPClient: c.client.HTTPClient,
}
// 1. Generate a new key using the KMS.
kmsCtx, err := kmsContext.MarshalText()
if err != nil {
logger.LogOnceIf(ctx, err, "refresh-kms-master-key")
validKey = false
break
}
_, err = client.GenerateKey(ctx, env.Get(EnvKESKeyName, ""), kmsCtx)
if err != nil {
logger.LogOnceIf(ctx, err, "refresh-kms-master-key")
validKey = false
break
}
if !validKey {
validKey = true
if kErr, ok := err.(kes.Error); ok && kErr.Status() == http.StatusNotImplemented {
return nil, ErrNotSupported
}
}
return validKey
return mac, nil
}