sr: use site replicator svcacct to sign STS session tokens (#19111)

This change removes the need for root credentials to match between
 site replication deployments.

 Also ensure site replication config initialization is retried until
 it succeeds; this dependency is critical to the STS flow in site
 replication scenarios.
This commit is contained in:
Poorna 2024-02-26 13:26:18 -08:00 committed by Harshavardhana
parent 30c2596512
commit b1351e2dee
8 changed files with 148 additions and 25 deletions

View File

@ -306,6 +306,13 @@ func checkClaimsFromToken(r *http.Request, cred auth.Credentials) (map[string]in
}
if token != "" {
var err error
if globalSiteReplicationSys.isEnabled() && cred.AccessKey != siteReplicatorSvcAcc {
secret, err = getTokenSigningKey()
if err != nil {
return nil, toAPIErrorCode(r.Context(), err)
}
}
claims, err := getClaimsFromTokenWithSecret(token, secret)
if err != nil {
return nil, toAPIErrorCode(r.Context(), err)

View File

@ -311,7 +311,8 @@ var (
// Time when the server is started
globalBootTime = UTCNow()
globalActiveCred auth.Credentials
globalActiveCred auth.Credentials
globalSiteReplicatorCred siteReplicatorCred
// Captures if root credentials are set via ENV.
globalCredViaEnv bool

View File

@ -1863,7 +1863,12 @@ func (store *IAMStoreSys) GetAllParentUsers() map[string]ParentUserInfo {
if cred.IsServiceAccount() {
claims, err = getClaimsFromTokenWithSecret(cred.SessionToken, cred.SecretKey)
} else if cred.IsTemp() {
claims, err = getClaimsFromTokenWithSecret(cred.SessionToken, globalActiveCred.SecretKey)
var secretKey string
secretKey, err = getTokenSigningKey()
if err != nil {
continue
}
claims, err = getClaimsFromTokenWithSecret(cred.SessionToken, secretKey)
}
if err != nil {
@ -2528,8 +2533,12 @@ func (store *IAMStoreSys) LoadUser(ctx context.Context, accessKey string) {
func extractJWTClaims(u UserIdentity) (*jwt.MapClaims, error) {
jwtClaims, err := auth.ExtractClaims(u.Credentials.SessionToken, u.Credentials.SecretKey)
if err != nil {
// Session tokens for STS creds will be generated with root secret
jwtClaims, err = auth.ExtractClaims(u.Credentials.SessionToken, globalActiveCred.SecretKey)
secretKey, err := getTokenSigningKey()
if err != nil {
return nil, err
}
// Session tokens for STS creds will be generated with root secret or site-replicator-0 secret
jwtClaims, err = auth.ExtractClaims(u.Credentials.SessionToken, secretKey)
if err != nil {
return nil, err
}

View File

@ -201,6 +201,13 @@ func (sys *IAMSys) Load(ctx context.Context, firstTime bool) error {
atomic.StoreUint64(&sys.LastRefreshTimeUnixNano, uint64(loadStartTime.Add(loadDuration).UnixNano()))
atomic.AddUint64(&sys.TotalRefreshSuccesses, 1)
if !globalSiteReplicatorCred.IsValid() {
sa, _, err := sys.getServiceAccount(ctx, siteReplicatorSvcAcc)
if err == nil {
globalSiteReplicatorCred.Set(sa.Credentials)
}
}
if firstTime {
bootstrapTraceMsg(fmt.Sprintf("globalIAMSys.Load(): (duration: %s)", loadDuration))
}
@ -1394,7 +1401,12 @@ func (sys *IAMSys) updateGroupMembershipsForLDAP(ctx context.Context) {
jwtClaims, err = auth.ExtractClaims(cred.SessionToken, globalActiveCred.SecretKey)
}
} else {
jwtClaims, err = auth.ExtractClaims(cred.SessionToken, globalActiveCred.SecretKey)
var secretKey string
secretKey, err = getTokenSigningKey()
if err != nil {
continue
}
jwtClaims, err = auth.ExtractClaims(cred.SessionToken, secretKey)
}
if err != nil {
// skip this cred - session token seems invalid

View File

@ -26,6 +26,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"math/rand"
"net/url"
"reflect"
"runtime"
@ -227,19 +228,30 @@ type srStateData struct {
// Init - initialize the site replication manager.
func (c *SiteReplicationSys) Init(ctx context.Context, objAPI ObjectLayer) error {
go c.startHealRoutine(ctx, objAPI)
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for {
err := c.loadFromDisk(ctx, objAPI)
if err == errConfigNotFound {
return nil
}
if err == nil {
break
}
logger.LogOnceIf(context.Background(), fmt.Errorf("unable to initialize site replication subsystem: (%w)", err), "site-relication-init")
err := c.loadFromDisk(ctx, objAPI)
if err == errConfigNotFound {
return nil
duration := time.Duration(r.Float64() * float64(time.Minute))
if duration < time.Second {
// Make sure to sleep at least a second to avoid high CPU ticks.
duration = time.Second
}
time.Sleep(duration)
}
c.RLock()
defer c.RUnlock()
if c.enabled {
logger.Info("Cluster replication initialized")
}
return err
return nil
}
func (c *SiteReplicationSys) loadFromDisk(ctx context.Context, objAPI ObjectLayer) error {
@ -582,6 +594,9 @@ func (c *SiteReplicationSys) AddPeerClusters(ctx context.Context, psites []madmi
}, nil
}
if !globalSiteReplicatorCred.IsValid() {
globalSiteReplicatorCred.Set(svcCred)
}
result := madmin.ReplicateAddStatus{
Success: true,
Status: madmin.ReplicateAddStatusSuccess,
@ -607,9 +622,9 @@ func (c *SiteReplicationSys) PeerJoinReq(ctx context.Context, arg madmin.SRPeerJ
return errSRSelfNotFound
}
_, _, err := globalIAMSys.GetServiceAccount(ctx, arg.SvcAcctAccessKey)
sa, _, err := globalIAMSys.GetServiceAccount(ctx, arg.SvcAcctAccessKey)
if err == errNoSuchServiceAccount {
_, _, err = globalIAMSys.NewServiceAccount(ctx, arg.SvcAcctParent, nil, newServiceAccountOpts{
sa, _, err = globalIAMSys.NewServiceAccount(ctx, arg.SvcAcctParent, nil, newServiceAccountOpts{
accessKey: arg.SvcAcctAccessKey,
secretKey: arg.SvcAcctSecretKey,
allowSiteReplicatorAccount: arg.SvcAcctAccessKey == siteReplicatorSvcAcc,
@ -641,6 +656,10 @@ func (c *SiteReplicationSys) PeerJoinReq(ctx context.Context, arg madmin.SRPeerJ
if err = c.saveToDisk(ctx, state); err != nil {
return errSRBackendIssue(fmt.Errorf("unable to save cluster-replication state to drive on %s: %v", ourName, err))
}
if !globalSiteReplicatorCred.IsValid() {
globalSiteReplicatorCred.Set(sa)
}
return nil
}
@ -1417,9 +1436,13 @@ func (c *SiteReplicationSys) PeerSTSAccHandler(ctx context.Context, stsCred *mad
}
}
}
secretKey, err := getTokenSigningKey()
if err != nil {
return errSRInvalidRequest(err)
}
// Verify the session token of the stsCred
claims, err := auth.ExtractClaims(stsCred.SessionToken, globalActiveCred.SecretKey)
claims, err := auth.ExtractClaims(stsCred.SessionToken, secretKey)
if err != nil {
return fmt.Errorf("STS credential could not be verified: %w", err)
}
@ -6170,3 +6193,37 @@ func ilmExpiryReplicationEnabled(sites map[string]madmin.PeerInfo) bool {
}
return flag
}
// siteReplicatorCred holds the site replicator service account credentials
// together with the lock that guards them.
type siteReplicatorCred struct {
	// Creds is the cached credential; access it through Get, Set and
	// IsValid, which take the embedded lock.
	Creds auth.Credentials
	// RWMutex guards Creds.
	sync.RWMutex
}
// Get returns the cached site replicator credentials, or attempts to load
// them from the IAM store when no valid credentials are cached yet.
//
// Credentials that load successfully and are valid are cached, so later
// calls do not hit the store again. An error from the store lookup is
// returned unchanged together with zero-value credentials.
func (s *siteReplicatorCred) Get(ctx context.Context) (auth.Credentials, error) {
	// Copy under the read lock so the returned value is never read
	// concurrently with a Set.
	s.RLock()
	cached := s.Creds
	s.RUnlock()
	if cached.IsValid() {
		return cached, nil
	}

	s.Lock()
	defer s.Unlock()

	// Another caller may have filled the cache while this one was waiting
	// for the write lock.
	if s.Creds.IsValid() {
		return s.Creds, nil
	}

	// The result is read back out of this map after loadUser returns, so it
	// must be allocated: a nil map cannot be populated by the callee.
	m := make(map[string]UserIdentity, 1)
	if err := globalIAMSys.store.loadUser(ctx, siteReplicatorSvcAcc, svcUser, m); err != nil {
		return auth.Credentials{}, err
	}

	creds := m[siteReplicatorSvcAcc].Credentials
	if creds.IsValid() {
		s.Creds = creds
	}
	return creds, nil
}
// Set replaces the cached credentials with c while holding the write lock.
func (s *siteReplicatorCred) Set(c auth.Credentials) {
	s.Lock()
	s.Creds = c
	s.Unlock()
}
// IsValid reports whether the cached credentials pass their own IsValid
// check; the check runs under the read lock.
func (s *siteReplicatorCred) IsValid() bool {
	s.RLock()
	defer s.RUnlock()

	valid := s.Creds.IsValid()
	return valid
}

View File

@ -190,6 +190,19 @@ func parseForm(r *http.Request) error {
return nil
}
// getTokenSigningKey returns secret key used to sign JWT session tokens
func getTokenSigningKey() (string, error) {
secret := globalActiveCred.SecretKey
if globalSiteReplicationSys.isEnabled() {
c, err := globalSiteReplicatorCred.Get(GlobalContext)
if err != nil {
return "", err
}
return c.SecretKey, nil
}
return secret, nil
}
// AssumeRole - implementation of AWS STS API AssumeRole to get temporary
// credentials for regular users on Minio.
// https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html
@ -276,7 +289,12 @@ func (sts *stsAPIHandlers) AssumeRole(w http.ResponseWriter, r *http.Request) {
claims[policy.SessionPolicyName] = base64.StdEncoding.EncodeToString([]byte(sessionPolicyStr))
}
secret := globalActiveCred.SecretKey
secret, err := getTokenSigningKey()
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return
}
cred, err := auth.GetNewCredentialsWithMetadata(claims, secret)
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
@ -453,7 +471,11 @@ func (sts *stsAPIHandlers) AssumeRoleWithSSO(w http.ResponseWriter, r *http.Requ
claims[policy.SessionPolicyName] = base64.StdEncoding.EncodeToString([]byte(sessionPolicyStr))
}
secret := globalActiveCred.SecretKey
secret, err := getTokenSigningKey()
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return
}
cred, err := auth.GetNewCredentialsWithMetadata(claims, secret)
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
@ -676,7 +698,12 @@ func (sts *stsAPIHandlers) AssumeRoleWithLDAPIdentity(w http.ResponseWriter, r *
claims[policy.SessionPolicyName] = base64.StdEncoding.EncodeToString([]byte(sessionPolicyStr))
}
secret := globalActiveCred.SecretKey
secret, err := getTokenSigningKey()
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return
}
cred, err := auth.GetNewCredentialsWithMetadata(claims, secret)
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
@ -851,8 +878,12 @@ func (sts *stsAPIHandlers) AssumeRoleWithCertificate(w http.ResponseWriter, r *h
claims[audClaim] = certificate.Subject.Organization
claims[issClaim] = certificate.Issuer.CommonName
claims[parentClaim] = parentUser
tmpCredentials, err := auth.GetNewCredentialsWithMetadata(claims, globalActiveCred.SecretKey)
secretKey, err := getTokenSigningKey()
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return
}
tmpCredentials, err := auth.GetNewCredentialsWithMetadata(claims, secretKey)
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return
@ -978,8 +1009,12 @@ func (sts *stsAPIHandlers) AssumeRoleWithCustomToken(w http.ResponseWriter, r *h
claims[k] = v
}
}
tmpCredentials, err := auth.GetNewCredentialsWithMetadata(claims, globalActiveCred.SecretKey)
secretKey, err := getTokenSigningKey()
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return
}
tmpCredentials, err := auth.GetNewCredentialsWithMetadata(claims, secretKey)
if err != nil {
writeSTSErrorResponse(ctx, w, ErrSTSInternalError, err)
return

View File

@ -22,7 +22,7 @@ The following Bucket features will **not be replicated**, is designed to differ
## Pre-requisites
- Initially, only **one** of the sites added for replication may have data. After site-replication is successfully configured, this data is replicated to the other (initially empty) sites. Subsequently, objects may be written to any of the sites, and they will be replicated to all other sites.
- All sites **must** have the same deployment credentials (i.e. `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD`).
- **Removing a site** is not allowed from a set of replicated sites once configured.
- All sites must be using the **same** external IDP(s) if any.
- For [SSE-S3 or SSE-KMS encryption via KMS](https://min.io/docs/minio/linux/operations/server-side-encryption.html "MinIO KMS Guide"), all sites **must** have access to a central KMS deployment. This can be achieved via a central KES server or multiple KES servers (say one per site) connected via a central KMS (Vault) server.
@ -56,3 +56,8 @@ mc admin replicate add minio1 minio2 minio3
```sh
mc admin replicate info minio1
```
**Note**
Previously, site replication required the root credentials of peer sites to be identical. This is no longer necessary because STS tokens are now signed with the site replicator service account credentials, thus allowing flexibility in the independent management of root accounts across sites and the ability to disable root accounts eventually.
However, this means that STS tokens previously signed with root credentials will no longer be valid after upgrading to a version with this change. Please regenerate them as you usually do. Additionally, if site replication is ever removed, the STS tokens will become invalid and must be regenerated in the same way.

View File

@ -6,13 +6,10 @@ exit_1() {
echo "minio1 ============"
cat /tmp/minio1_1.log
cat /tmp/minio1_2.log
echo "minio2 ============"
cat /tmp/minio2_1.log
cat /tmp/minio2_2.log
echo "minio3 ============"
cat /tmp/minio3_1.log
cat /tmp/minio3_2.log
exit 1
}