fix: IAM store fallback to list users and policies from disk (#10787)

Bonus fixes, remove package retry it is harder to get it
right, also manage context remove it such that we don't have
to rely on it anymore instead use a simple Jitter retry.
This commit is contained in:
Harshavardhana
2020-11-02 17:52:13 -08:00
committed by GitHub
parent 4ea31da889
commit 68de5a6f6a
9 changed files with 131 additions and 319 deletions

View File

@@ -26,22 +26,25 @@ import (
// commonTime returns a maximally occurring time from a list of time.
func commonTime(modTimes []time.Time) (modTime time.Time, count int) {
var maxima int // Counter for remembering max occurrence of elements.
timeOccurenceMap := make(map[time.Time]int)
timeOccurenceMap := make(map[int64]int)
// Ignore the uuid sentinel and count the rest.
for _, time := range modTimes {
if time.Equal(timeSentinel) {
continue
}
timeOccurenceMap[time]++
timeOccurenceMap[time.UnixNano()]++
}
// Find the common cardinality from previously collected
// occurrences of elements.
for time, count := range timeOccurenceMap {
if count > maxima || (count == maxima && time.After(modTime)) {
for nano, count := range timeOccurenceMap {
t := time.Unix(0, nano)
if count > maxima || (count == maxima && t.After(modTime)) {
maxima = count
modTime = time
modTime = t
}
}
// Return the collected common uuid.
return modTime, maxima
}

View File

@@ -1202,15 +1202,7 @@ func (z *erasureServerSets) deleteAll(ctx context.Context, bucket, prefix string
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
if err := disk.Delete(ctx, bucket, prefix, true); err != nil {
if !IsErrIgnored(err, []error{
errDiskNotFound,
errVolumeNotFound,
errFileNotFound,
}...) {
logger.LogOnceIf(ctx, err, disk.String())
}
}
disk.Delete(ctx, bucket, prefix, true)
}(disk)
}
}

View File

@@ -20,6 +20,7 @@ import (
"context"
"fmt"
"io"
"math/rand"
"os"
"path"
"time"
@@ -27,7 +28,6 @@ import (
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/lock"
"github.com/minio/minio/pkg/retry"
)
// FS format version strings.
@@ -340,31 +340,28 @@ func formatFSFixDeploymentID(ctx context.Context, fsFormatPath string) error {
return time.Now().Round(time.Second).Sub(formatStartTime).String()
}
retryCtx, cancel := context.WithCancel(ctx)
// Indicate to our routine to exit cleanly upon return.
defer cancel()
r := rand.New(rand.NewSource(time.Now().UnixNano()))
var wlk *lock.LockedFile
retryCh := retry.NewTimerWithJitter(retryCtx, time.Second, 30*time.Second, retry.MaxJitter)
var stop bool
for !stop {
select {
case <-retryCh:
case <-ctx.Done():
return fmt.Errorf("Initializing FS format stopped gracefully")
default:
wlk, err = lock.TryLockedOpenFile(fsFormatPath, os.O_RDWR, 0)
if err == lock.ErrAlreadyLocked {
// Lock already present, sleep and attempt again
logger.Info("Another minio process(es) might be holding a lock to the file %s. Please kill that minio process(es) (elapsed %s)\n", fsFormatPath, getElapsedTime())
time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
continue
}
if err != nil {
return err
}
stop = true
case <-ctx.Done():
return fmt.Errorf("Initializing FS format stopped gracefully")
}
stop = true
}
defer wlk.Close()
if err = jsonLoad(wlk, format); err != nil {

View File

@@ -421,7 +421,6 @@ func (iamOS *IAMObjectStore) loadAll(ctx context.Context, sys *IAMSys) error {
}
// Sets default canned policies, if none are set.
setDefaultCannedPolicies(sys.iamPolicyDocsMap)
iamOS.unlock()
if isMinIOUsersSys {

View File

@@ -23,6 +23,7 @@ import (
"encoding/json"
"errors"
"fmt"
"math/rand"
"strings"
"time"
@@ -33,7 +34,6 @@ import (
"github.com/minio/minio/pkg/auth"
iampolicy "github.com/minio/minio/pkg/iam/policy"
"github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/retry"
)
// UsersSysType - defines the type of users and groups system that is
@@ -443,11 +443,15 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
// allocate dynamic timeout once before the loop
iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
for range retry.NewTimerWithJitter(retryCtx, time.Second, 5*time.Second, retry.MaxJitter) {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
var err error
for {
// let one of the server acquire the lock, if not let them timeout.
// which shall be retried again by this loop.
if err := txnLk.GetLock(iamLockTimeout); err != nil {
if err = txnLk.GetLock(iamLockTimeout); err != nil {
logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. trying to acquire lock")
time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
continue
}
@@ -455,7 +459,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
// **** WARNING ****
// Migrating to encrypted backend on etcd should happen before initialization of
// IAM sub-system, make sure that we do not move the above codeblock elsewhere.
if err := migrateIAMConfigsEtcdToEncrypted(ctx, globalEtcdClient); err != nil {
if err = migrateIAMConfigsEtcdToEncrypted(ctx, globalEtcdClient); err != nil {
txnLk.Unlock()
logger.LogIf(ctx, fmt.Errorf("Unable to decrypt an encrypted ETCD backend for IAM users and policies: %w", err))
logger.LogIf(ctx, errors.New("IAM sub-system is partially initialized, some users may not be available"))
@@ -469,11 +473,10 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
}
// Migrate IAM configuration, if necessary.
if err := sys.doIAMConfigMigration(ctx); err != nil {
if err = sys.doIAMConfigMigration(ctx); err != nil {
txnLk.Unlock()
if errors.Is(err, errDiskNotFound) ||
errors.Is(err, errConfigNotFound) ||
errors.Is(err, context.Canceled) ||
errors.Is(err, context.DeadlineExceeded) ||
errors.As(err, &rquorum) ||
errors.As(err, &wquorum) ||
@@ -491,11 +494,10 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
break
}
err := sys.store.loadAll(ctx, sys)
err = sys.store.loadAll(ctx, sys)
// Invalidate the old cred always, even upon error to avoid any leakage.
globalOldCred = auth.Credentials{}
if err != nil {
logger.LogIf(ctx, fmt.Errorf("Unable to initialize IAM sub-system, some users may not be available %w", err))
}
@@ -579,12 +581,18 @@ func (sys *IAMSys) ListPolicies() (map[string]iampolicy.Policy, error) {
}
sys.store.rlock()
defer sys.store.runlock()
fallback := sys.storeFallback
sys.store.runlock()
if sys.storeFallback {
return nil, errIAMNotInitialized
if fallback {
if err := sys.store.loadAll(context.Background(), sys); err != nil {
return nil, err
}
}
sys.store.rlock()
defer sys.store.runlock()
policyDocsMap := make(map[string]iampolicy.Policy, len(sys.iamPolicyDocsMap))
for k, v := range sys.iamPolicyDocsMap {
policyDocsMap[k] = v
@@ -741,19 +749,25 @@ func (sys *IAMSys) ListUsers() (map[string]madmin.UserInfo, error) {
return nil, errServerNotInitialized
}
var users = make(map[string]madmin.UserInfo)
sys.store.rlock()
defer sys.store.runlock()
if sys.usersSysType != MinIOUsersSysType {
return nil, errIAMActionNotAllowed
}
if sys.storeFallback {
return nil, errIAMNotInitialized
sys.store.rlock()
fallback := sys.storeFallback
sys.store.runlock()
if fallback {
if err := sys.store.loadAll(context.Background(), sys); err != nil {
return nil, err
}
}
sys.store.rlock()
defer sys.store.runlock()
var users = make(map[string]madmin.UserInfo)
for k, v := range sys.iamUsersMap {
if !v.IsTemp() && !v.IsServiceAccount() {
users[k] = madmin.UserInfo{
@@ -975,14 +989,19 @@ func (sys *IAMSys) ListServiceAccounts(ctx context.Context, accessKey string) ([
}
sys.store.rlock()
defer sys.store.runlock()
fallback := sys.storeFallback
sys.store.runlock()
if sys.storeFallback {
return nil, errIAMNotInitialized
if fallback {
if err := sys.store.loadAll(context.Background(), sys); err != nil {
return nil, err
}
}
var serviceAccounts []string
sys.store.rlock()
defer sys.store.runlock()
var serviceAccounts []string
for k, v := range sys.iamUsersMap {
if v.IsServiceAccount() && v.ParentUser == accessKey {
serviceAccounts = append(serviceAccounts, k)
@@ -1083,13 +1102,13 @@ func (sys *IAMSys) SetUserSecretKey(accessKey string, secretKey string) error {
return errServerNotInitialized
}
sys.store.lock()
defer sys.store.unlock()
if sys.usersSysType != MinIOUsersSysType {
return errIAMActionNotAllowed
}
sys.store.lock()
defer sys.store.unlock()
cred, ok := sys.iamUsersMap[accessKey]
if !ok {
return errNoSuchUser
@@ -1181,13 +1200,13 @@ func (sys *IAMSys) AddUsersToGroup(group string, members []string) error {
return errInvalidArgument
}
sys.store.lock()
defer sys.store.unlock()
if sys.usersSysType != MinIOUsersSysType {
return errIAMActionNotAllowed
}
sys.store.lock()
defer sys.store.unlock()
// Validate that all members exist.
for _, member := range members {
cr, ok := sys.iamUsersMap[member]
@@ -1241,13 +1260,13 @@ func (sys *IAMSys) RemoveUsersFromGroup(group string, members []string) error {
return errInvalidArgument
}
sys.store.lock()
defer sys.store.unlock()
if sys.usersSysType != MinIOUsersSysType {
return errIAMActionNotAllowed
}
sys.store.lock()
defer sys.store.unlock()
// Validate that all members exist.
for _, member := range members {
cr, ok := sys.iamUsersMap[member]
@@ -1317,13 +1336,13 @@ func (sys *IAMSys) SetGroupStatus(group string, enabled bool) error {
return errServerNotInitialized
}
sys.store.lock()
defer sys.store.unlock()
if sys.usersSysType != MinIOUsersSysType {
return errIAMActionNotAllowed
}
sys.store.lock()
defer sys.store.unlock()
if group == "" {
return errInvalidArgument
}
@@ -1392,20 +1411,28 @@ func (sys *IAMSys) ListGroups() (r []string, err error) {
return r, errServerNotInitialized
}
sys.store.rlock()
defer sys.store.runlock()
if sys.storeFallback {
return nil, errIAMNotInitialized
}
if sys.usersSysType != MinIOUsersSysType {
return nil, errIAMActionNotAllowed
}
sys.store.rlock()
fallback := sys.storeFallback
sys.store.runlock()
if fallback {
if err := sys.store.loadAll(context.Background(), sys); err != nil {
return nil, err
}
}
sys.store.rlock()
defer sys.store.runlock()
r = make([]string, 0, len(sys.iamGroupsMap))
for k := range sys.iamGroupsMap {
r = append(r, k)
}
return r, nil
}
@@ -1909,9 +1936,6 @@ func (sys *IAMSys) removeGroupFromMembershipsMap(group string) {
// EnableLDAPSys - enable ldap system users type.
func (sys *IAMSys) EnableLDAPSys() {
sys.store.lock()
defer sys.store.unlock()
sys.usersSysType = LDAPUsersSysType
}

View File

@@ -21,6 +21,7 @@ import (
"crypto/tls"
"errors"
"fmt"
"math/rand"
"net"
"os"
"os/signal"
@@ -38,7 +39,6 @@ import (
"github.com/minio/minio/pkg/certs"
"github.com/minio/minio/pkg/color"
"github.com/minio/minio/pkg/env"
"github.com/minio/minio/pkg/retry"
)
// ServerFlags - server command specific flags
@@ -209,21 +209,12 @@ func initServer(ctx context.Context, newObject ObjectLayer) error {
// Initialize IAM store
globalIAMSys.InitStore(newObject)
// Create cancel context to control 'newRetryTimer' go routine.
retryCtx, cancel := context.WithCancel(ctx)
// Indicate to our routine to exit cleanly upon return.
defer cancel()
// Make sure to hold lock for entire migration to avoid
// such that only one server should migrate the entire config
// at a given time, this big transaction lock ensures this
// appropriately. This is also true for rotation of encrypted
// content.
txnLk := newObject.NewNSLock(retryCtx, minioMetaBucket, minioConfigPrefix+"/transaction.lock")
// allocate dynamic timeout once before the loop
configLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
txnLk := newObject.NewNSLock(ctx, minioMetaBucket, minioConfigPrefix+"/transaction.lock")
// **** WARNING ****
// Migrating to encrypted backend should happen before initialization of any
@@ -238,12 +229,25 @@ func initServer(ctx context.Context, newObject ObjectLayer) error {
rquorum := InsufficientReadQuorum{}
wquorum := InsufficientWriteQuorum{}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
lockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
var err error
for range retry.NewTimerWithJitter(retryCtx, 500*time.Millisecond, time.Second, retry.MaxJitter) {
for {
select {
case <-ctx.Done():
// Retry was canceled successfully.
return fmt.Errorf("Initializing sub-systems stopped gracefully %w", ctx.Err())
default:
}
// let one of the server acquire the lock, if not let them timeout.
// which shall be retried again by this loop.
if err = txnLk.GetLock(configLockTimeout); err != nil {
if err = txnLk.GetLock(lockTimeout); err != nil {
logger.Info("Waiting for all MinIO sub-systems to be initialized.. trying to acquire lock")
time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
continue
}
@@ -279,20 +283,13 @@ func initServer(ctx context.Context, newObject ObjectLayer) error {
errors.As(err, &wquorum) ||
isErrBucketNotFound(err) {
logger.Info("Waiting for all MinIO sub-systems to be initialized.. possible cause (%v)", err)
time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
continue
}
// Any other unhandled return right here.
return fmt.Errorf("Unable to initialize sub-systems: %w", err)
}
// Return an error when retry is canceled or deadlined
if err = retryCtx.Err(); err != nil {
return err
}
// Retry was canceled successfully.
return errors.New("Initializing sub-systems stopped gracefully")
}
func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
@@ -351,17 +348,6 @@ func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
go initFederatorBackend(buckets, newObject)
}
if globalCacheConfig.Enabled {
// initialize the new disk cache objects.
var cacheAPI CacheObjectLayer
cacheAPI, err = newServerCacheObjects(ctx, globalCacheConfig)
logger.FatalIf(err, "Unable to initialize disk caching")
globalObjLayerMutex.Lock()
globalCacheObjectAPI = cacheAPI
globalObjLayerMutex.Unlock()
}
// Initialize bucket metadata sub-system.
globalBucketMetadataSys.Init(ctx, buckets, newObject)
@@ -509,6 +495,17 @@ func serverMain(ctx *cli.Context) {
}
}
if globalCacheConfig.Enabled {
// initialize the new disk cache objects.
var cacheAPI CacheObjectLayer
cacheAPI, err = newServerCacheObjects(GlobalContext, globalCacheConfig)
logger.FatalIf(err, "Unable to initialize disk caching")
globalObjLayerMutex.Lock()
globalCacheObjectAPI = cacheAPI
globalObjLayerMutex.Unlock()
}
// Initialize users credentials and policies in background right after config has initialized.
go globalIAMSys.Init(GlobalContext, newObject)