Mirror of https://github.com/minio/minio.git (synced 2025-11-07 21:02:58 -05:00)
fix: IAM store fallback to list users and policies from disk (#10787)
Bonus fixes: remove the retry package, since it is harder to get right, and remove the managed context so that we no longer have to rely on it; use a simple jittered retry instead.
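The jitter replacement is the common thread through the diffs below. A minimal, self-contained sketch of what the retry now amounts to (illustrative only, not the commit's code):

package main

import (
	"math/rand"
	"time"
)

func main() {
	// r.Float64() is uniform in [0, 1), so the sleep below is a random
	// duration in [0s, 5s): the exact expression the diff uses in place
	// of pkg/retry's timer channel.
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
}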
@@ -26,22 +26,25 @@ import (
 // commonTime returns a maximally occurring time from a list of time.
 func commonTime(modTimes []time.Time) (modTime time.Time, count int) {
 	var maxima int // Counter for remembering max occurrence of elements.
-	timeOccurenceMap := make(map[time.Time]int)
+	timeOccurenceMap := make(map[int64]int)
 	// Ignore the uuid sentinel and count the rest.
 	for _, time := range modTimes {
 		if time.Equal(timeSentinel) {
 			continue
 		}
-		timeOccurenceMap[time]++
+		timeOccurenceMap[time.UnixNano()]++
 	}

 	// Find the common cardinality from previously collected
 	// occurrences of elements.
-	for time, count := range timeOccurenceMap {
-		if count > maxima || (count == maxima && time.After(modTime)) {
+	for nano, count := range timeOccurenceMap {
+		t := time.Unix(0, nano)
+		if count > maxima || (count == maxima && t.After(modTime)) {
 			maxima = count
-			modTime = time
+			modTime = t
 		}
 	}

 	// Return the collected common uuid.
 	return modTime, maxima
 }
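The key-type change matters because time.Time is a struct and map keys compare with == on all fields, including the monotonic clock reading; two values for the same instant can therefore land under distinct keys and split the occurrence counts. A small demonstration (not part of the commit):

package main

import (
	"fmt"
	"time"
)

func main() {
	t1 := time.Now()  // carries a monotonic clock reading
	t2 := t1.Round(0) // same instant, monotonic reading stripped

	m := map[time.Time]int{}
	m[t1]++
	m[t2]++
	fmt.Println(len(m), t1.Equal(t2)) // typically "2 true": equal instants, distinct keys

	n := map[int64]int{}
	n[t1.UnixNano()]++
	n[t2.UnixNano()]++
	fmt.Println(len(n)) // 1: UnixNano collapses both to the same key
}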
@@ -1202,15 +1202,7 @@ func (z *erasureServerSets) deleteAll(ctx context.Context, bucket, prefix string
 			wg.Add(1)
 			go func(disk StorageAPI) {
 				defer wg.Done()
-				if err := disk.Delete(ctx, bucket, prefix, true); err != nil {
-					if !IsErrIgnored(err, []error{
-						errDiskNotFound,
-						errVolumeNotFound,
-						errFileNotFound,
-					}...) {
-						logger.LogOnceIf(ctx, err, disk.String())
-					}
-				}
+				disk.Delete(ctx, bucket, prefix, true)
 			}(disk)
 		}
 	}
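For orientation, the enclosing code (the context lines above) is a standard WaitGroup fan-out: one goroutine per disk, then wait for all of them; the hunk only drops the error filtering inside the goroutine body. A stripped-down sketch of that shape, with stand-in names rather than MinIO's StorageAPI:

package main

import (
	"fmt"
	"sync"
)

func main() {
	disks := []string{"disk1", "disk2", "disk3"} // stand-in for []StorageAPI

	var wg sync.WaitGroup
	for _, disk := range disks {
		wg.Add(1)
		// Pass the loop variable as an argument so each goroutine gets
		// its own copy (required before Go 1.22's loop-var scoping).
		go func(d string) {
			defer wg.Done()
			fmt.Println("delete on", d) // placeholder for disk.Delete(ctx, bucket, prefix, true)
		}(disk)
	}
	wg.Wait()
}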
@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"math/rand"
 	"os"
 	"path"
 	"time"
@@ -27,7 +28,6 @@ import (
 	"github.com/minio/minio/cmd/config"
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/lock"
-	"github.com/minio/minio/pkg/retry"
 )

 // FS format version strings.
@@ -340,31 +340,28 @@ func formatFSFixDeploymentID(ctx context.Context, fsFormatPath string) error {
 		return time.Now().Round(time.Second).Sub(formatStartTime).String()
 	}

-	retryCtx, cancel := context.WithCancel(ctx)
-	// Indicate to our routine to exit cleanly upon return.
-	defer cancel()
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))

 	var wlk *lock.LockedFile
-	retryCh := retry.NewTimerWithJitter(retryCtx, time.Second, 30*time.Second, retry.MaxJitter)
 	var stop bool
 	for !stop {
 		select {
-		case <-retryCh:
+		case <-ctx.Done():
+			return fmt.Errorf("Initializing FS format stopped gracefully")
+		default:
 			wlk, err = lock.TryLockedOpenFile(fsFormatPath, os.O_RDWR, 0)
 			if err == lock.ErrAlreadyLocked {
 				// Lock already present, sleep and attempt again
 				logger.Info("Another minio process(es) might be holding a lock to the file %s. Please kill that minio process(es) (elapsed %s)\n", fsFormatPath, getElapsedTime())
+				time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
 				continue
 			}
			if err != nil {
 				return err
 			}
-		case <-ctx.Done():
-			return fmt.Errorf("Initializing FS format stopped gracefully")
+			stop = true
 		}
-		stop = true
 	}

 	defer wlk.Close()

 	if err = jsonLoad(wlk, format); err != nil {
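The new loop shape replaces a retry-timer channel with a select whose default case does the work, so cancellation is checked on every iteration without blocking. A generic, runnable sketch of that pattern (the helper name is mine, not the commit's):

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// pollUntil retries work until it succeeds or ctx is canceled. The
// default case makes the ctx.Done() check non-blocking.
func pollUntil(ctx context.Context, work func() error) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
			if err := work(); err != nil {
				time.Sleep(100 * time.Millisecond) // jittered in the real code
				continue
			}
			return nil
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()
	err := pollUntil(ctx, func() error { return errors.New("lock busy") })
	fmt.Println(err) // context.DeadlineExceeded once the timeout fires
}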
@@ -421,7 +421,6 @@ func (iamOS *IAMObjectStore) loadAll(ctx context.Context, sys *IAMSys) error {
 	}
 	// Sets default canned policies, if none are set.
 	setDefaultCannedPolicies(sys.iamPolicyDocsMap)

-	iamOS.unlock()

 	if isMinIOUsersSys {
cmd/iam.go: 112 changed lines
@@ -23,6 +23,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"math/rand"
 	"strings"
 	"time"

@@ -33,7 +34,6 @@ import (
 	"github.com/minio/minio/pkg/auth"
 	iampolicy "github.com/minio/minio/pkg/iam/policy"
 	"github.com/minio/minio/pkg/madmin"
-	"github.com/minio/minio/pkg/retry"
 )

 // UsersSysType - defines the type of users and groups system that is
@@ -443,11 +443,15 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
 	// allocate dynamic timeout once before the loop
 	iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)

-	for range retry.NewTimerWithJitter(retryCtx, time.Second, 5*time.Second, retry.MaxJitter) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	var err error
+	for {
 		// let one of the server acquire the lock, if not let them timeout.
 		// which shall be retried again by this loop.
-		if err := txnLk.GetLock(iamLockTimeout); err != nil {
+		if err = txnLk.GetLock(iamLockTimeout); err != nil {
 			logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. trying to acquire lock")
+			time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
 			continue
 		}
@@ -455,7 +459,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
 		// **** WARNING ****
 		// Migrating to encrypted backend on etcd should happen before initialization of
 		// IAM sub-system, make sure that we do not move the above codeblock elsewhere.
-		if err := migrateIAMConfigsEtcdToEncrypted(ctx, globalEtcdClient); err != nil {
+		if err = migrateIAMConfigsEtcdToEncrypted(ctx, globalEtcdClient); err != nil {
 			txnLk.Unlock()
 			logger.LogIf(ctx, fmt.Errorf("Unable to decrypt an encrypted ETCD backend for IAM users and policies: %w", err))
 			logger.LogIf(ctx, errors.New("IAM sub-system is partially initialized, some users may not be available"))
@@ -469,11 +473,10 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
 		}

 		// Migrate IAM configuration, if necessary.
-		if err := sys.doIAMConfigMigration(ctx); err != nil {
+		if err = sys.doIAMConfigMigration(ctx); err != nil {
 			txnLk.Unlock()
 			if errors.Is(err, errDiskNotFound) ||
 				errors.Is(err, errConfigNotFound) ||
 				errors.Is(err, context.Canceled) ||
 				errors.Is(err, context.DeadlineExceeded) ||
 				errors.As(err, &rquorum) ||
 				errors.As(err, &wquorum) ||
@@ -491,11 +494,10 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer) {
 		break
 	}

-	err := sys.store.loadAll(ctx, sys)
+	err = sys.store.loadAll(ctx, sys)

 	// Invalidate the old cred always, even upon error to avoid any leakage.
 	globalOldCred = auth.Credentials{}

 	if err != nil {
 		logger.LogIf(ctx, fmt.Errorf("Unable to initialize IAM sub-system, some users may not be available %w", err))
 	}
@@ -579,12 +581,18 @@ func (sys *IAMSys) ListPolicies() (map[string]iampolicy.Policy, error) {
 	}

 	sys.store.rlock()
-	defer sys.store.runlock()
+	fallback := sys.storeFallback
+	sys.store.runlock()

-	if sys.storeFallback {
-		return nil, errIAMNotInitialized
+	if fallback {
+		if err := sys.store.loadAll(context.Background(), sys); err != nil {
+			return nil, err
+		}
 	}

+	sys.store.rlock()
+	defer sys.store.runlock()
+
 	policyDocsMap := make(map[string]iampolicy.Policy, len(sys.iamPolicyDocsMap))
 	for k, v := range sys.iamPolicyDocsMap {
 		policyDocsMap[k] = v
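This hunk is the heart of the fix, and the same shape repeats in ListUsers, ListServiceAccounts, and ListGroups below: read the fallback flag under the read lock, release it, reload from disk if the store was never populated, then reacquire the lock to copy the map out. The read lock has to be dropped first because loadAll presumably takes the write lock itself. A simplified, self-contained model of that pattern (stand-in types, not MinIO's store):

package main

import (
	"context"
	"sync"
)

// store models the IAM store: a map guarded by an RWMutex plus a
// fallback flag meaning "never successfully loaded".
type store struct {
	mu       sync.RWMutex
	fallback bool
	policies map[string]string
}

// loadAll takes the write lock, which is why callers must not hold
// the read lock when they call it.
func (s *store) loadAll(ctx context.Context) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.policies = map[string]string{"readwrite": "{...}"}
	s.fallback = false
	return nil
}

func (s *store) listPolicies(ctx context.Context) (map[string]string, error) {
	s.mu.RLock()
	fallback := s.fallback
	s.mu.RUnlock() // release before reloading

	if fallback {
		if err := s.loadAll(ctx); err != nil {
			return nil, err
		}
	}

	s.mu.RLock()
	defer s.mu.RUnlock()
	out := make(map[string]string, len(s.policies))
	for k, v := range s.policies { // copy so callers never alias the live map
		out[k] = v
	}
	return out, nil
}

func main() {
	s := &store{fallback: true}
	if _, err := s.listPolicies(context.Background()); err != nil {
		panic(err)
	}
}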
@@ -741,19 +749,25 @@ func (sys *IAMSys) ListUsers() (map[string]madmin.UserInfo, error) {
 		return nil, errServerNotInitialized
 	}

-	var users = make(map[string]madmin.UserInfo)
-
-	sys.store.rlock()
-	defer sys.store.runlock()
-
 	if sys.usersSysType != MinIOUsersSysType {
 		return nil, errIAMActionNotAllowed
 	}

-	if sys.storeFallback {
-		return nil, errIAMNotInitialized
+	sys.store.rlock()
+	fallback := sys.storeFallback
+	sys.store.runlock()
+
+	if fallback {
+		if err := sys.store.loadAll(context.Background(), sys); err != nil {
+			return nil, err
+		}
 	}

+	sys.store.rlock()
+	defer sys.store.runlock()
+
+	var users = make(map[string]madmin.UserInfo)
+
 	for k, v := range sys.iamUsersMap {
 		if !v.IsTemp() && !v.IsServiceAccount() {
 			users[k] = madmin.UserInfo{
@@ -975,14 +989,19 @@ func (sys *IAMSys) ListServiceAccounts(ctx context.Context, accessKey string) ([
 	}

 	sys.store.rlock()
-	defer sys.store.runlock()
+	fallback := sys.storeFallback
+	sys.store.runlock()

-	if sys.storeFallback {
-		return nil, errIAMNotInitialized
+	if fallback {
+		if err := sys.store.loadAll(context.Background(), sys); err != nil {
+			return nil, err
+		}
 	}

-	var serviceAccounts []string
+	sys.store.rlock()
+	defer sys.store.runlock()
+
+	var serviceAccounts []string
 	for k, v := range sys.iamUsersMap {
 		if v.IsServiceAccount() && v.ParentUser == accessKey {
 			serviceAccounts = append(serviceAccounts, k)
@@ -1083,13 +1102,13 @@ func (sys *IAMSys) SetUserSecretKey(accessKey string, secretKey string) error {
 		return errServerNotInitialized
 	}

-	sys.store.lock()
-	defer sys.store.unlock()
-
 	if sys.usersSysType != MinIOUsersSysType {
 		return errIAMActionNotAllowed
 	}

+	sys.store.lock()
+	defer sys.store.unlock()
+
 	cred, ok := sys.iamUsersMap[accessKey]
 	if !ok {
 		return errNoSuchUser
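This and the next three hunks make the same mechanical change: the cheap usersSysType check moves ahead of the mutex, so calls that fail validation return without ever contending on the store lock. That reorder is only safe if usersSysType cannot change concurrently; the EnableLDAPSys hunk further down drops its locking too, which suggests the field is treated as set once during startup. A toy illustration of the reorder (names here are assumed, not MinIO's):

package main

import (
	"errors"
	"sync"
)

var errNotAllowed = errors.New("action not allowed")

type sys struct {
	mu    sync.Mutex
	kind  string // assumed immutable after startup
	users map[string]bool
}

func (s *sys) addUser(name string) error {
	// Validate before locking: failed calls never touch the mutex.
	if s.kind != "minio" {
		return errNotAllowed
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	s.users[name] = true
	return nil
}

func main() {
	s := &sys{kind: "minio", users: map[string]bool{}}
	_ = s.addUser("alice")
}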
@@ -1181,13 +1200,13 @@ func (sys *IAMSys) AddUsersToGroup(group string, members []string) error {
 		return errInvalidArgument
 	}

-	sys.store.lock()
-	defer sys.store.unlock()
-
 	if sys.usersSysType != MinIOUsersSysType {
 		return errIAMActionNotAllowed
 	}

+	sys.store.lock()
+	defer sys.store.unlock()
+
 	// Validate that all members exist.
 	for _, member := range members {
 		cr, ok := sys.iamUsersMap[member]
@@ -1241,13 +1260,13 @@ func (sys *IAMSys) RemoveUsersFromGroup(group string, members []string) error {
 		return errInvalidArgument
 	}

-	sys.store.lock()
-	defer sys.store.unlock()
-
 	if sys.usersSysType != MinIOUsersSysType {
 		return errIAMActionNotAllowed
 	}

+	sys.store.lock()
+	defer sys.store.unlock()
+
 	// Validate that all members exist.
 	for _, member := range members {
 		cr, ok := sys.iamUsersMap[member]
@@ -1317,13 +1336,13 @@ func (sys *IAMSys) SetGroupStatus(group string, enabled bool) error {
 		return errServerNotInitialized
 	}

-	sys.store.lock()
-	defer sys.store.unlock()
-
 	if sys.usersSysType != MinIOUsersSysType {
 		return errIAMActionNotAllowed
 	}

+	sys.store.lock()
+	defer sys.store.unlock()
+
 	if group == "" {
 		return errInvalidArgument
 	}
@@ -1392,20 +1411,28 @@ func (sys *IAMSys) ListGroups() (r []string, err error) {
 		return r, errServerNotInitialized
 	}

-	sys.store.rlock()
-	defer sys.store.runlock()
-
-	if sys.storeFallback {
-		return nil, errIAMNotInitialized
-	}
-
 	if sys.usersSysType != MinIOUsersSysType {
 		return nil, errIAMActionNotAllowed
 	}

+	sys.store.rlock()
+	fallback := sys.storeFallback
+	sys.store.runlock()
+
+	if fallback {
+		if err := sys.store.loadAll(context.Background(), sys); err != nil {
+			return nil, err
+		}
+	}
+
+	sys.store.rlock()
+	defer sys.store.runlock()
+
 	r = make([]string, 0, len(sys.iamGroupsMap))
 	for k := range sys.iamGroupsMap {
 		r = append(r, k)
 	}

 	return r, nil
 }
@@ -1909,9 +1936,6 @@ func (sys *IAMSys) removeGroupFromMembershipsMap(group string) {

 // EnableLDAPSys - enable ldap system users type.
 func (sys *IAMSys) EnableLDAPSys() {
-	sys.store.lock()
-	defer sys.store.unlock()
-
 	sys.usersSysType = LDAPUsersSysType
 }
@@ -21,6 +21,7 @@ import (
 	"crypto/tls"
 	"errors"
 	"fmt"
+	"math/rand"
 	"net"
 	"os"
 	"os/signal"
@@ -38,7 +39,6 @@ import (
 	"github.com/minio/minio/pkg/certs"
 	"github.com/minio/minio/pkg/color"
 	"github.com/minio/minio/pkg/env"
-	"github.com/minio/minio/pkg/retry"
 )

 // ServerFlags - server command specific flags
@@ -209,21 +209,12 @@ func initServer(ctx context.Context, newObject ObjectLayer) error {
 	// Initialize IAM store
 	globalIAMSys.InitStore(newObject)

-	// Create cancel context to control 'newRetryTimer' go routine.
-	retryCtx, cancel := context.WithCancel(ctx)
-
-	// Indicate to our routine to exit cleanly upon return.
-	defer cancel()
-
 	// Make sure to hold lock for entire migration to avoid
 	// such that only one server should migrate the entire config
 	// at a given time, this big transaction lock ensures this
 	// appropriately. This is also true for rotation of encrypted
 	// content.
-	txnLk := newObject.NewNSLock(retryCtx, minioMetaBucket, minioConfigPrefix+"/transaction.lock")
-
-	// allocate dynamic timeout once before the loop
-	configLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
+	txnLk := newObject.NewNSLock(ctx, minioMetaBucket, minioConfigPrefix+"/transaction.lock")

 	// **** WARNING ****
 	// Migrating to encrypted backend should happen before initialization of any
@@ -238,12 +229,25 @@ func initServer(ctx context.Context, newObject ObjectLayer) error {
 	rquorum := InsufficientReadQuorum{}
 	wquorum := InsufficientWriteQuorum{}

+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	lockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
+
 	var err error
-	for range retry.NewTimerWithJitter(retryCtx, 500*time.Millisecond, time.Second, retry.MaxJitter) {
+	for {
+		select {
+		case <-ctx.Done():
+			// Retry was canceled successfully.
+			return fmt.Errorf("Initializing sub-systems stopped gracefully %w", ctx.Err())
+		default:
+		}
+
 		// let one of the server acquire the lock, if not let them timeout.
 		// which shall be retried again by this loop.
-		if err = txnLk.GetLock(configLockTimeout); err != nil {
+		if err = txnLk.GetLock(lockTimeout); err != nil {
 			logger.Info("Waiting for all MinIO sub-systems to be initialized.. trying to acquire lock")
+
+			time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
 			continue
 		}
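Note that the added return wraps ctx.Err() with %w, so callers can still classify the shutdown cause with errors.Is through the wrapper. A quick check of that behavior:

package main

import (
	"context"
	"errors"
	"fmt"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	// Same wrapping style as the new return in initServer.
	err := fmt.Errorf("Initializing sub-systems stopped gracefully %w", ctx.Err())
	fmt.Println(errors.Is(err, context.Canceled)) // true
}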
@@ -279,20 +283,13 @@ func initServer(ctx context.Context, newObject ObjectLayer) error {
 			errors.As(err, &wquorum) ||
 			isErrBucketNotFound(err) {
 			logger.Info("Waiting for all MinIO sub-systems to be initialized.. possible cause (%v)", err)
+			time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
 			continue
 		}

 		// Any other unhandled return right here.
 		return fmt.Errorf("Unable to initialize sub-systems: %w", err)
 	}
-
-	// Return an error when retry is canceled or deadlined
-	if err = retryCtx.Err(); err != nil {
-		return err
-	}
-
-	// Retry was canceled successfully.
-	return errors.New("Initializing sub-systems stopped gracefully")
 }

 func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
@@ -351,17 +348,6 @@ func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
 		go initFederatorBackend(buckets, newObject)
 	}

-	if globalCacheConfig.Enabled {
-		// initialize the new disk cache objects.
-		var cacheAPI CacheObjectLayer
-		cacheAPI, err = newServerCacheObjects(ctx, globalCacheConfig)
-		logger.FatalIf(err, "Unable to initialize disk caching")
-
-		globalObjLayerMutex.Lock()
-		globalCacheObjectAPI = cacheAPI
-		globalObjLayerMutex.Unlock()
-	}
-
 	// Initialize bucket metadata sub-system.
 	globalBucketMetadataSys.Init(ctx, buckets, newObject)
@@ -509,6 +495,17 @@ func serverMain(ctx *cli.Context) {
 		}
 	}

+	if globalCacheConfig.Enabled {
+		// initialize the new disk cache objects.
+		var cacheAPI CacheObjectLayer
+		cacheAPI, err = newServerCacheObjects(GlobalContext, globalCacheConfig)
+		logger.FatalIf(err, "Unable to initialize disk caching")
+
+		globalObjLayerMutex.Lock()
+		globalCacheObjectAPI = cacheAPI
+		globalObjLayerMutex.Unlock()
+	}
+
 	// Initialize users credentials and policies in background right after config has initialized.
 	go globalIAMSys.Init(GlobalContext, newObject)