From 5a8df7efb3e7af9cc6ee75eef75b3acf847e8184 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Thu, 1 Dec 2022 14:31:35 -0800 Subject: [PATCH] re-implement StorageInfo to be a peer call (#16155) --- cmd/admin-handlers.go | 7 +- cmd/admin-server-info.go | 5 +- cmd/data-scanner-metric.go | 10 +-- cmd/erasure-server-pool-decom.go | 2 +- cmd/erasure-server-pool-rebalance.go | 2 +- cmd/erasure-server-pool.go | 40 ++-------- cmd/erasure-sets.go | 24 ++---- cmd/erasure.go | 20 ++--- cmd/global-heal.go | 3 +- cmd/metrics-realtime.go | 8 +- cmd/metrics-v2.go | 106 +++++++++++++++++++-------- cmd/metrics.go | 8 +- cmd/notification.go | 33 +++++++++ cmd/object-api-interface.go | 4 +- cmd/peer-rest-client.go | 11 +++ cmd/peer-rest-common.go | 3 +- cmd/peer-rest-server.go | 18 +++++ cmd/rebalance-admin.go | 2 +- cmd/scannermetric_string.go | 6 +- cmd/server-startup-msg.go | 7 +- 20 files changed, 191 insertions(+), 128 deletions(-) diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index c8e5ebe45..96ed173ac 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -344,8 +344,7 @@ func (a adminAPIHandlers) StorageInfoHandler(w http.ResponseWriter, r *http.Requ return } - // ignores any errors here. - storageInfo, _ := objectAPI.StorageInfo(ctx) + storageInfo := objectAPI.StorageInfo(ctx) // Collect any disk healing. healing, _ := getAggregatedBackgroundHealState(ctx, nil) @@ -1234,7 +1233,7 @@ func (a adminAPIHandlers) ObjectSpeedTestHandler(w http.ResponseWriter, r *http. 
duration = time.Second * 10 } - storageInfo, _ := objectAPI.StorageInfo(ctx) + storageInfo := objectAPI.StorageInfo(ctx) sufficientCapacity, canAutotune, capacityErrMsg := validateObjPerfOptions(ctx, storageInfo, concurrent, size, autotune) if !sufficientCapacity { @@ -2581,7 +2580,7 @@ func getClusterMetaInfo(ctx context.Context) []byte { ci.Info.NoOfServers = len(globalEndpoints.Hostnames()) ci.Info.MinioVersion = Version - si, _ := objectAPI.StorageInfo(ctx) + si := objectAPI.StorageInfo(ctx) ci.Info.NoOfDrives = len(si.Disks) for _, disk := range si.Disks { diff --git a/cmd/admin-server-info.go b/cmd/admin-server-info.go index bb907f81e..6b97cc5e9 100644 --- a/cmd/admin-server-info.go +++ b/cmd/admin-server-info.go @@ -143,10 +143,11 @@ func getLocalServerProperty(endpointServerPools EndpointServerPools, r *http.Req objLayer := newObjectLayerFn() if objLayer != nil { - // only need Disks information in server mode. - storageInfo, _ := objLayer.LocalStorageInfo(GlobalContext) + storageInfo := objLayer.LocalStorageInfo(GlobalContext) props.State = string(madmin.ItemOnline) props.Disks = storageInfo.Disks + } else { + props.State = string(madmin.ItemOffline) } return props diff --git a/cmd/data-scanner-metric.go b/cmd/data-scanner-metric.go index 3fac10c26..10b623812 100644 --- a/cmd/data-scanner-metric.go +++ b/cmd/data-scanner-metric.go @@ -58,9 +58,9 @@ const ( scannerMetricLastRealtime // Trace only metrics: - scannerMetricScanFolder // Scan a folder on disk, recursively. - scannerMetricScanCycle // Full cycle, cluster global - scannerMetricScanBucketDisk // Single bucket on one disk + scannerMetricScanFolder // Scan a folder on disk, recursively. + scannerMetricScanCycle // Full cycle, cluster global + scannerMetricScanBucketDrive // Single bucket on one drive // Must be last: scannerMetricLast @@ -181,9 +181,9 @@ func (p *scannerMetrics) getCurrentPaths() []string { return res } -// activeDisks returns the number of currently active disks. 
+// activeDrives returns the number of currently active drives. // (since this is concurrent it may not be 100% reliable) -func (p *scannerMetrics) activeDisks() int { +func (p *scannerMetrics) activeDrives() int { var i int p.currentPaths.Range(func(k, v interface{}) bool { i++ diff --git a/cmd/erasure-server-pool-decom.go b/cmd/erasure-server-pool-decom.go index f3af410e5..0c0b6c63e 100644 --- a/cmd/erasure-server-pool-decom.go +++ b/cmd/erasure-server-pool-decom.go @@ -1069,7 +1069,7 @@ func (z *erasureServerPools) getDecommissionPoolSpaceInfo(idx int) (pi poolSpace return pi, errInvalidArgument } - info, _ := z.serverPools[idx].StorageInfo(context.Background()) + info := z.serverPools[idx].StorageInfo(context.Background()) info.Backend = z.BackendInfo() usableTotal := int64(GetTotalUsableCapacity(info.Disks, info)) diff --git a/cmd/erasure-server-pool-rebalance.go b/cmd/erasure-server-pool-rebalance.go index aad8fa2b0..3951844c5 100644 --- a/cmd/erasure-server-pool-rebalance.go +++ b/cmd/erasure-server-pool-rebalance.go @@ -125,7 +125,7 @@ func (z *erasureServerPools) initRebalanceMeta(ctx context.Context, buckets []st } // Fetch disk capacity and available space. 
- si, _ := z.StorageInfo(ctx) + si := z.StorageInfo(ctx) diskStats := make([]struct { AvailableSpace uint64 TotalSpace uint64 diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index 6c659572b..6fad476b1 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -545,16 +545,15 @@ func (z *erasureServerPools) BackendInfo() (b madmin.BackendInfo) { return } -func (z *erasureServerPools) LocalStorageInfo(ctx context.Context) (StorageInfo, []error) { +func (z *erasureServerPools) LocalStorageInfo(ctx context.Context) StorageInfo { var storageInfo StorageInfo storageInfos := make([]StorageInfo, len(z.serverPools)) - storageInfosErrs := make([][]error, len(z.serverPools)) g := errgroup.WithNErrs(len(z.serverPools)) for index := range z.serverPools { index := index g.Go(func() error { - storageInfos[index], storageInfosErrs[index] = z.serverPools[index].LocalStorageInfo(ctx) + storageInfos[index] = z.serverPools[index].LocalStorageInfo(ctx) return nil }, index) } @@ -567,40 +566,11 @@ func (z *erasureServerPools) LocalStorageInfo(ctx context.Context) (StorageInfo, storageInfo.Disks = append(storageInfo.Disks, lstorageInfo.Disks...) } - var errs []error - for i := range z.serverPools { - errs = append(errs, storageInfosErrs[i]...) - } - return storageInfo, errs + return storageInfo } -func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []error) { - var storageInfo StorageInfo - - storageInfos := make([]StorageInfo, len(z.serverPools)) - storageInfosErrs := make([][]error, len(z.serverPools)) - g := errgroup.WithNErrs(len(z.serverPools)) - for index := range z.serverPools { - index := index - g.Go(func() error { - storageInfos[index], storageInfosErrs[index] = z.serverPools[index].StorageInfo(ctx) - return nil - }, index) - } - - // Wait for the go routines. 
- g.Wait() - - storageInfo.Backend = z.BackendInfo() - for _, lstorageInfo := range storageInfos { - storageInfo.Disks = append(storageInfo.Disks, lstorageInfo.Disks...) - } - - var errs []error - for i := range z.serverPools { - errs = append(errs, storageInfosErrs[i]...) - } - return storageInfo, errs +func (z *erasureServerPools) StorageInfo(ctx context.Context) StorageInfo { + return globalNotificationSys.StorageInfo(z) } func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32, healScanMode madmin.HealScanMode) error { diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index 8c9d1c392..57d439232 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -600,17 +600,16 @@ func (s *erasureSets) ParityCount() int { } // StorageInfo - combines output of StorageInfo across all erasure coded object sets. -func (s *erasureSets) StorageInfo(ctx context.Context) (StorageInfo, []error) { +func (s *erasureSets) StorageInfo(ctx context.Context) StorageInfo { var storageInfo madmin.StorageInfo storageInfos := make([]madmin.StorageInfo, len(s.sets)) - storageInfoErrs := make([][]error, len(s.sets)) g := errgroup.WithNErrs(len(s.sets)) for index := range s.sets { index := index g.Go(func() error { - storageInfos[index], storageInfoErrs[index] = s.sets[index].StorageInfo(ctx) + storageInfos[index] = s.sets[index].StorageInfo(ctx) return nil }, index) } @@ -622,26 +621,20 @@ func (s *erasureSets) StorageInfo(ctx context.Context) (StorageInfo, []error) { storageInfo.Disks = append(storageInfo.Disks, lstorageInfo.Disks...) } - errs := make([]error, 0, len(s.sets)*s.setDriveCount) - for i := range s.sets { - errs = append(errs, storageInfoErrs[i]...) - } - - return storageInfo, errs + return storageInfo } // StorageInfo - combines output of StorageInfo across all erasure coded object sets. 
-func (s *erasureSets) LocalStorageInfo(ctx context.Context) (StorageInfo, []error) { +func (s *erasureSets) LocalStorageInfo(ctx context.Context) StorageInfo { var storageInfo StorageInfo storageInfos := make([]StorageInfo, len(s.sets)) - storageInfoErrs := make([][]error, len(s.sets)) g := errgroup.WithNErrs(len(s.sets)) for index := range s.sets { index := index g.Go(func() error { - storageInfos[index], storageInfoErrs[index] = s.sets[index].LocalStorageInfo(ctx) + storageInfos[index] = s.sets[index].LocalStorageInfo(ctx) return nil }, index) } @@ -653,12 +646,7 @@ func (s *erasureSets) LocalStorageInfo(ctx context.Context) (StorageInfo, []erro storageInfo.Disks = append(storageInfo.Disks, lstorageInfo.Disks...) } - var errs []error - for i := range s.sets { - errs = append(errs, storageInfoErrs[i]...) - } - - return storageInfo, errs + return storageInfo } // Shutdown shutsdown all erasure coded sets in parallel diff --git a/cmd/erasure.go b/cmd/erasure.go index 93c491d6f..0726311ba 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -175,7 +175,7 @@ func getOnlineOfflineDisksStats(disksInfo []madmin.Disk) (onlineDisks, offlineDi } // getDisksInfo - fetch disks info across all other storage API. -func getDisksInfo(disks []StorageAPI, endpoints []Endpoint) (disksInfo []madmin.Disk, errs []error) { +func getDisksInfo(disks []StorageAPI, endpoints []Endpoint) (disksInfo []madmin.Disk) { disksInfo = make([]madmin.Disk, len(disks)) g := errgroup.WithNErrs(len(disks)) @@ -189,8 +189,7 @@ func getDisksInfo(disks []StorageAPI, endpoints []Endpoint) (disksInfo []madmin. State: diskErrToDriveState(errDiskNotFound), Endpoint: diskEndpoint, } - // Storage disk is empty, perhaps ignored disk or not available. - return errDiskNotFound + return nil } info, err := disks[index].DiskInfo(context.TODO()) di := madmin.Disk{ @@ -231,16 +230,17 @@ func getDisksInfo(disks []StorageAPI, endpoints []Endpoint) (disksInfo []madmin. 
di.Utilization = float64(info.Used / info.Total * 100) } disksInfo[index] = di - return err + return nil }, index) } - return disksInfo, g.Wait() + g.Wait() + return disksInfo } // Get an aggregated storage info across all disks. -func getStorageInfo(disks []StorageAPI, endpoints []Endpoint) (StorageInfo, []error) { - disksInfo, errs := getDisksInfo(disks, endpoints) +func getStorageInfo(disks []StorageAPI, endpoints []Endpoint) StorageInfo { + disksInfo := getDisksInfo(disks, endpoints) // Sort so that the first element is the smallest. sort.Sort(byDiskTotal(disksInfo)) @@ -250,18 +250,18 @@ func getStorageInfo(disks []StorageAPI, endpoints []Endpoint) (StorageInfo, []er } storageInfo.Backend.Type = madmin.Erasure - return storageInfo, errs + return storageInfo } // StorageInfo - returns underlying storage statistics. -func (er erasureObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) { +func (er erasureObjects) StorageInfo(ctx context.Context) StorageInfo { disks := er.getDisks() endpoints := er.getEndpoints() return getStorageInfo(disks, endpoints) } // LocalStorageInfo - returns underlying local storage statistics. -func (er erasureObjects) LocalStorageInfo(ctx context.Context) (StorageInfo, []error) { +func (er erasureObjects) LocalStorageInfo(ctx context.Context) StorageInfo { disks := er.getDisks() endpoints := er.getEndpoints() diff --git a/cmd/global-heal.go b/cmd/global-heal.go index f483a1f78..8febcaf72 100644 --- a/cmd/global-heal.go +++ b/cmd/global-heal.go @@ -105,8 +105,7 @@ func getBackgroundHealStatus(ctx context.Context, o ObjectLayer) (madmin.BgHealS return status, true } - // ignores any errors here. 
- si, _ := o.StorageInfo(ctx) + si := o.StorageInfo(ctx) indexed := make(map[string][]madmin.Disk) for _, disk := range si.Disks { diff --git a/cmd/metrics-realtime.go b/cmd/metrics-realtime.go index e9e272a0d..a2f5e8751 100644 --- a/cmd/metrics-realtime.go +++ b/cmd/metrics-realtime.go @@ -91,10 +91,8 @@ func collectLocalDisksMetrics(disks map[string]struct{}) map[string]madmin.DiskM return metrics } - // only need Disks information in server mode. - storageInfo, errs := objLayer.LocalStorageInfo(GlobalContext) - - for i, d := range storageInfo.Disks { + storageInfo := objLayer.LocalStorageInfo(GlobalContext) + for _, d := range storageInfo.Disks { if len(disks) != 0 { _, ok := disks[d.Endpoint] if !ok { @@ -102,7 +100,7 @@ func collectLocalDisksMetrics(disks map[string]struct{}) map[string]madmin.DiskM } } - if errs[i] != nil { + if d.State != madmin.DriveStateOk && d.State != madmin.DriveStateUnformatted { metrics[d.Endpoint] = madmin.DiskMetric{NDisks: 1, Offline: 1} continue } diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 16f50ef33..472096649 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -78,7 +78,7 @@ func init() { nodeCollector = newMinioCollectorNode([]*MetricsGroup{ getNodeHealthMetrics(), - getLocalDiskStorageMetrics(), + getLocalDriveStorageMetrics(), getCacheMetrics(), getHTTPMetrics(), getNetworkMetrics(), @@ -333,7 +333,7 @@ func getClusterCapacityUsageFreeBytesMD() MetricDescription { } } -func getNodeDiskAPILatencyMD() MetricDescription { +func getNodeDriveAPILatencyMD() MetricDescription { return MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: diskSubsystem, @@ -343,7 +343,7 @@ func getNodeDiskAPILatencyMD() MetricDescription { } } -func getNodeDiskUsedBytesMD() MetricDescription { +func getNodeDriveUsedBytesMD() MetricDescription { return MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: diskSubsystem, @@ -353,7 +353,7 @@ func getNodeDiskUsedBytesMD() MetricDescription { } } -func 
getNodeDiskFreeBytesMD() MetricDescription { +func getNodeDriveFreeBytesMD() MetricDescription { return MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: diskSubsystem, @@ -363,7 +363,7 @@ func getNodeDiskFreeBytesMD() MetricDescription { } } -func getClusterDisksOfflineTotalMD() MetricDescription { +func getClusterDrivesOfflineTotalMD() MetricDescription { return MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: diskSubsystem, @@ -373,7 +373,7 @@ func getClusterDisksOfflineTotalMD() MetricDescription { } } -func getClusterDisksOnlineTotalMD() MetricDescription { +func getClusterDrivesOnlineTotalMD() MetricDescription { return MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: diskSubsystem, @@ -383,7 +383,7 @@ func getClusterDisksOnlineTotalMD() MetricDescription { } } -func getClusterDisksTotalMD() MetricDescription { +func getClusterDrivesTotalMD() MetricDescription { return MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: diskSubsystem, @@ -393,9 +393,39 @@ func getClusterDisksTotalMD() MetricDescription { } } -func getClusterDisksFreeInodes() MetricDescription { +func getNodeDrivesOfflineTotalMD() MetricDescription { return MetricDescription{ - Namespace: clusterMetricNamespace, + Namespace: nodeMetricNamespace, + Subsystem: diskSubsystem, + Name: offlineTotal, + Help: "Total drives offline", + Type: gaugeMetric, + } +} + +func getNodeDrivesOnlineTotalMD() MetricDescription { + return MetricDescription{ + Namespace: nodeMetricNamespace, + Subsystem: diskSubsystem, + Name: onlineTotal, + Help: "Total drives online", + Type: gaugeMetric, + } +} + +func getNodeDrivesTotalMD() MetricDescription { + return MetricDescription{ + Namespace: nodeMetricNamespace, + Subsystem: diskSubsystem, + Name: total, + Help: "Total drives", + Type: gaugeMetric, + } +} + +func getNodeDrivesFreeInodes() MetricDescription { + return MetricDescription{ + Namespace: nodeMetricNamespace, Subsystem: diskSubsystem, Name: freeInodes, 
Help: "Total free inodes", @@ -403,7 +433,7 @@ func getClusterDisksFreeInodes() MetricDescription { } } -func getNodeDiskTotalBytesMD() MetricDescription { +func getNodeDriveTotalBytesMD() MetricDescription { return MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: diskSubsystem, @@ -1288,7 +1318,7 @@ func getScannerNodeMetrics() *MetricsGroup { Help: "Total number of bucket scans started since server start", Type: counterMetric, }, - Value: float64(globalScannerMetrics.lifetime(scannerMetricScanBucketDisk) + uint64(globalScannerMetrics.activeDisks())), + Value: float64(globalScannerMetrics.lifetime(scannerMetricScanBucketDrive) + uint64(globalScannerMetrics.activeDrives())), }, { Description: MetricDescription{ @@ -1298,7 +1328,7 @@ func getScannerNodeMetrics() *MetricsGroup { Help: "Total number of bucket scans finished since server start", Type: counterMetric, }, - Value: float64(globalScannerMetrics.lifetime(scannerMetricScanBucketDisk)), + Value: float64(globalScannerMetrics.lifetime(scannerMetricScanBucketDrive)), }, { Description: MetricDescription{ @@ -1918,39 +1948,57 @@ func getLocalStorageMetrics() *MetricsGroup { } metrics = make([]Metric, 0, 50) - storageInfo, _ := objLayer.LocalStorageInfo(ctx) + storageInfo := objLayer.LocalStorageInfo(ctx) + onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) + totalDrives := onlineDrives.Merge(offlineDrives) + for _, disk := range storageInfo.Disks { metrics = append(metrics, Metric{ - Description: getNodeDiskUsedBytesMD(), + Description: getNodeDriveUsedBytesMD(), Value: float64(disk.UsedSpace), VariableLabels: map[string]string{"disk": disk.DrivePath}, }) metrics = append(metrics, Metric{ - Description: getNodeDiskFreeBytesMD(), + Description: getNodeDriveFreeBytesMD(), Value: float64(disk.AvailableSpace), VariableLabels: map[string]string{"disk": disk.DrivePath}, }) metrics = append(metrics, Metric{ - Description: getNodeDiskTotalBytesMD(), + Description: 
getNodeDriveTotalBytesMD(), Value: float64(disk.TotalSpace), VariableLabels: map[string]string{"disk": disk.DrivePath}, }) metrics = append(metrics, Metric{ - Description: getClusterDisksFreeInodes(), + Description: getNodeDrivesFreeInodes(), Value: float64(disk.FreeInodes), VariableLabels: map[string]string{"disk": disk.DrivePath}, }) + metrics = append(metrics, Metric{ + Description: getNodeDrivesOfflineTotalMD(), + Value: float64(offlineDrives.Sum()), + }) + + metrics = append(metrics, Metric{ + Description: getNodeDrivesOnlineTotalMD(), + Value: float64(onlineDrives.Sum()), + }) + + metrics = append(metrics, Metric{ + Description: getNodeDrivesTotalMD(), + Value: float64(totalDrives.Sum()), + }) + } return }) return mg } -func getLocalDiskStorageMetrics() *MetricsGroup { +func getLocalDriveStorageMetrics() *MetricsGroup { mg := &MetricsGroup{ cacheInterval: 3 * time.Second, } @@ -1961,7 +2009,7 @@ func getLocalDiskStorageMetrics() *MetricsGroup { return } - storageInfo, _ := objLayer.LocalStorageInfo(ctx) + storageInfo := objLayer.LocalStorageInfo(ctx) if storageInfo.Backend.Type == madmin.FS { return } @@ -1972,7 +2020,7 @@ func getLocalDiskStorageMetrics() *MetricsGroup { } for apiName, latency := range disk.Metrics.LastMinute { metrics = append(metrics, Metric{ - Description: getNodeDiskAPILatencyMD(), + Description: getNodeDriveAPILatencyMD(), Value: float64(latency.Avg().Microseconds()), VariableLabels: map[string]string{"disk": disk.DrivePath, "api": "storage." 
+ apiName}, }) @@ -1996,9 +2044,9 @@ func getClusterStorageMetrics() *MetricsGroup { // Fetch disk space info, ignore errors metrics = make([]Metric, 0, 10) - storageInfo, _ := objLayer.StorageInfo(ctx) - onlineDisks, offlineDisks := getOnlineOfflineDisksStats(storageInfo.Disks) - totalDisks := onlineDisks.Merge(offlineDisks) + storageInfo := objLayer.StorageInfo(ctx) + onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) + totalDrives := onlineDrives.Merge(offlineDrives) metrics = append(metrics, Metric{ Description: getClusterCapacityTotalBytesMD(), @@ -2021,18 +2069,18 @@ func getClusterStorageMetrics() *MetricsGroup { }) metrics = append(metrics, Metric{ - Description: getClusterDisksOfflineTotalMD(), - Value: float64(offlineDisks.Sum()), + Description: getClusterDrivesOfflineTotalMD(), + Value: float64(offlineDrives.Sum()), }) metrics = append(metrics, Metric{ - Description: getClusterDisksOnlineTotalMD(), - Value: float64(onlineDisks.Sum()), + Description: getClusterDrivesOnlineTotalMD(), + Value: float64(onlineDrives.Sum()), }) metrics = append(metrics, Metric{ - Description: getClusterDisksTotalMD(), - Value: float64(totalDisks.Sum()), + Description: getClusterDrivesTotalMD(), + Value: float64(totalDrives.Sum()), }) return }) diff --git a/cmd/metrics.go b/cmd/metrics.go index 5d032ad4b..bf44e9a62 100644 --- a/cmd/metrics.go +++ b/cmd/metrics.go @@ -478,7 +478,8 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { float64(GetTotalCapacityFree(server.Disks)), ) - s, _ := objLayer.StorageInfo(GlobalContext) + sinfo := objLayer.StorageInfo(GlobalContext) + // Report total usable capacity ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( @@ -486,8 +487,9 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { "Total usable capacity online in the cluster", nil, nil), prometheus.GaugeValue, - float64(GetTotalUsableCapacity(server.Disks, s)), + float64(GetTotalUsableCapacity(server.Disks, sinfo)), ) + // Report 
total usable capacity free ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( @@ -495,7 +497,7 @@ func storageMetricsPrometheus(ch chan<- prometheus.Metric) { "Total free usable capacity online in the cluster", nil, nil), prometheus.GaugeValue, - float64(GetTotalUsableCapacityFree(server.Disks, s)), + float64(GetTotalUsableCapacityFree(server.Disks, sinfo)), ) // MinIO Offline Disks per node diff --git a/cmd/notification.go b/cmd/notification.go index afed6aa97..f35477a0f 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -952,6 +952,39 @@ func getOfflineDisks(offlineHost string, endpoints EndpointServerPools) []madmin return offlineDisks } +// StorageInfo returns disk information across all peers +func (sys *NotificationSys) StorageInfo(objLayer ObjectLayer) StorageInfo { + var storageInfo StorageInfo + replies := make([]StorageInfo, len(sys.peerClients)) + + var wg sync.WaitGroup + for i, client := range sys.peerClients { + if client == nil { + continue + } + wg.Add(1) + go func(client *peerRESTClient, idx int) { + defer wg.Done() + info, err := client.LocalStorageInfo() + if err != nil { + info.Disks = getOfflineDisks(client.host.String(), globalEndpoints) + } + replies[idx] = info + }(client, i) + } + wg.Wait() + + // Add local to this server. + replies = append(replies, objLayer.LocalStorageInfo(GlobalContext)) + + storageInfo.Backend = objLayer.BackendInfo() + for _, sinfo := range replies { + storageInfo.Disks = append(storageInfo.Disks, sinfo.Disks...) + } + + return storageInfo +} + // ServerInfo - calls ServerInfo RPC call on all peers. 
func (sys *NotificationSys) ServerInfo() []madmin.ServerProperties { reply := make([]madmin.ServerProperties, len(sys.peerClients)) diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go index 2dc275809..f9015779b 100644 --- a/cmd/object-api-interface.go +++ b/cmd/object-api-interface.go @@ -194,8 +194,8 @@ type ObjectLayer interface { Shutdown(context.Context) error NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32, scanMode madmin.HealScanMode) error BackendInfo() madmin.BackendInfo - StorageInfo(ctx context.Context) (StorageInfo, []error) - LocalStorageInfo(ctx context.Context) (StorageInfo, []error) + StorageInfo(ctx context.Context) StorageInfo + LocalStorageInfo(ctx context.Context) StorageInfo // Bucket operations. MakeBucketWithLocation(ctx context.Context, bucket string, opts MakeBucketOptions) error diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go index 88c0a2726..a59890a48 100644 --- a/cmd/peer-rest-client.go +++ b/cmd/peer-rest-client.go @@ -97,6 +97,17 @@ func (client *peerRESTClient) GetLocks() (lockMap map[string][]lockRequesterInfo return lockMap, err } +// LocalStorageInfo - fetch local storage information for a remote node. +func (client *peerRESTClient) LocalStorageInfo() (info StorageInfo, err error) { + respBody, err := client.call(peerRESTMethodLocalStorageInfo, nil, nil, -1) + if err != nil { + return + } + defer http.DrainBody(respBody) + err = gob.NewDecoder(respBody).Decode(&info) + return info, err +} + // ServerInfo - fetch server information for a remote node. 
func (client *peerRESTClient) ServerInfo() (info madmin.ServerProperties, err error) { respBody, err := client.call(peerRESTMethodServerInfo, nil, nil, -1) diff --git a/cmd/peer-rest-common.go b/cmd/peer-rest-common.go index 739ab9599..f24edd2c4 100644 --- a/cmd/peer-rest-common.go +++ b/cmd/peer-rest-common.go @@ -18,7 +18,7 @@ package cmd const ( - peerRESTVersion = "v28" // Added Rebalance peer APIs + peerRESTVersion = "v29" // Added LocalStorageInfo peer API peerRESTVersionPrefix = SlashSeparator + peerRESTVersion peerRESTPrefix = minioReservedBucketPath + "/peer" @@ -28,6 +28,7 @@ const ( const ( peerRESTMethodHealth = "/health" peerRESTMethodServerInfo = "/serverinfo" + peerRESTMethodLocalStorageInfo = "/localstorageinfo" peerRESTMethodCPUInfo = "/cpuinfo" peerRESTMethodDiskHwInfo = "/diskhwinfo" peerRESTMethodOsInfo = "/osinfo" diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index 4b0dbe014..4d84fef90 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -330,6 +330,23 @@ func (s *peerRESTServer) DownloadProfilingDataHandler(w http.ResponseWriter, r * logger.LogIf(ctx, gob.NewEncoder(w).Encode(profileData)) } +func (s *peerRESTServer) LocalStorageInfoHandler(w http.ResponseWriter, r *http.Request) { + if !s.IsValid(w, r) { + s.writeErrorResponse(w, errors.New("Invalid request")) + return + } + + ctx := newContext(r, w, "LocalStorageInfo") + + objLayer := newObjectLayerFn() + if objLayer == nil { + s.writeErrorResponse(w, errServerNotInitialized) + return + } + + logger.LogIf(ctx, gob.NewEncoder(w).Encode(objLayer.LocalStorageInfo(r.Context()))) +} + // ServerInfoHandler - returns Server Info func (s *peerRESTServer) ServerInfoHandler(w http.ResponseWriter, r *http.Request) { if !s.IsValid(w, r) { @@ -1363,6 +1380,7 @@ func registerPeerRESTHandlers(router *mux.Router) { subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodHealth).HandlerFunc(httpTraceHdrs(server.HealthHandler)) 
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetLocks).HandlerFunc(httpTraceHdrs(server.GetLocksHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodServerInfo).HandlerFunc(httpTraceHdrs(server.ServerInfoHandler)) + subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodLocalStorageInfo).HandlerFunc(httpTraceHdrs(server.LocalStorageInfoHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodProcInfo).HandlerFunc(httpTraceHdrs(server.GetProcInfoHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodMemInfo).HandlerFunc(httpTraceHdrs(server.GetMemInfoHandler)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodMetrics).HandlerFunc(httpTraceHdrs(server.GetMetricsHandler)).Queries(restQueries(peerRESTMetricsTypes)...) diff --git a/cmd/rebalance-admin.go b/cmd/rebalance-admin.go index ff22abcae..d1f2317c5 100644 --- a/cmd/rebalance-admin.go +++ b/cmd/rebalance-admin.go @@ -55,7 +55,7 @@ func rebalanceStatus(ctx context.Context, z *erasureServerPools) (r rebalanceAdm } // Compute disk usage percentage - si, _ := z.StorageInfo(ctx) + si := z.StorageInfo(ctx) diskStats := make([]struct { AvailableSpace uint64 TotalSpace uint64 diff --git a/cmd/scannermetric_string.go b/cmd/scannermetric_string.go index 4f69dea55..1d8066977 100644 --- a/cmd/scannermetric_string.go +++ b/cmd/scannermetric_string.go @@ -25,13 +25,13 @@ func _() { _ = x[scannerMetricLastRealtime-14] _ = x[scannerMetricScanFolder-15] _ = x[scannerMetricScanCycle-16] - _ = x[scannerMetricScanBucketDisk-17] + _ = x[scannerMetricScanBucketDrive-17] _ = x[scannerMetricLast-18] } -const _scannerMetric_name = "ReadMetadataCheckMissingSaveUsageApplyAllApplyVersionTierObjSweepHealCheckILMCheckReplicationYieldCleanAbandonedApplyNonCurrentStartTraceScanObjectLastRealtimeScanFolderScanCycleScanBucketDiskLast" +const _scannerMetric_name = 
"ReadMetadataCheckMissingSaveUsageApplyAllApplyVersionTierObjSweepHealCheckILMCheckReplicationYieldCleanAbandonedApplyNonCurrentStartTraceScanObjectLastRealtimeScanFolderScanCycleScanBucketDriveLast" -var _scannerMetric_index = [...]uint8{0, 12, 24, 33, 41, 53, 65, 74, 77, 93, 98, 112, 127, 137, 147, 159, 169, 178, 192, 196} +var _scannerMetric_index = [...]uint8{0, 12, 24, 33, 41, 53, 65, 74, 77, 93, 98, 112, 127, 137, 147, 159, 169, 178, 193, 197} func (i scannerMetric) String() string { if i >= scannerMetric(len(_scannerMetric_index)-1) { diff --git a/cmd/server-startup-msg.go b/cmd/server-startup-msg.go index 1586cabf4..45735d1a0 100644 --- a/cmd/server-startup-msg.go +++ b/cmd/server-startup-msg.go @@ -37,11 +37,6 @@ func getFormatStr(strLen int, padding int) string { return "%" + formatStr } -func mustGetStorageInfo(objAPI ObjectLayer) StorageInfo { - storageInfo, _ := objAPI.StorageInfo(GlobalContext) - return storageInfo -} - // Prints the formatted startup message. func printStartupMessage(apiEndpoints []string, err error) { logger.Info(color.Bold("MinIO Object Storage Server")) @@ -67,7 +62,7 @@ func printStartupMessage(apiEndpoints []string, err error) { // Object layer is initialized then print StorageInfo. objAPI := newObjectLayerFn() if objAPI != nil { - printStorageInfo(mustGetStorageInfo(objAPI)) + printStorageInfo(objAPI.StorageInfo(GlobalContext)) } // Prints credential, region and browser access.