mirror of https://github.com/minio/minio.git
Add support converting non-inlined to inlined (#15444)
This is a feature to allow for inode compaction on large clusters that use a lot of small files spread across a large heirarchy.
This commit is contained in:
parent
e956369c4e
commit
a6e0ec4e6f
|
@ -49,3 +49,4 @@ jobs:
|
|||
make verify-healing
|
||||
make verify-healing-inconsistent-versions
|
||||
make verify-healing-with-root-disks
|
||||
make verify-healing-with-rewrite
|
||||
|
|
9
Makefile
9
Makefile
|
@ -15,7 +15,7 @@ checks: ## check dependencies
|
|||
@(env bash $(PWD)/buildscripts/checkdeps.sh)
|
||||
|
||||
help: ## print this help
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' Makefile | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' Makefile | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-40s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
getdeps: ## fetch necessary dependencies
|
||||
@mkdir -p ${GOPATH}/bin
|
||||
|
@ -90,11 +90,16 @@ verify-healing: ## verify healing and replacing disks with minio binary
|
|||
@(env bash $(PWD)/buildscripts/verify-healing.sh)
|
||||
@(env bash $(PWD)/buildscripts/unaligned-healing.sh)
|
||||
|
||||
verify-healing-with-root-disks:
|
||||
verify-healing-with-root-disks: ## verify healing root disks
|
||||
@echo "Verify healing with root disks"
|
||||
@GORACE=history_size=7 CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
|
||||
@(env bash $(PWD)/buildscripts/verify-healing-with-root-disks.sh)
|
||||
|
||||
verify-healing-with-rewrite: ## verify healing to rewrite old xl.meta -> new xl.meta
|
||||
@echo "Verify healing with rewrite"
|
||||
@GORACE=history_size=7 CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
|
||||
@(env bash $(PWD)/buildscripts/rewrite-old-new.sh)
|
||||
|
||||
verify-healing-inconsistent-versions: ## verify resolving inconsistent versions
|
||||
@echo "Verify resolving inconsistent versions build with race"
|
||||
@GORACE=history_size=7 CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
//go:build ignore
|
||||
// +build ignore
|
||||
|
||||
//
|
||||
// MinIO Object Storage (c) 2022 MinIO, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/minio/madmin-go"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Note: YOUR-ACCESSKEYID, YOUR-SECRETACCESSKEY are
|
||||
// dummy values, please replace them with original values.
|
||||
|
||||
// API requests are secure (HTTPS) if secure=true and insecure (HTTP) otherwise.
|
||||
// New returns an MinIO Admin client object.
|
||||
madmClnt, err := madmin.New(os.Args[1], os.Args[2], os.Args[3], false)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
opts := madmin.HealOpts{
|
||||
Recursive: true, // recursively heal all objects at 'prefix'
|
||||
Remove: true, // remove content that has lost quorum and not recoverable
|
||||
Recreate: true, // rewrite all old non-inlined xl.meta to new xl.meta
|
||||
ScanMode: madmin.HealNormalScan, // by default do not do 'deep' scanning
|
||||
}
|
||||
|
||||
start, _, err := madmClnt.Heal(context.Background(), "healing-rewrite-bucket", "", opts, "", false, false)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
fmt.Println("Healstart sequence ===")
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
if err = enc.Encode(&start); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
for {
|
||||
_, status, err := madmClnt.Heal(context.Background(), "healing-rewrite-bucket", "", opts, start.ClientToken, false, false)
|
||||
if status.Summary == "finished" {
|
||||
fmt.Println("Healstatus on items ===")
|
||||
for _, item := range status.Items {
|
||||
if err = enc.Encode(&item); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
if status.Summary == "stopped" {
|
||||
fmt.Println("Healstatus on items ===")
|
||||
fmt.Println("Heal failed with", status.FailureDetail)
|
||||
break
|
||||
}
|
||||
|
||||
for _, item := range status.Items {
|
||||
if err = enc.Encode(&item); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
set -E
|
||||
set -o pipefail
|
||||
set -x
|
||||
|
||||
WORK_DIR="$PWD/.verify-$RANDOM"
|
||||
MINIO_CONFIG_DIR="$WORK_DIR/.minio"
|
||||
MINIO_OLD=( "$PWD/minio.RELEASE.2020-10-28T08-16-50Z" --config-dir "$MINIO_CONFIG_DIR" server )
|
||||
MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )
|
||||
|
||||
if [ ! -x "$PWD/minio" ]; then
|
||||
echo "minio executable binary not found in current directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
function download_old_release() {
|
||||
if [ ! -f minio.RELEASE.2020-10-28T08-16-50Z ]; then
|
||||
curl --silent -O https://dl.minio.io/server/minio/release/linux-amd64/archive/minio.RELEASE.2020-10-28T08-16-50Z
|
||||
chmod a+x minio.RELEASE.2020-10-28T08-16-50Z
|
||||
fi
|
||||
}
|
||||
|
||||
function verify_rewrite() {
|
||||
start_port=$1
|
||||
|
||||
export MINIO_ACCESS_KEY=minio
|
||||
export MINIO_SECRET_KEY=minio123
|
||||
export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
|
||||
unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
|
||||
export MINIO_CI_CD=1
|
||||
|
||||
MC_BUILD_DIR="mc-$RANDOM"
|
||||
if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
|
||||
echo "failed to download https://github.com/minio/mc"
|
||||
purge "${MC_BUILD_DIR}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
(cd "${MC_BUILD_DIR}" && go build -o "$WORK_DIR/mc")
|
||||
|
||||
# remove mc source.
|
||||
purge "${MC_BUILD_DIR}"
|
||||
|
||||
"${MINIO_OLD[@]}" --address ":$start_port" "${WORK_DIR}/xl{1...16}" > "${WORK_DIR}/server1.log" 2>&1 &
|
||||
pid=$!
|
||||
disown $pid
|
||||
sleep 10
|
||||
|
||||
if ! ps -p ${pid} 1>&2 >/dev/null; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
"${WORK_DIR}/mc" mb minio/healing-rewrite-bucket --quiet --with-lock
|
||||
"${WORK_DIR}/mc" cp \
|
||||
buildscripts/verify-build.sh \
|
||||
minio/healing-rewrite-bucket/ \
|
||||
--disable-multipart --quiet
|
||||
|
||||
"${WORK_DIR}/mc" cp \
|
||||
buildscripts/verify-build.sh \
|
||||
minio/healing-rewrite-bucket/ \
|
||||
--disable-multipart --quiet
|
||||
|
||||
"${WORK_DIR}/mc" cp \
|
||||
buildscripts/verify-build.sh \
|
||||
minio/healing-rewrite-bucket/ \
|
||||
--disable-multipart --quiet
|
||||
|
||||
kill ${pid}
|
||||
sleep 3
|
||||
|
||||
"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/xl{1...16}" > "${WORK_DIR}/server1.log" 2>&1 &
|
||||
pid=$!
|
||||
disown $pid
|
||||
sleep 10
|
||||
|
||||
if ! ps -p ${pid} 1>&2 >/dev/null; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
go build ./docs/debugging/s3-check-md5/
|
||||
if ! ./s3-check-md5 \
|
||||
-debug \
|
||||
-versions \
|
||||
-access-key minio \
|
||||
-secret-key minio123 \
|
||||
-endpoint http://127.0.0.1:${start_port}/ 2>&1 | grep INTACT; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
mkdir -p inspects
|
||||
(cd inspects; "${WORK_DIR}/mc" admin inspect minio/healing-rewrite-bucket/verify-build.sh/**)
|
||||
|
||||
"${WORK_DIR}/mc" mb play/inspects
|
||||
"${WORK_DIR}/mc" mirror inspects play/inspects
|
||||
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
go run ./buildscripts/heal-manual.go "127.0.0.1:${start_port}" "minio" "minio123"
|
||||
sleep 1
|
||||
|
||||
if ! ./s3-check-md5 \
|
||||
-debug \
|
||||
-versions \
|
||||
-access-key minio \
|
||||
-secret-key minio123 \
|
||||
-endpoint http://127.0.0.1:${start_port}/ 2>&1 | grep INTACT; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
mkdir -p inspects
|
||||
(cd inspects; "${WORK_DIR}/mc" admin inspect minio/healing-rewrite-bucket/verify-build.sh/**)
|
||||
|
||||
"${WORK_DIR}/mc" mb play/inspects
|
||||
"${WORK_DIR}/mc" mirror inspects play/inspects
|
||||
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
kill ${pid}
|
||||
}
|
||||
|
||||
function main() {
|
||||
download_old_release
|
||||
|
||||
start_port=$(shuf -i 10000-65000 -n 1)
|
||||
|
||||
verify_rewrite ${start_port}
|
||||
}
|
||||
|
||||
function purge()
|
||||
{
|
||||
rm -rf "$1"
|
||||
}
|
||||
|
||||
( main "$@" )
|
||||
rv=$?
|
||||
purge "$WORK_DIR"
|
||||
exit "$rv"
|
|
@ -243,7 +243,7 @@ func listAllBuckets(ctx context.Context, storageDisks []StorageAPI, healBuckets
|
|||
|
||||
// Only heal on disks where we are sure that healing is needed. We can expand
|
||||
// this list as and when we figure out more errors can be added to this list safely.
|
||||
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, latestMeta FileInfo) bool {
|
||||
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, latestMeta FileInfo, doinline bool) bool {
|
||||
switch {
|
||||
case errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound):
|
||||
return true
|
||||
|
@ -256,6 +256,10 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, latestMeta File
|
|||
// always check first.
|
||||
return true
|
||||
}
|
||||
if doinline {
|
||||
// convert small files to 'inline'
|
||||
return true
|
||||
}
|
||||
if !meta.Deleted && !meta.IsRemote() {
|
||||
// If xl.meta was read fine but there may be problem with the part.N files.
|
||||
if IsErr(dataErr, []error{
|
||||
|
@ -342,6 +346,26 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
availableDisks, dataErrs, diskMTime := disksWithAllParts(ctx, onlineDisks, partsMetadata,
|
||||
errs, latestMeta, bucket, object, scanMode)
|
||||
|
||||
var erasure Erasure
|
||||
var recreate bool
|
||||
if !latestMeta.Deleted && !latestMeta.IsRemote() {
|
||||
// Initialize erasure coding
|
||||
erasure, err = NewErasure(ctx, latestMeta.Erasure.DataBlocks,
|
||||
latestMeta.Erasure.ParityBlocks, latestMeta.Erasure.BlockSize)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
|
||||
// Is only 'true' if the opts.Recreate is true and
|
||||
// the object shardSize < smallFileThreshold do not
|
||||
// set this to 'true' arbitrarily and must be only
|
||||
// 'true' with caller ask.
|
||||
recreate = (opts.Recreate &&
|
||||
!latestMeta.InlineData() &&
|
||||
len(latestMeta.Parts) == 1 &&
|
||||
erasure.ShardFileSize(latestMeta.Parts[0].ActualSize) < smallFileThreshold)
|
||||
}
|
||||
|
||||
// Loop to find number of disks with valid data, per-drive
|
||||
// data state and a list of outdated disks on which data needs
|
||||
// to be healed.
|
||||
|
@ -368,7 +392,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
driveState = madmin.DriveStateCorrupt
|
||||
}
|
||||
|
||||
if shouldHealObjectOnDisk(errs[i], dataErrs[i], partsMetadata[i], latestMeta) {
|
||||
if shouldHealObjectOnDisk(errs[i], dataErrs[i], partsMetadata[i], latestMeta, recreate) {
|
||||
outDatedDisks[i] = storageDisks[i]
|
||||
disksToHealCount++
|
||||
result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
|
||||
|
@ -422,7 +446,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
return result, nil
|
||||
}
|
||||
|
||||
if !latestMeta.XLV1 && !latestMeta.Deleted && disksToHealCount > latestMeta.Erasure.ParityBlocks {
|
||||
if !latestMeta.XLV1 && !latestMeta.Deleted && !recreate && disksToHealCount > latestMeta.Erasure.ParityBlocks {
|
||||
// When disk to heal count is greater than parity blocks we should simply error out.
|
||||
err := fmt.Errorf("more disks are expected to heal than parity, returned errors: %v (dataErrs %v) -> %s/%s(%s)", errs, dataErrs, bucket, object, versionID)
|
||||
logger.LogIf(ctx, err)
|
||||
|
@ -478,18 +502,9 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
}
|
||||
|
||||
var inlineBuffers []*bytes.Buffer
|
||||
if latestMeta.InlineData() {
|
||||
inlineBuffers = make([]*bytes.Buffer, len(outDatedDisks))
|
||||
}
|
||||
|
||||
if !latestMeta.Deleted && !latestMeta.IsRemote() {
|
||||
// Heal each part. erasureHealFile() will write the healed
|
||||
// part to .minio/tmp/uuid/ which needs to be renamed later to
|
||||
// the final location.
|
||||
erasure, err := NewErasure(ctx, latestMeta.Erasure.DataBlocks,
|
||||
latestMeta.Erasure.ParityBlocks, latestMeta.Erasure.BlockSize)
|
||||
if err != nil {
|
||||
return result, err
|
||||
if latestMeta.InlineData() || recreate {
|
||||
inlineBuffers = make([]*bytes.Buffer, len(outDatedDisks))
|
||||
}
|
||||
|
||||
erasureInfo := latestMeta.Erasure
|
||||
|
@ -525,6 +540,10 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
tillOffset, DefaultBitrotAlgorithm, erasure.ShardSize())
|
||||
}
|
||||
}
|
||||
|
||||
// Heal each part. erasure.Heal() will write the healed
|
||||
// part to .minio/tmp/uuid/ which needs to be renamed
|
||||
// later to the final location.
|
||||
err = erasure.Heal(ctx, writers, readers, partSize)
|
||||
closeBitrotReaders(readers)
|
||||
closeBitrotWriters(writers)
|
||||
|
@ -556,6 +575,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
})
|
||||
if len(inlineBuffers) > 0 && inlineBuffers[i] != nil {
|
||||
partsMetadata[i].Data = inlineBuffers[i].Bytes()
|
||||
partsMetadata[i].SetInlineData()
|
||||
} else {
|
||||
partsMetadata[i].Data = nil
|
||||
}
|
||||
|
@ -587,8 +607,9 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
|
|||
return result, err
|
||||
}
|
||||
|
||||
// Remove any remaining parts from outdated disks from before transition.
|
||||
if partsMetadata[i].IsRemote() {
|
||||
// - Remove any parts from healed disks after its been inlined.
|
||||
// - Remove any remaining parts from outdated disks from before transition.
|
||||
if recreate || partsMetadata[i].IsRemote() {
|
||||
rmDataDir := partsMetadata[i].DataDir
|
||||
disk.DeleteVol(ctx, pathJoin(bucket, encodeDirObject(object), rmDataDir), true)
|
||||
}
|
||||
|
|
|
@ -1769,7 +1769,9 @@ func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts
|
|||
}
|
||||
|
||||
// Attempt heal on the bucket metadata, ignore any failures
|
||||
defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", opts)
|
||||
hopts := opts
|
||||
hopts.Recreate = false
|
||||
defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", hopts)
|
||||
|
||||
for _, pool := range z.serverPools {
|
||||
result, err := pool.HealBucket(ctx, bucket, opts)
|
||||
|
|
Loading…
Reference in New Issue