Fix inconsistently written compressed files. (#20846)

Before https://github.com/minio/minio/pull/20575, files could pick up indices 
from unrelated files if no index was added.

This would result in these files not being consistent across a set.

When loading, search the metadata for the compression indicators, check 
whether the version was written within the problematic date range, and 
clear the index of any part that has one but shouldn't.

The test validates that the signature matches the one in files stored without an index.

Bumps xlMetaVersion, so this check doesn't have to be made for future versions.
This commit is contained in:
Klaus Post 2025-01-17 11:17:18 -08:00 committed by GitHub
parent b4ac53d157
commit b9196757fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 3 deletions

View File

@ -462,7 +462,7 @@ func (j *xlMetaV2Version) ToFileInfo(volume, path string, allParts bool) (fi Fil
const (
xlHeaderVersion = 3
xlMetaVersion = 2
xlMetaVersion = 3
)
func (j xlMetaV2DeleteMarker) ToFileInfo(volume, path string) (FileInfo, error) {
@ -969,6 +969,50 @@ func (x *xlMetaV2) loadIndexed(buf xlMetaBuf, data xlMetaInlineData) error {
}
ver.meta = meta
// Fix inconsistent compression index written before the fix in https://github.com/minio/minio/pull/20575
// First search marshaled content for encoded values.
// We have bumped metaV to make this check cheaper.
if metaV < 3 && ver.header.Type == ObjectType && bytes.Contains(meta, []byte("\xa7PartIdx")) &&
bytes.Contains(meta, []byte("\xbcX-Minio-Internal-compression\xc4\x15klauspost/compress/s2")) {
// Likely candidate...
version, err := x.getIdx(i)
if err == nil {
// Check write date...
// RELEASE.2023-12-02T10-51-33Z -> RELEASE.2024-10-29T16-01-48Z
const dateStart = 1701471618
const dateEnd = 1730156418
if version.WrittenByVersion > dateStart && version.WrittenByVersion < dateEnd &&
version.ObjectV2 != nil && len(version.ObjectV2.PartIndices) > 0 {
var changed bool
clearField := true
for i, sz := range version.ObjectV2.PartActualSizes {
if len(version.ObjectV2.PartIndices) > i {
// 8<<20 is current 'compMinIndexSize', but we detach it in case it should change in the future.
if sz <= 8<<20 && len(version.ObjectV2.PartIndices[i]) > 0 {
changed = true
version.ObjectV2.PartIndices[i] = nil
}
clearField = clearField && len(version.ObjectV2.PartIndices[i]) == 0
}
}
if changed {
// All empty, clear.
if clearField {
version.ObjectV2.PartIndices = nil
}
// Reindex since it was changed.
meta, err := version.MarshalMsg(make([]byte, 0, len(ver.meta)+10))
if err == nil {
// Override both if fine.
ver.header = version.header()
ver.meta = meta
}
}
}
}
}
// Fix inconsistent x-minio-internal-replication-timestamp by loading and reindexing.
if metaV < 2 && ver.header.Type == DeleteType {
// load (and convert) version.
@ -1631,7 +1675,7 @@ func (x *xlMetaV2) AddVersion(fi FileInfo) error {
}
ventry.ObjectV2.PartNumbers[i] = fi.Parts[i].Number
ventry.ObjectV2.PartActualSizes[i] = fi.Parts[i].ActualSize
if len(ventry.ObjectV2.PartIndices) > 0 {
if len(ventry.ObjectV2.PartIndices) > i {
ventry.ObjectV2.PartIndices[i] = fi.Parts[i].Index
}
}

View File

@ -23,6 +23,7 @@ import (
"compress/gzip"
"context"
"encoding/base64"
"encoding/binary"
"encoding/json"
"fmt"
"io"
@ -583,6 +584,28 @@ func Test_xlMetaV2Shallow_Load(t *testing.T) {
}
// t.Logf("data := %#v\n", data)
})
// Test compressed index consistency fix
t.Run("comp-index", func(t *testing.T) {
// This file has a compressed index it should not have, written before the fix in https://github.com/minio/minio/pull/20575
// We ensure it is rewritten without an index.
// We compare this against the signature of the files stored without an index.
data, err := base64.StdEncoding.DecodeString(`WEwyIAEAAwDGAAACKgMCAcQml8QQAAAAAAAAAAAAAAAAAAAAANMYGu+UIK7akcQEofwXhAECCAjFAfyDpFR5cGUBpVYyT2Jq3gASoklExBAAAAAAAAAAAAAAAAAAAAAApEREaXLEEFTyKFqhkkXVoWn+8R1Lr2ymRWNBbGdvAaNFY00Io0VjTginRWNCU2l6ZdIAEAAAp0VjSW5kZXgBpkVjRGlzdNwAEAECAwQFBgcICQoLDA0ODxCoQ1N1bUFsZ28BqFBhcnROdW1zkgECqVBhcnRFVGFnc8CpUGFydFNpemVzktIAFtgq0gAGvb+qUGFydEFTaXplc5LSAFKb69IAGZg0p1BhcnRJZHiSxFqKm+4h9J7JCYCAgAFEABSPlBzH5g6z9gah3wOPnwLDlAGeD+os0xbjFd8O8w+TBoM8rz6bHO0KzQWtBu4GwgGSBocH6QPUSu8J5A/8gwSWtQPOtgL0euoMmAPEAKRTaXpl0gAdlemlTVRpbWXTGBrvlCCu2pGnTWV0YVN5c4K8WC1NaW5pby1JbnRlcm5hbC1hY3R1YWwtc2l6ZcQHNzA5MTIzMbxYLU1pbmlvLUludGVybmFsLWNvbXByZXNzaW9uxBVrbGF1c3Bvc3QvY29tcHJlc3MvczKnTWV0YVVzcoKsY29udGVudC10eXBlqHRleHQvY3N2pGV0YWfZIjEzYmYyMDU0NGVjN2VmY2YxNzhiYWRmNjc4NzNjODg2LTKhds5mYYMqzv8Vdtk=`)
if err != nil {
t.Fatal(err)
}
var xl xlMetaV2
err = xl.Load(data)
if err != nil {
t.Fatal(err)
}
for _, v := range xl.versions {
// Signature should match
if binary.BigEndian.Uint32(v.header.Signature[:]) != 0x8e5a6406 {
t.Log(v.header.String())
t.Fatalf("invalid signature 0x%x", binary.BigEndian.Uint32(v.header.Signature[:]))
}
}
})
}
func Test_xlMetaV2Shallow_LoadTimeStamp(t *testing.T) {

View File

@ -664,7 +664,7 @@ func (x xlMetaInlineData) files(fn func(name string, data []byte)) error {
const (
xlHeaderVersion = 3
xlMetaVersion = 2
xlMetaVersion = 3
)
type xlHeaders struct {