// Copyright (c) 2015-2021 MinIO, Inc. // // This file is part of MinIO Object Storage stack // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package cmd import ( "context" "encoding/binary" "errors" "fmt" "io" "os" "path/filepath" "sync" "time" "github.com/minio/minio/internal/logger" ) //go:generate msgp -file $GOFILE -unexported //msgp:ignore TierJournal tierDiskJournal walkfn type tierDiskJournal struct { sync.RWMutex diskPath string file *os.File // active journal file } // TierJournal holds an in-memory and an on-disk delete journal of tiered content. type TierJournal struct { *tierDiskJournal // for processing legacy journal entries *tierMemJournal // for processing new journal entries } type jentry struct { ObjName string `msg:"obj"` VersionID string `msg:"vid"` TierName string `msg:"tier"` } const ( tierJournalVersion = 1 tierJournalHdrLen = 2 // 2 bytes ) var errUnsupportedJournalVersion = errors.New("unsupported pending deletes journal version") func newTierDiskJournal() *tierDiskJournal { return &tierDiskJournal{} } // NewTierJournal initializes tier deletion journal func NewTierJournal() *TierJournal { j := &TierJournal{ tierMemJournal: newTierMemJournal(1000), tierDiskJournal: newTierDiskJournal(), } return j } // Init initializes an in-memory journal built using a // buffered channel for new journal entries. It also initializes the on-disk // journal only to process existing journal entries made from previous versions. func (t *TierJournal) Init(ctx context.Context) error { for _, diskPath := range globalEndpoints.LocalDisksPaths() { t.diskPath = diskPath go t.deletePending(ctx) // for existing journal entries from previous MinIO versions go t.processEntries(ctx) // for newer journal entries circa free-versions return nil } return errors.New("no local drive found") } // rotate rotates the journal. If a read-only journal already exists it does // nothing. Otherwise renames the active journal to a read-only journal and // opens a new active journal. func (jd *tierDiskJournal) rotate() error { // Do nothing if a read-only journal file already exists. if _, err := os.Stat(jd.ReadOnlyPath()); err == nil { return nil } // Close the active journal if present and delete it. return jd.Close() } type walkFn func(ctx context.Context, objName, rvID, tierName string) error func (jd *tierDiskJournal) ReadOnlyPath() string { return filepath.Join(jd.diskPath, minioMetaBucket, "ilm", "deletion-journal.ro.bin") } func (jd *tierDiskJournal) JournalPath() string { return filepath.Join(jd.diskPath, minioMetaBucket, "ilm", "deletion-journal.bin") } func (jd *tierDiskJournal) WalkEntries(ctx context.Context, fn walkFn) { if err := jd.rotate(); err != nil { logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to rotate pending deletes journal %s", err)) return } ro, err := jd.OpenRO() switch { case errors.Is(err, os.ErrNotExist): return // No read-only journal to process; nothing to do. case err != nil: logger.LogIf(ctx, fmt.Errorf("tier-journal: failed open read-only journal for processing %s", err)) return } defer ro.Close() mr := msgpNewReader(ro) defer readMsgpReaderPoolPut(mr) done := false for { var entry jentry err := entry.DecodeMsg(mr) if errors.Is(err, io.EOF) { done = true break } if err != nil { logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to decode journal entry %s", err)) break } err = fn(ctx, entry.ObjName, entry.VersionID, entry.TierName) if err != nil && !isErrObjectNotFound(err) { logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to delete transitioned object %s from %s due to %s", entry.ObjName, entry.TierName, err)) // We add the entry into the active journal to try again // later. jd.addEntry(entry) } } if done { os.Remove(jd.ReadOnlyPath()) } } func deleteObjectFromRemoteTier(ctx context.Context, objName, rvID, tierName string) error { w, err := globalTierConfigMgr.getDriver(tierName) if err != nil { return err } err = w.Remove(ctx, objName, remoteVersionID(rvID)) if err != nil { return err } return nil } func (jd *tierDiskJournal) deletePending(ctx context.Context) { ticker := time.NewTicker(30 * time.Minute) defer ticker.Stop() for { select { case <-ticker.C: jd.WalkEntries(ctx, deleteObjectFromRemoteTier) case <-ctx.Done(): jd.Close() return } } } func (jd *tierDiskJournal) addEntry(je jentry) error { // Open journal if it hasn't been err := jd.Open() if err != nil { return err } b, err := je.MarshalMsg(nil) if err != nil { return err } jd.Lock() defer jd.Unlock() _, err = jd.file.Write(b) if err != nil { // Do not leak fd here, close the file properly. Fdatasync(jd.file) _ = jd.file.Close() jd.file = nil // reset to allow subsequent reopen when file/disk is available. } return err } // Close closes the active journal and renames it to read-only for pending // deletes processing. Note: calling Close on a closed journal is a no-op. func (jd *tierDiskJournal) Close() error { jd.Lock() defer jd.Unlock() if jd.file == nil { // already closed return nil } var ( f *os.File fi os.FileInfo err error ) // Setting j.file to nil f, jd.file = jd.file, f if fi, err = f.Stat(); err != nil { return err } f.Close() // close before rename() // Skip renaming active journal if empty. if fi.Size() == tierJournalHdrLen { return os.Remove(jd.JournalPath()) } jPath := jd.JournalPath() jroPath := jd.ReadOnlyPath() // Rotate active journal to perform pending deletes. return os.Rename(jPath, jroPath) } // Open opens a new active journal. Note: calling Open on an opened journal is a // no-op. func (jd *tierDiskJournal) Open() error { jd.Lock() defer jd.Unlock() if jd.file != nil { // already open return nil } var err error jd.file, err = OpenFile(jd.JournalPath(), os.O_APPEND|os.O_CREATE|os.O_WRONLY|writeMode, 0o666) if err != nil { return err } // write journal version header if active journal is empty fi, err := jd.file.Stat() if err != nil { return err } if fi.Size() == 0 { var data [tierJournalHdrLen]byte binary.LittleEndian.PutUint16(data[:], tierJournalVersion) _, err = jd.file.Write(data[:]) if err != nil { return err } } return nil } func (jd *tierDiskJournal) OpenRO() (io.ReadCloser, error) { file, err := Open(jd.ReadOnlyPath()) if err != nil { return nil, err } // read journal version header var data [tierJournalHdrLen]byte if _, err := io.ReadFull(file, data[:]); err != nil { return nil, err } switch binary.LittleEndian.Uint16(data[:]) { case tierJournalVersion: return file, nil default: return nil, errUnsupportedJournalVersion } } // jentryV1 represents the entry in the journal before RemoteVersionID was // added. It remains here for use in tests for the struct element addition. type jentryV1 struct { ObjName string `msg:"obj"` TierName string `msg:"tier"` }