mirror of https://github.com/minio/minio.git
296 lines
7.4 KiB
Go
296 lines
7.4 KiB
Go
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"context"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/minio/minio/internal/logger"
|
|
)
|
|
|
|
//go:generate msgp -file $GOFILE -unexported
|
|
//msgp:ignore TierJournal tierDiskJournal walkfn
|
|
|
|
// tierDiskJournal is the on-disk journal of pending tier deletes. The
// embedded lock serializes access to the active journal file handle.
type tierDiskJournal struct {
	sync.RWMutex

	// diskPath is the drive path under which the journal files are located.
	diskPath string
	// file is the active journal file; nil while the journal is closed.
	file *os.File
}
|
|
|
|
// TierJournal holds an in-memory and an on-disk delete journal of tiered content.
|
|
type TierJournal struct {
|
|
*tierDiskJournal // for processing legacy journal entries
|
|
*tierMemJournal // for processing new journal entries
|
|
}
|
|
|
|
// jentry is a single record of the pending deletes journal. It is serialized
// with msgp using the keys given in the struct tags.
type jentry struct {
	// ObjName is the object name passed to the tier driver on removal.
	ObjName string `msg:"obj"`
	// VersionID is the version passed to the tier driver on removal.
	VersionID string `msg:"vid"`
	// TierName selects the tier driver used to remove the object.
	TierName string `msg:"tier"`
}
|
|
|
|
const (
	// tierJournalVersion is the version number written, little-endian, at
	// the start of every journal file.
	tierJournalVersion = 1

	// tierJournalHdrLen is the size in bytes of the version header (2 bytes).
	tierJournalHdrLen = 2
)
|
|
|
|
// errUnsupportedJournalVersion is returned by OpenRO when the version header
// of the read-only journal does not match tierJournalVersion.
var errUnsupportedJournalVersion = errors.New("unsupported pending deletes journal version")
|
|
|
|
func newTierDiskJournal() *tierDiskJournal {
|
|
return &tierDiskJournal{}
|
|
}
|
|
|
|
// NewTierJournal initializes tier deletion journal
|
|
func NewTierJournal() *TierJournal {
|
|
j := &TierJournal{
|
|
tierMemJournal: newTierMemJournal(1000),
|
|
tierDiskJournal: newTierDiskJournal(),
|
|
}
|
|
return j
|
|
}
|
|
|
|
// Init intializes an in-memory journal built using a
|
|
// buffered channel for new journal entries. It also initializes the on-disk
|
|
// journal only to process existing journal entries made from previous versions.
|
|
func (t *TierJournal) Init(ctx context.Context) error {
|
|
for _, diskPath := range globalEndpoints.LocalDisksPaths() {
|
|
t.diskPath = diskPath
|
|
|
|
go t.deletePending(ctx) // for existing journal entries from previous MinIO versions
|
|
go t.processEntries(ctx) // for newer journal entries circa free-versions
|
|
return nil
|
|
}
|
|
|
|
return errors.New("no local drive found")
|
|
}
|
|
|
|
// rotate rotates the journal. If a read-only journal already exists it does
|
|
// nothing. Otherwise renames the active journal to a read-only journal and
|
|
// opens a new active journal.
|
|
func (jd *tierDiskJournal) rotate() error {
|
|
// Do nothing if a read-only journal file already exists.
|
|
if _, err := os.Stat(jd.ReadOnlyPath()); err == nil {
|
|
return nil
|
|
}
|
|
// Close the active journal if present and delete it.
|
|
return jd.Close()
|
|
}
|
|
|
|
// walkFn is the callback WalkEntries invokes for each decoded journal entry,
// passing the entry's object name, version ID (as rvID) and tier name. When
// it returns an error other than object-not-found, WalkEntries adds the entry
// back to the active journal.
type walkFn func(ctx context.Context, objName, rvID, tierName string) error
|
|
|
|
func (jd *tierDiskJournal) ReadOnlyPath() string {
|
|
return filepath.Join(jd.diskPath, minioMetaBucket, "ilm", "deletion-journal.ro.bin")
|
|
}
|
|
|
|
func (jd *tierDiskJournal) JournalPath() string {
|
|
return filepath.Join(jd.diskPath, minioMetaBucket, "ilm", "deletion-journal.bin")
|
|
}
|
|
|
|
func (jd *tierDiskJournal) WalkEntries(ctx context.Context, fn walkFn) {
|
|
if err := jd.rotate(); err != nil {
|
|
logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to rotate pending deletes journal %s", err))
|
|
return
|
|
}
|
|
|
|
ro, err := jd.OpenRO()
|
|
switch {
|
|
case errors.Is(err, os.ErrNotExist):
|
|
return // No read-only journal to process; nothing to do.
|
|
case err != nil:
|
|
logger.LogIf(ctx, fmt.Errorf("tier-journal: failed open read-only journal for processing %s", err))
|
|
return
|
|
}
|
|
defer ro.Close()
|
|
mr := msgpNewReader(ro)
|
|
defer readMsgpReaderPoolPut(mr)
|
|
|
|
done := false
|
|
for {
|
|
var entry jentry
|
|
err := entry.DecodeMsg(mr)
|
|
if errors.Is(err, io.EOF) {
|
|
done = true
|
|
break
|
|
}
|
|
if err != nil {
|
|
logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to decode journal entry %s", err))
|
|
break
|
|
}
|
|
err = fn(ctx, entry.ObjName, entry.VersionID, entry.TierName)
|
|
if err != nil && !isErrObjectNotFound(err) {
|
|
logger.LogIf(ctx, fmt.Errorf("tier-journal: failed to delete transitioned object %s from %s due to %s", entry.ObjName, entry.TierName, err))
|
|
// We add the entry into the active journal to try again
|
|
// later.
|
|
jd.addEntry(entry)
|
|
}
|
|
}
|
|
if done {
|
|
os.Remove(jd.ReadOnlyPath())
|
|
}
|
|
}
|
|
|
|
func deleteObjectFromRemoteTier(ctx context.Context, objName, rvID, tierName string) error {
|
|
w, err := globalTierConfigMgr.getDriver(tierName)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = w.Remove(ctx, objName, remoteVersionID(rvID))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (jd *tierDiskJournal) deletePending(ctx context.Context) {
|
|
ticker := time.NewTicker(30 * time.Minute)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
jd.WalkEntries(ctx, deleteObjectFromRemoteTier)
|
|
|
|
case <-ctx.Done():
|
|
jd.Close()
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (jd *tierDiskJournal) addEntry(je jentry) error {
|
|
// Open journal if it hasn't been
|
|
err := jd.Open()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
b, err := je.MarshalMsg(nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
jd.Lock()
|
|
defer jd.Unlock()
|
|
_, err = jd.file.Write(b)
|
|
if err != nil {
|
|
// Do not leak fd here, close the file properly.
|
|
Fdatasync(jd.file)
|
|
_ = jd.file.Close()
|
|
|
|
jd.file = nil // reset to allow subsequent reopen when file/disk is available.
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Close closes the active journal and renames it to read-only for pending
|
|
// deletes processing. Note: calling Close on a closed journal is a no-op.
|
|
func (jd *tierDiskJournal) Close() error {
|
|
jd.Lock()
|
|
defer jd.Unlock()
|
|
if jd.file == nil { // already closed
|
|
return nil
|
|
}
|
|
|
|
var (
|
|
f *os.File
|
|
fi os.FileInfo
|
|
err error
|
|
)
|
|
// Setting j.file to nil
|
|
f, jd.file = jd.file, f
|
|
if fi, err = f.Stat(); err != nil {
|
|
return err
|
|
}
|
|
f.Close() // close before rename()
|
|
|
|
// Skip renaming active journal if empty.
|
|
if fi.Size() == tierJournalHdrLen {
|
|
return os.Remove(jd.JournalPath())
|
|
}
|
|
|
|
jPath := jd.JournalPath()
|
|
jroPath := jd.ReadOnlyPath()
|
|
// Rotate active journal to perform pending deletes.
|
|
return os.Rename(jPath, jroPath)
|
|
}
|
|
|
|
// Open opens a new active journal. Note: calling Open on an opened journal is a
|
|
// no-op.
|
|
func (jd *tierDiskJournal) Open() error {
|
|
jd.Lock()
|
|
defer jd.Unlock()
|
|
if jd.file != nil { // already open
|
|
return nil
|
|
}
|
|
|
|
var err error
|
|
jd.file, err = OpenFile(jd.JournalPath(), os.O_APPEND|os.O_CREATE|os.O_WRONLY|writeMode, 0o666)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// write journal version header if active journal is empty
|
|
fi, err := jd.file.Stat()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if fi.Size() == 0 {
|
|
var data [tierJournalHdrLen]byte
|
|
binary.LittleEndian.PutUint16(data[:], tierJournalVersion)
|
|
_, err = jd.file.Write(data[:])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (jd *tierDiskJournal) OpenRO() (io.ReadCloser, error) {
|
|
file, err := Open(jd.ReadOnlyPath())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// read journal version header
|
|
var data [tierJournalHdrLen]byte
|
|
if _, err := io.ReadFull(file, data[:]); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
switch binary.LittleEndian.Uint16(data[:]) {
|
|
case tierJournalVersion:
|
|
return file, nil
|
|
default:
|
|
return nil, errUnsupportedJournalVersion
|
|
}
|
|
}
|
|
|
|
// jentryV1 is the journal entry layout from before RemoteVersionID was added.
// It is kept only so tests can exercise the addition of that struct element.
type jentryV1 struct {
	// ObjName is serialized under the key "obj".
	ObjName string `msg:"obj"`
	// TierName is serialized under the key "tier".
	TierName string `msg:"tier"`
}
|