mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
tooling: Add xlmeta --combine switch that will combine inline data (#17488)
Will combine or write partial data of each version found in the inspect data.

Example:

```
> xl-meta -export -combine inspect-data.1228fb52.zip

(... metadata json...)
}
Attempting to combine version "994f1113-da94-4be1-8551-9dbc54b204bc".
Read shard 1 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-01-of-13.data)
Read shard 2 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-02-of-13.data)
Read shard 3 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-03-of-13.data)
Read shard 4 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-04-of-13.data)
Read shard 6 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-06-of-13.data)
Read shard 7 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-07-of-13.data)
Read shard 8 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-08-of-13.data)
Read shard 9 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-09-of-13.data)
Read shard 10 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-10-of-13.data)
Read shard 11 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-11-of-13.data)
Read shard 13 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-13-of-13.data)
Attempting to reconstruct using parity sets:
* Setup: Data shards: 9 - Parity blocks: 6
Have 6 complete remapped data shards and 6 complete parity shards. Could NOT reconstruct: too few shards given
* Setup: Data shards: 8 - Parity blocks: 5
Have 5 complete remapped data shards and 5 complete parity shards. Could reconstruct completely
0 bytes missing. Truncating 0 from the end.
Wrote output to 994f1113-da94-4be1-8551-9dbc54b204bc.complete
```

So far only inline data is handled, but there is no real reason that external data can't also be included with some handling of blocks.

Supports only unencrypted data.
This commit is contained in:
parent
74759b05a5
commit
15daa2e74a
@ -27,11 +27,15 @@ import (
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/klauspost/compress/zip"
|
||||
"github.com/klauspost/filepathx"
|
||||
"github.com/klauspost/reedsolomon"
|
||||
"github.com/minio/cli"
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
@ -74,10 +78,21 @@ FLAGS:
|
||||
Usage: "export inline data",
|
||||
Name: "export",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Usage: "combine inline data",
|
||||
Name: "combine",
|
||||
},
|
||||
}
|
||||
|
||||
app.Action = func(c *cli.Context) error {
|
||||
ndjson := c.Bool("ndjson")
|
||||
if c.Bool("data") && c.Bool("combine") {
|
||||
return errors.New("cannot combine --data and --combine")
|
||||
}
|
||||
// file / version / file
|
||||
filemap := make(map[string]map[string]string)
|
||||
// versionID ->
|
||||
combineFiles := make(map[string][]string)
|
||||
decode := func(r io.Reader, file string) ([]byte, error) {
|
||||
b, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
@ -87,7 +102,7 @@ FLAGS:
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
filemap[file] = make(map[string]string)
|
||||
buf := bytes.NewBuffer(nil)
|
||||
var data xlMetaInlineData
|
||||
switch minor {
|
||||
@ -149,6 +164,21 @@ FLAGS:
|
||||
Header: b,
|
||||
Metadata: buf.Bytes(),
|
||||
}
|
||||
type erasureInfo struct {
|
||||
V2Obj *struct {
|
||||
EcDist []int
|
||||
EcIndex int
|
||||
EcM int
|
||||
EcN int
|
||||
}
|
||||
}
|
||||
var ei erasureInfo
|
||||
if err := json.Unmarshal(buf.Bytes(), &ei); err == nil && ei.V2Obj != nil {
|
||||
verID := uuid.UUID(header.VersionID).String()
|
||||
idx := ei.V2Obj.EcIndex
|
||||
filemap[file][verID] = fmt.Sprintf("%s/shard-%02d-of-%02d", verID, idx, ei.V2Obj.EcN+ei.V2Obj.EcM)
|
||||
filemap[file][verID+".json"] = buf.String()
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
@ -173,22 +203,39 @@ FLAGS:
|
||||
buf = bytes.NewBuffer(b)
|
||||
}
|
||||
if c.Bool("export") {
|
||||
file := strings.Map(func(r rune) rune {
|
||||
switch {
|
||||
case r >= 'a' && r <= 'z':
|
||||
return r
|
||||
case r >= 'A' && r <= 'Z':
|
||||
return r
|
||||
case r >= '0' && r <= '9':
|
||||
return r
|
||||
case strings.ContainsAny(string(r), "+=-_()!@."):
|
||||
return r
|
||||
default:
|
||||
return '_'
|
||||
}
|
||||
}, file)
|
||||
file := file
|
||||
if !c.Bool("combine") {
|
||||
file = strings.Map(func(r rune) rune {
|
||||
switch {
|
||||
case r >= 'a' && r <= 'z':
|
||||
return r
|
||||
case r >= 'A' && r <= 'Z':
|
||||
return r
|
||||
case r >= '0' && r <= '9':
|
||||
return r
|
||||
case strings.ContainsAny(string(r), "+=-_()!@."):
|
||||
return r
|
||||
default:
|
||||
return '_'
|
||||
}
|
||||
}, file)
|
||||
}
|
||||
err := data.files(func(name string, data []byte) {
|
||||
err = os.WriteFile(fmt.Sprintf("%s-%s.data", file, name), data, os.ModePerm)
|
||||
fn := fmt.Sprintf("%s-%s.data", file, name)
|
||||
if c.Bool("combine") {
|
||||
f := filemap[file][name]
|
||||
if f != "" {
|
||||
fn = f + ".data"
|
||||
os.MkdirAll(filepath.Dir(fn), os.ModePerm)
|
||||
err = os.WriteFile(fn+".json", []byte(filemap[file][name+".json"]), os.ModePerm)
|
||||
combineFiles[name] = append(combineFiles[name], fn)
|
||||
if err != nil {
|
||||
fmt.Println("ERR:", err)
|
||||
}
|
||||
_ = os.WriteFile(filepath.Dir(fn)+"/filename.txt", []byte(file), os.ModePerm)
|
||||
}
|
||||
}
|
||||
err = os.WriteFile(fn, data, os.ModePerm)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
@ -312,6 +359,13 @@ FLAGS:
|
||||
if multiple {
|
||||
fmt.Println("}")
|
||||
}
|
||||
if len(combineFiles) > 0 {
|
||||
for k, v := range combineFiles {
|
||||
if err := combine(v, k); err != nil {
|
||||
fmt.Println("ERROR:", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -587,3 +641,154 @@ func (z xlMetaV2VersionHeaderV2) MarshalJSON() (o []byte, err error) {
|
||||
}
|
||||
return json.Marshal(tmp)
|
||||
}
|
||||
|
||||
func combine(files []string, out string) error {
|
||||
sort.Strings(files)
|
||||
var size, shards, data, parity int
|
||||
mapped := make([]byte, size)
|
||||
filled := make([]byte, size)
|
||||
parityData := make(map[int]map[int][]byte)
|
||||
fmt.Printf("Attempting to combine version %q.\n", out)
|
||||
for _, file := range files {
|
||||
b, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
meta, err := os.ReadFile(file + ".json")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
type erasureInfo struct {
|
||||
V2Obj *struct {
|
||||
EcDist []int
|
||||
EcIndex int
|
||||
EcM int
|
||||
EcN int
|
||||
Size int
|
||||
}
|
||||
}
|
||||
var ei erasureInfo
|
||||
var idx int
|
||||
if err := json.Unmarshal(meta, &ei); err == nil && ei.V2Obj != nil {
|
||||
if size == 0 {
|
||||
size = ei.V2Obj.Size
|
||||
mapped = make([]byte, size)
|
||||
filled = make([]byte, size)
|
||||
}
|
||||
data = ei.V2Obj.EcM
|
||||
parity = ei.V2Obj.EcN
|
||||
if shards == 0 {
|
||||
shards = data + parity
|
||||
}
|
||||
idx = ei.V2Obj.EcIndex - 1
|
||||
fmt.Println("Read shard", ei.V2Obj.EcIndex, "Data shards", data, "Parity", parity, fmt.Sprintf("(%s)", file))
|
||||
if ei.V2Obj.Size != size {
|
||||
return fmt.Errorf("size mismatch. Meta size: %d", ei.V2Obj.Size)
|
||||
}
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
if len(b) < 32 {
|
||||
return fmt.Errorf("file %s too short", file)
|
||||
}
|
||||
// Trim hash. Fine for inline data, since only one block.
|
||||
b = b[32:]
|
||||
|
||||
set := parityData[data]
|
||||
if set == nil {
|
||||
set = make(map[int][]byte)
|
||||
}
|
||||
set[idx] = b
|
||||
parityData[data] = set
|
||||
|
||||
// Combine
|
||||
start := len(b) * idx
|
||||
if start >= len(mapped) {
|
||||
continue
|
||||
}
|
||||
copy(mapped[start:], b)
|
||||
for j := range b {
|
||||
if j+start >= len(filled) {
|
||||
break
|
||||
}
|
||||
filled[j+start] = 1
|
||||
}
|
||||
}
|
||||
|
||||
lastValid := 0
|
||||
missing := 0
|
||||
for i := range filled {
|
||||
if filled[i] == 1 {
|
||||
lastValid = i
|
||||
} else {
|
||||
missing++
|
||||
}
|
||||
}
|
||||
if missing > 0 && len(parityData) > 0 {
|
||||
fmt.Println("Attempting to reconstruct using parity sets:")
|
||||
for k, v := range parityData {
|
||||
if missing == 0 {
|
||||
break
|
||||
}
|
||||
fmt.Println("* Setup: Data shards:", k, "- Parity blocks:", len(v))
|
||||
rs, err := reedsolomon.New(k, shards-k)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
split, err := rs.Split(mapped)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
splitFilled, err := rs.Split(filled)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ok := len(splitFilled)
|
||||
for i, sh := range splitFilled {
|
||||
for _, v := range sh {
|
||||
if v == 0 {
|
||||
split[i] = nil
|
||||
ok--
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
hasParity := 0
|
||||
for idx, sh := range v {
|
||||
split[idx] = sh
|
||||
if idx >= k && len(v) > 0 {
|
||||
hasParity++
|
||||
}
|
||||
}
|
||||
fmt.Printf("Have %d complete remapped data shards and %d complete parity shards. ", ok, hasParity)
|
||||
|
||||
if err := rs.ReconstructData(split); err == nil {
|
||||
fmt.Println("Could reconstruct completely")
|
||||
for i, data := range split[:k] {
|
||||
start := i * len(data)
|
||||
copy(mapped[start:], data)
|
||||
}
|
||||
lastValid = size - 1
|
||||
missing = 0
|
||||
} else {
|
||||
fmt.Println("Could NOT reconstruct:", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if lastValid == 0 {
|
||||
return errors.New("no valid data found")
|
||||
}
|
||||
if missing > 0 {
|
||||
out += ".truncated"
|
||||
} else {
|
||||
out += ".complete"
|
||||
}
|
||||
fmt.Println(missing, "bytes missing. Truncating", len(filled)-lastValid-1, "from end.")
|
||||
mapped = mapped[:lastValid+1]
|
||||
err := os.WriteFile(out, mapped, os.ModePerm)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Println("Wrote output to", out)
|
||||
return nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user