From 15daa2e74add96b7f6b605f4e57801dd5b2f0580 Mon Sep 17 00:00:00 2001
From: Klaus Post
Date: Thu, 22 Jun 2023 12:41:24 -0700
Subject: [PATCH] tooling: Add xl-meta --combine switch that will combine
 inline data (#17488)

Combines the inline data of each version found in the inspect data,
writing partial output when not all data can be recovered.

Example:

```
> xl-meta -export -combine inspect-data.1228fb52.zip
(... metadata json...)
}
Attempting to combine version "994f1113-da94-4be1-8551-9dbc54b204bc".
Read shard 1 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-01-of-13.data)
Read shard 2 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-02-of-13.data)
Read shard 3 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-03-of-13.data)
Read shard 4 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-04-of-13.data)
Read shard 6 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-06-of-13.data)
Read shard 7 Data shards 9 Parity 4 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-07-of-13.data)
Read shard 8 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-08-of-13.data)
Read shard 9 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-09-of-13.data)
Read shard 10 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-10-of-13.data)
Read shard 11 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-11-of-13.data)
Read shard 13 Data shards 8 Parity 5 (994f1113-da94-4be1-8551-9dbc54b204bc/shard-13-of-13.data)
Attempting to reconstruct using parity sets:
* Setup: Data shards: 9 - Parity blocks: 6
Have 6 complete remapped data shards and 6 complete parity shards. Could NOT reconstruct: too few shards given
* Setup: Data shards: 8 - Parity blocks: 5
Have 5 complete remapped data shards and 5 complete parity shards. Could reconstruct completely
0 bytes missing. Truncating 0 from the end.
Wrote output to 994f1113-da94-4be1-8551-9dbc54b204bc.complete
```

So far this covers only inline data, but there is no real reason external
data can't be included as well, with some handling of blocks.

Supports only unencrypted data.
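For reference, the reconstruction path uses github.com/klauspost/reedsolomon,
which this patch imports. Below is a minimal standalone sketch (not part of
the patch; the 9+4 layout and payload are made up to mirror the example
above) of the split / nil-out / reconstruct cycle the combine step performs:

```go
package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	// 9 data + 4 parity shards, mirroring the first parity set above.
	rs, err := reedsolomon.New(9, 4)
	if err != nil {
		log.Fatal(err)
	}
	payload := bytes.Repeat([]byte("minio"), 1000)

	// Split pads and slices the payload into 9 equally sized data
	// shards (plus empty parity shards); Encode fills in the parity.
	shards, err := rs.Split(payload)
	if err != nil {
		log.Fatal(err)
	}
	if err := rs.Encode(shards); err != nil {
		log.Fatal(err)
	}

	// Simulate loss: nil out up to 4 of the 13 shards, the same way the
	// combine step nils out shards it could not fill completely.
	shards[0], shards[5], shards[9], shards[12] = nil, nil, nil, nil

	// ReconstructData rebuilds only the data shards, which is all that
	// is needed to write the object back out.
	if err := rs.ReconstructData(shards); err != nil {
		log.Fatal(err) // fails when more than 4 shards are missing
	}
	var buf bytes.Buffer
	if err := rs.Join(&buf, shards, len(payload)); err != nil {
		log.Fatal(err)
	}
	fmt.Println("recovered:", bytes.Equal(buf.Bytes(), payload)) // true
}
```

The combine code below applies the same idea once per parity set found in
the metadata, additionally running rs.Split over a "filled" bitmap to
decide which remapped shards are complete.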
---
 docs/debugging/xl-meta/main.go | 237 ++++++++++++++++++++++++++++++---
 1 file changed, 221 insertions(+), 16 deletions(-)

diff --git a/docs/debugging/xl-meta/main.go b/docs/debugging/xl-meta/main.go
index 4ed617773..70f137e39 100644
--- a/docs/debugging/xl-meta/main.go
+++ b/docs/debugging/xl-meta/main.go
@@ -27,11 +27,15 @@ import (
 	"io"
 	"log"
 	"os"
+	"path/filepath"
+	"sort"
 	"strings"
 	"time"
 
+	"github.com/google/uuid"
 	"github.com/klauspost/compress/zip"
 	"github.com/klauspost/filepathx"
+	"github.com/klauspost/reedsolomon"
 	"github.com/minio/cli"
 	"github.com/tinylib/msgp/msgp"
 )
@@ -74,10 +78,21 @@ FLAGS:
 			Usage: "export inline data",
 			Name:  "export",
 		},
+		cli.BoolFlag{
+			Usage: "combine inline data",
+			Name:  "combine",
+		},
 	}
 
	app.Action = func(c *cli.Context) error {
 		ndjson := c.Bool("ndjson")
+		if c.Bool("data") && c.Bool("combine") {
+			return errors.New("cannot combine --data and --combine")
+		}
+		// Input file -> version ID -> shard output path (metadata for "<id>.json" keys).
+		filemap := make(map[string]map[string]string)
+		// Version ID -> shard files written so far.
+		combineFiles := make(map[string][]string)
 		decode := func(r io.Reader, file string) ([]byte, error) {
 			b, err := io.ReadAll(r)
 			if err != nil {
@@ -87,7 +102,7 @@ FLAGS:
 			if err != nil {
 				return nil, err
 			}
-
+			filemap[file] = make(map[string]string)
 			buf := bytes.NewBuffer(nil)
 			var data xlMetaInlineData
 			switch minor {
@@ -149,6 +164,21 @@ FLAGS:
 				Header:   b,
 				Metadata: buf.Bytes(),
 			}
+			type erasureInfo struct {
+				V2Obj *struct {
+					EcDist  []int
+					EcIndex int
+					EcM     int
+					EcN     int
+				}
+			}
+			var ei erasureInfo
+			if err := json.Unmarshal(buf.Bytes(), &ei); err == nil && ei.V2Obj != nil {
+				verID := uuid.UUID(header.VersionID).String()
+				idx := ei.V2Obj.EcIndex
+				filemap[file][verID] = fmt.Sprintf("%s/shard-%02d-of-%02d", verID, idx, ei.V2Obj.EcN+ei.V2Obj.EcM)
+				filemap[file][verID+".json"] = buf.String()
+			}
 			return nil
 		})
 		if err != nil {
@@ -173,22 +203,39 @@ FLAGS:
 			buf = bytes.NewBuffer(b)
 		}
 		if c.Bool("export") {
-			file := strings.Map(func(r rune) rune {
-				switch {
-				case r >= 'a' && r <= 'z':
-					return r
-				case r >= 'A' && r <= 'Z':
-					return r
-				case r >= '0' && r <= '9':
-					return r
-				case strings.ContainsAny(string(r), "+=-_()!@."):
-					return r
-				default:
-					return '_'
-				}
-			}, file)
+			file := file
+			if !c.Bool("combine") {
+				file = strings.Map(func(r rune) rune {
+					switch {
+					case r >= 'a' && r <= 'z':
+						return r
+					case r >= 'A' && r <= 'Z':
+						return r
+					case r >= '0' && r <= '9':
+						return r
+					case strings.ContainsAny(string(r), "+=-_()!@."):
+						return r
+					default:
+						return '_'
+					}
+				}, file)
+			}
 			err := data.files(func(name string, data []byte) {
-				err = os.WriteFile(fmt.Sprintf("%s-%s.data", file, name), data, os.ModePerm)
+				fn := fmt.Sprintf("%s-%s.data", file, name)
+				if c.Bool("combine") {
+					f := filemap[file][name]
+					if f != "" {
+						fn = f + ".data"
+						os.MkdirAll(filepath.Dir(fn), os.ModePerm)
+						err = os.WriteFile(fn+".json", []byte(filemap[file][name+".json"]), os.ModePerm)
+						combineFiles[name] = append(combineFiles[name], fn)
+						if err != nil {
+							fmt.Println("ERR:", err)
+						}
+						_ = os.WriteFile(filepath.Dir(fn)+"/filename.txt", []byte(file), os.ModePerm)
+					}
+				}
+				err = os.WriteFile(fn, data, os.ModePerm)
 				if err != nil {
 					fmt.Println(err)
 				}
@@ -312,6 +359,13 @@ FLAGS:
 		if multiple {
 			fmt.Println("}")
 		}
+		if len(combineFiles) > 0 {
+			for k, v := range combineFiles {
+				if err := combine(v, k); err != nil {
+					fmt.Println("ERROR:", err)
+				}
+			}
+		}
 		return nil
 	}
 
@@ -587,3 +641,154 @@ func (z xlMetaV2VersionHeaderV2) MarshalJSON() (o []byte, err error) {
 	}
 	return json.Marshal(tmp)
 }
+
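+// combine reconstructs a single version from the collected shard files
+// and writes it to out with a ".complete" suffix, or ".truncated" when
+// not all bytes could be recovered.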
+func combine(files []string, out string) error {
+	sort.Strings(files)
+	var size, shards, data, parity int
+	mapped := make([]byte, size)
+	filled := make([]byte, size)
+	parityData := make(map[int]map[int][]byte)
+	fmt.Printf("Attempting to combine version %q.\n", out)
+	for _, file := range files {
+		b, err := os.ReadFile(file)
+		if err != nil {
+			return err
+		}
+		meta, err := os.ReadFile(file + ".json")
+		if err != nil {
+			return err
+		}
+		type erasureInfo struct {
+			V2Obj *struct {
+				EcDist  []int
+				EcIndex int
+				EcM     int
+				EcN     int
+				Size    int
+			}
+		}
+		var ei erasureInfo
+		var idx int
+		if err := json.Unmarshal(meta, &ei); err == nil && ei.V2Obj != nil {
+			if size == 0 {
+				size = ei.V2Obj.Size
+				mapped = make([]byte, size)
+				filled = make([]byte, size)
+			}
+			data = ei.V2Obj.EcM
+			parity = ei.V2Obj.EcN
+			if shards == 0 {
+				shards = data + parity
+			}
+			idx = ei.V2Obj.EcIndex - 1
+			fmt.Println("Read shard", ei.V2Obj.EcIndex, "Data shards", data, "Parity", parity, fmt.Sprintf("(%s)", file))
+			if ei.V2Obj.Size != size {
+				return fmt.Errorf("size mismatch. Meta size: %d, expected %d", ei.V2Obj.Size, size)
+			}
+		} else {
+			return err
+		}
+		if len(b) < 32 {
+			return fmt.Errorf("file %s too short", file)
+		}
+		// Trim hash. Fine for inline data, since only one block.
+		b = b[32:]
+
+		set := parityData[data]
+		if set == nil {
+			set = make(map[int][]byte)
+		}
+		set[idx] = b
+		parityData[data] = set
+
+		// Copy the shard to its position in the combined output.
+		start := len(b) * idx
+		if start >= len(mapped) {
+			continue
+		}
+		copy(mapped[start:], b)
+		for j := range b {
+			if j+start >= len(filled) {
+				break
+			}
+			filled[j+start] = 1
+		}
+	}
+
+	lastValid := 0
+	missing := 0
+	for i := range filled {
+		if filled[i] == 1 {
+			lastValid = i
+		} else {
+			missing++
+		}
+	}
+	if missing > 0 && len(parityData) > 0 {
+		fmt.Println("Attempting to reconstruct using parity sets:")
+		for k, v := range parityData {
+			if missing == 0 {
+				break
+			}
+			fmt.Println("* Setup: Data shards:", k, "- Parity blocks:", len(v))
+			rs, err := reedsolomon.New(k, shards-k)
+			if err != nil {
+				return err
+			}
+			split, err := rs.Split(mapped)
+			if err != nil {
+				return err
+			}
+			splitFilled, err := rs.Split(filled)
+			if err != nil {
+				return err
+			}
+			ok := len(splitFilled)
+			for i, sh := range splitFilled {
+				for _, v := range sh {
+					if v == 0 {
+						split[i] = nil
+						ok--
+						break
+					}
+				}
+			}
+			hasParity := 0
+			for idx, sh := range v {
+				split[idx] = sh
+				if idx >= k && len(v) > 0 {
+					hasParity++
+				}
+			}
+			fmt.Printf("Have %d complete remapped data shards and %d complete parity shards. ", ok, hasParity)
+
+			if err := rs.ReconstructData(split); err == nil {
+				fmt.Println("Could reconstruct completely")
+				for i, data := range split[:k] {
+					start := i * len(data)
+					copy(mapped[start:], data)
+				}
+				lastValid = size - 1
+				missing = 0
+			} else {
+				fmt.Println("Could NOT reconstruct:", err)
+			}
+		}
+	}
+	if lastValid == 0 {
+		return errors.New("no valid data found")
+	}
+	if missing > 0 {
+		out += ".truncated"
+	} else {
+		out += ".complete"
+	}
+	fmt.Println(missing, "bytes missing. Truncating", len(filled)-lastValid-1, "from the end.")
+	mapped = mapped[:lastValid+1]
+	err := os.WriteFile(out, mapped, os.ModePerm)
+	if err != nil {
+		return err
+	}
+	fmt.Println("Wrote output to", out)
+	return nil
+}