Mirror of https://github.com/minio/minio.git, synced 2025-03-29 00:33:42 -04:00
Add cross-version remapped merges to xl-meta (#19765)

Adds `-xver`, which can be used with `-export` and `-combine` to attempt to combine files across versions when the data is suspected to be the same. Overlapping data is compared before merging. Bonus: make `inspect` accept wildcards.
This commit is contained in:
commit 2c7bcee53f (parent 1fd90c93ff)
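For orientation, a hypothetical end-to-end invocation (the archive name is illustrative; only the flags themselves appear in this commit) might be:

    xl-meta -export -combine -xver inspect-data.zip

With -xver set, versions whose object sizes match are compared byte-for-byte wherever their recovered ranges overlap and merged only when no byte disagrees; without it, each version is combined on its own, as before.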
@@ -4,6 +4,7 @@ go 1.19

 require (
 	github.com/klauspost/compress v1.17.4
+	github.com/klauspost/filepathx v1.1.1
 	github.com/minio/colorjson v1.0.6
 	github.com/minio/madmin-go/v3 v3.0.36
 	github.com/secure-io/sio-go v0.3.1
@@ -4,6 +4,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
 github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
 github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
 github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/klauspost/filepathx v1.1.1 h1:201zvAsL1PhZvmXTP+QLer3AavWrO3U1NILWpniHK4w=
+github.com/klauspost/filepathx v1.1.1/go.mod h1:XWxdp8rEw4gupPBrxrV5Q57dL/71xj0OgV1gKt2zTfU=
 github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
 github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
@@ -31,6 +31,8 @@ import (
 	"os"
 	"strings"
 	"time"
+
+	"github.com/klauspost/filepathx"
 )

 var (
@@ -68,7 +70,7 @@ func main() {
 		}
 	}

-	var inputFileName, outputFileName string
+	var inputs []string

 	// Parse parameters
 	switch {
@@ -83,22 +85,34 @@ func main() {
 			fatalErr(err)
 		}
 		fatalErr(json.Unmarshal(got, &input))
-		inputFileName = input.File
+		inputs = []string{input.File}
 		*keyHex = input.Key
 	case len(flag.Args()) == 1:
-		inputFileName = flag.Args()[0]
+		var err error
+		inputs, err = filepathx.Glob(flag.Args()[0])
+		fatalErr(err)
 	default:
 		flag.Usage()
 		fatalIf(true, "Only 1 file can be decrypted")
 		os.Exit(1)
 	}
+	for _, input := range inputs {
+		processFile(input, privateKey)
+	}
+}

+func processFile(inputFileName string, privateKey []byte) {
 	// Calculate the output file name
+	var outputFileName string
 	switch {
 	case strings.HasSuffix(inputFileName, ".enc"):
 		outputFileName = strings.TrimSuffix(inputFileName, ".enc") + ".zip"
 	case strings.HasSuffix(inputFileName, ".zip"):
 		outputFileName = strings.TrimSuffix(inputFileName, ".zip") + ".decrypted.zip"
+	case strings.Contains(inputFileName, ".enc."):
+		outputFileName = strings.Replace(inputFileName, ".enc.", ".", 1) + ".zip"
+	default:
+		outputFileName = inputFileName + ".decrypted"
 	}

 	// Backup any already existing output file
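The wildcard support comes from filepathx.Glob, which, unlike path/filepath.Glob, also expands ** across directory boundaries. A minimal standalone sketch, assuming the Glob signature of the pinned v1.1.1:

    package main

    import (
        "fmt"

        "github.com/klauspost/filepathx"
    )

    func main() {
        // Expand every xl.meta below testdata, the way the patched tool
        // now expands its single path argument.
        matches, err := filepathx.Glob("testdata/**/xl.meta")
        if err != nil {
            fmt.Println("glob:", err)
            return
        }
        for _, m := range matches {
            fmt.Println(m)
        }
    }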
@@ -28,6 +28,7 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"regexp"
 	"sort"
 	"strings"
 	"time"
@@ -63,9 +64,9 @@ FLAGS:
 {{range .VisibleFlags}}{{.}}
 {{end}}
 `
+	isPart := regexp.MustCompile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/part\\.[0-9]+$")
-
 	app.HideHelpCommand = true

 	app.Flags = []cli.Flag{
 		cli.BoolFlag{
 			Usage: "print each file as a separate line without formatting",
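The isPart expression matches the tail of erasure part paths: a data-directory UUID followed by /part.N. A small self-contained check (paths made up):

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        isPart := regexp.MustCompile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/part\\.[0-9]+$")
        fmt.Println(isPart.MatchString("bucket/object/0e65a4a6-45ac-4b5b-a2ca-c83e33e283ae/part.1")) // true
        fmt.Println(isPart.MatchString("bucket/object/xl.meta"))                                     // false
    }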
@@ -84,6 +85,10 @@ FLAGS:
 			Usage: "combine inline data",
 			Name:  "combine",
 		},
+		cli.BoolFlag{
+			Usage: "combine inline data across versions when size matches",
+			Name:  "xver",
+		},
 	}

 	app.Action = func(c *cli.Context) error {
@@ -93,6 +98,9 @@ FLAGS:
 		}
 		// file / version / file
 		filemap := make(map[string]map[string]string)
+		foundData := make(map[string][]byte)
+		partDataToVerID := make(map[string][2]string)
+		var baseName string
 		// versionID ->
 		combineFiles := make(map[string][]string)
 		decode := func(r io.Reader, file string) ([]byte, error) {
@@ -171,10 +179,12 @@ FLAGS:
 			}
 			type erasureInfo struct {
 				V2Obj *struct {
 					EcDist  []int
 					EcIndex int
 					EcM     int
 					EcN     int
+					DDir     []byte
+					PartNums []int
 				}
 			}
 			var ei erasureInfo
@@ -188,6 +198,12 @@ FLAGS:
 				idx := ei.V2Obj.EcIndex
 				filemap[file][verID] = fmt.Sprintf("%s/shard-%02d-of-%02d", verID, idx, ei.V2Obj.EcN+ei.V2Obj.EcM)
 				filemap[file][verID+".json"] = buf.String()
+				for _, i := range ei.V2Obj.PartNums {
+					dataFile := fmt.Sprintf("%s%s/part.%d", strings.TrimSuffix(file, "xl.meta"), uuid.UUID(ei.V2Obj.DDir).String(), i)
+					partDataToVerID[dataFile] = [2]string{file, verID}
+				}
+			} else if err != nil {
+				fmt.Println("Error:", err)
 			}
 			return nil
 		})
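Each entry records where a version's part data should live on disk, derived from the xl.meta path, the decoded data-directory UUID and the part number. A minimal sketch of the same derivation, assuming google/uuid for the UUID formatting (the diff's uuid import is not shown) and an illustrative 16-byte data dir:

    package main

    import (
        "fmt"
        "strings"

        "github.com/google/uuid"
    )

    func main() {
        file := "bucket/object/xl.meta"
        ddir := make([]byte, 16) // in the tool this is ei.V2Obj.DDir
        ddir[15] = 1

        u, err := uuid.FromBytes(ddir)
        if err != nil {
            fmt.Println("bad data dir:", err)
            return
        }
        // Same shape as the dataFile keys stored in partDataToVerID.
        dataFile := fmt.Sprintf("%s%s/part.%d", strings.TrimSuffix(file, "xl.meta"), u.String(), 1)
        fmt.Println(dataFile) // bucket/object/00000000-0000-0000-0000-000000000001/part.1
    }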
@@ -230,6 +246,14 @@ FLAGS:
 					}
 				}, file)
 			}
+			if baseName == "" {
+				if strings.HasSuffix(file, "/xl.meta") {
+					baseName = strings.TrimSuffix(file, "/xl.meta")
+					if idx := strings.LastIndexByte(baseName, '/'); idx > 0 {
+						baseName = baseName[idx+1:]
+					}
+				}
+			}
 			err := data.files(func(name string, data []byte) {
 				fn := fmt.Sprintf("%s-%s.data", file, name)
 				if c.Bool("combine") {
@@ -334,7 +358,10 @@ FLAGS:
 				return err
 			}
 			for _, file := range zr.File {
-				if !file.FileInfo().IsDir() && strings.HasSuffix(file.Name, "xl.meta") {
+				if file.FileInfo().IsDir() {
+					continue
+				}
+				if strings.HasSuffix(file.Name, "xl.meta") {
 					r, err := file.Open()
 					if err != nil {
 						return err
@@ -352,6 +379,18 @@ FLAGS:
 						}
 					}
 					toPrint = append(toPrint, fmt.Sprintf("\t%s: %s", string(b), string(b2)))
+				} else if c.Bool("combine") && isPart.MatchString(file.Name) {
+					// name := isPart.FindString(file.Name)
+					name := strings.ReplaceAll(file.Name, ":", "_")
+					r, err := file.Open()
+					if err != nil {
+						return err
+					}
+					all, err := io.ReadAll(r)
+					if err != nil {
+						return err
+					}
+					foundData[name] = all
 				}
 			}
 		} else {
@@ -376,12 +415,46 @@ FLAGS:
 		}
 		sort.Strings(toPrint)
 		fmt.Printf("{\n%s\n}\n", strings.Join(toPrint, ",\n"))
+		for partName, data := range foundData {
+			if verid := partDataToVerID[partName]; verid != [2]string{} {
+				file := verid[0]
+				name := verid[1]
+				f := filemap[file][name]
+				fn := fmt.Sprintf("%s-%s.data", file, name)
+				if f != "" {
+					fn = f + ".data"
+					err := os.MkdirAll(filepath.Dir(fn), os.ModePerm)
+					if err != nil {
+						fmt.Println("MkdirAll:", filepath.Dir(fn), err)
+					}
+					err = os.WriteFile(fn+".json", []byte(filemap[file][name+".json"]), os.ModePerm)
+					combineFiles[name] = append(combineFiles[name], fn)
+					if err != nil {
+						fmt.Println("WriteFile:", err)
+					}
+					err = os.WriteFile(filepath.Dir(fn)+"/filename.txt", []byte(file), os.ModePerm)
+					if err != nil {
+						fmt.Println("combine WriteFile:", err)
+					}
+					fmt.Println("Remapped", partName, "to", fn)
+				}
+				err := os.WriteFile(fn, data, os.ModePerm)
+				if err != nil {
+					fmt.Println("WriteFile:", err)
+				}
+			}
+		}
 		if len(combineFiles) > 0 {
-			for k, v := range combineFiles {
-				if err := combine(v, k); err != nil {
+			if c.Bool("xver") {
+				if err := combineCrossVer(combineFiles, baseName, foundData); err != nil {
 					fmt.Println("ERROR:", err)
 				}
+			} else {
+				for k, v := range combineFiles {
+					if err := combine(v, k+"-"+baseName); err != nil {
+						fmt.Println("ERROR:", err)
+					}
+				}
 			}
 		}

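The remap loop materializes everything combine/combineCrossVer later needs, grouped per version. With the naming scheme above, a remapped shard ends up as (identifiers illustrative):

    <versionID>/shard-01-of-04.data        the remapped part bytes
    <versionID>/shard-01-of-04.data.json   the decoded xl.meta JSON for that shard
    <versionID>/filename.txt               the source xl.meta path inside the archive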
@@ -709,21 +782,27 @@ func (z xlMetaV2VersionHeaderV2) MarshalJSON() (o []byte, err error) {
 	return json.Marshal(tmp)
 }

-func combine(files []string, out string) error {
+type mappedData struct {
+	mapped, filled             []byte
+	size, shards, data, parity int
+	parityData                 map[int]map[int][]byte
+	blockOffset                int // Offset in bytes to start of block.
+	blocks                     int // 0 = one block.
+	objSize                    int
+}
+
+func readAndMap(files []string, blockNum int) (*mappedData, error) {
+	var m mappedData
 	sort.Strings(files)
-	var size, shards, data, parity int
-	mapped := make([]byte, size)
-	filled := make([]byte, size)
-	parityData := make(map[int]map[int][]byte)
-	fmt.Printf("Attempting to combine version %q.\n", out)
+	m.parityData = make(map[int]map[int][]byte)
 	for _, file := range files {
 		b, err := os.ReadFile(file)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		meta, err := os.ReadFile(file + ".json")
 		if err != nil {
-			return err
+			return nil, err
 		}
 		type erasureInfo struct {
 			V2Obj *struct {
@@ -732,81 +811,117 @@ func combine(files []string, out string) error {
 				EcM     int
 				EcN     int
 				Size    int
+				EcBSize int
 			}
 		}
 		var ei erasureInfo
 		var idx int
 		if err := json.Unmarshal(meta, &ei); err == nil && ei.V2Obj != nil {
-			if size == 0 {
-				size = ei.V2Obj.Size
-				mapped = make([]byte, size)
-				filled = make([]byte, size)
+			if m.objSize == 0 {
+				m.objSize = ei.V2Obj.Size
 			}
-			data = ei.V2Obj.EcM
-			parity = ei.V2Obj.EcN
-			if shards == 0 {
-				shards = data + parity
+			m.data = ei.V2Obj.EcM
+			m.parity = ei.V2Obj.EcN
+			if m.shards == 0 {
+				m.shards = m.data + m.parity
 			}
 			idx = ei.V2Obj.EcIndex - 1
-			fmt.Println("Read shard", ei.V2Obj.EcIndex, "Data shards", data, "Parity", parity, fmt.Sprintf("(%s)", file))
-			if ei.V2Obj.Size != size {
-				return fmt.Errorf("size mismatch. Meta size: %d", ei.V2Obj.Size)
+			fmt.Println("Read shard", ei.V2Obj.EcIndex, "Data shards", m.data, "Parity", m.parity, fmt.Sprintf("(%s)", file))
+			if ei.V2Obj.Size != m.objSize {
+				return nil, fmt.Errorf("size mismatch. Meta size: %d, Prev: %d", ei.V2Obj.Size, m.objSize)
 			}
 		} else {
-			return err
+			return nil, err
 		}
 		if len(b) < 32 {
-			return fmt.Errorf("file %s too short", file)
+			return nil, fmt.Errorf("file %s too short", file)
+		}
+		offset := ei.V2Obj.EcBSize * blockNum
+		if offset > ei.V2Obj.Size {
+			return nil, fmt.Errorf("block %d out of range. offset %d > size %d", blockNum, offset, ei.V2Obj.Size)
+		}
+		m.blockOffset = offset
+		m.blocks = (ei.V2Obj.Size + ei.V2Obj.EcBSize - 1) / ei.V2Obj.EcBSize
+		if m.blocks > 0 {
+			m.blocks--
+		}
+		if blockNum < m.blocks {
+			m.size = ei.V2Obj.EcBSize
+		} else {
+			m.size = ei.V2Obj.Size - offset
 		}
-		// Trim hash. Fine for inline data, since only one block.
-		b = b[32:]

-		set := parityData[data]
+		// Extract block data.
+		ssz := shardSize(ei.V2Obj.EcBSize, ei.V2Obj.EcM)
+		b, err = bitrot(b, blockNum*ssz, ssz)
+		if err != nil {
+			return nil, err
+		}
+		fmt.Println("Block data size:", m.size, "Shard size", ssz, "Got Shard:", len(b), "Bitrot ok")
+
+		if m.mapped == nil {
+			m.mapped = make([]byte, m.size)
+			m.filled = make([]byte, m.size)
+		}
+
+		set := m.parityData[m.data]
 		if set == nil {
 			set = make(map[int][]byte)
 		}
 		set[idx] = b
-		parityData[data] = set
+		m.parityData[m.data] = set

 		// Combine
 		start := len(b) * idx
-		if start >= len(mapped) {
+		if start >= len(m.mapped) {
 			continue
 		}
-		copy(mapped[start:], b)
+		copy(m.mapped[start:], b)
 		for j := range b {
-			if j+start >= len(filled) {
+			if j+start >= len(m.filled) {
 				break
 			}
-			filled[j+start] = 1
+			m.filled[j+start] = 1
 		}
 	}
+	return &m, nil
+}
+
+func combine(files []string, out string) error {
+	fmt.Printf("Attempting to combine version %q.\n", out)
+	m, err := readAndMap(files, 0)
+	if err != nil {
+		return err
+	}
+	if m.blocks > 0 {
+		// TODO: Support multiple blocks. For now use -xver.
+		return fmt.Errorf("multiple blocks found, only one block supported. Try with -xver")
+	}
 	lastValid := 0
 	missing := 0
-	for i := range filled {
-		if filled[i] == 1 {
+	for i := range m.filled {
+		if m.filled[i] == 1 {
 			lastValid = i
 		} else {
 			missing++
 		}
 	}
-	if missing > 0 && len(parityData) > 0 {
+	if missing > 0 && len(m.parityData) > 0 {
 		fmt.Println("Attempting to reconstruct using parity sets:")
-		for k, v := range parityData {
+		for k, v := range m.parityData {
 			if missing == 0 {
 				break
 			}
-			fmt.Println("* Setup: Data shards:", k, "- Parity blocks:", len(v))
-			rs, err := reedsolomon.New(k, shards-k)
+			fmt.Println("* Setup: Data shards:", k, "- Parity blocks:", m.shards-k)
+			rs, err := reedsolomon.New(k, m.shards-k)
 			if err != nil {
 				return err
 			}
-			split, err := rs.Split(mapped)
+			split, err := rs.Split(m.mapped)
 			if err != nil {
 				return err
 			}
-			splitFilled, err := rs.Split(filled)
+			splitFilled, err := rs.Split(m.filled)
 			if err != nil {
 				return err
 			}
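The block bookkeeping in readAndMap deserves spelling out: objects larger than one erasure block span ceil(Size/EcBSize) blocks, and only the final block is short. A standalone sketch of that arithmetic with made-up sizes (the field names mirror the diff):

    package main

    import "fmt"

    func main() {
        const ecBSize = 1 << 20 // EcBSize from xl.meta (illustrative)
        const objSize = 2_500_000

        blocks := (objSize + ecBSize - 1) / ecBSize // ceil, as in the diff
        if blocks > 0 {
            blocks-- // stored zero-based: 0 means a single block
        }
        for blockNum := 0; blockNum <= blocks; blockNum++ {
            offset := ecBSize * blockNum
            size := ecBSize
            if blockNum >= blocks {
                size = objSize - offset // the last block is short
            }
            fmt.Printf("block %d: offset=%d size=%d\n", blockNum, offset, size)
        }
    }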
@@ -833,9 +948,9 @@ func combine(files []string, out string) error {
 				fmt.Println("Could reconstruct completely")
 				for i, data := range split[:k] {
 					start := i * len(data)
-					copy(mapped[start:], data)
+					copy(m.mapped[start:], data)
 				}
-				lastValid = size - 1
+				lastValid = m.size - 1
 				missing = 0
 			} else {
 				fmt.Println("Could NOT reconstruct:", err)
@@ -846,16 +961,238 @@ func combine(files []string, out string) error {
 		return errors.New("no valid data found")
 	}
 	if missing > 0 {
-		out += ".truncated"
+		fmt.Println(missing, "bytes missing. Truncating", len(m.filled)-lastValid-1, "from end.")
+		out += ".incomplete"
 	} else {
+		fmt.Println("No bytes missing.")
 		out += ".complete"
 	}
-	fmt.Println(missing, "bytes missing. Truncating", len(filled)-lastValid-1, "from end.")
-	mapped = mapped[:lastValid+1]
-	err := os.WriteFile(out, mapped, os.ModePerm)
+	m.mapped = m.mapped[:lastValid+1]
+	err = os.WriteFile(out, m.mapped, os.ModePerm)
 	if err != nil {
 		return err
 	}
 	fmt.Println("Wrote output to", out)
 	return nil
 }
+
+func combineCrossVer(all map[string][]string, baseName string, additional map[string][]byte) error {
+	names := make([]string, 0, len(all))
+	files := make([][]string, 0, len(all))
+	for k, v := range all {
+		names = append(names, k)
+		files = append(files, v)
+	}
+	if len(files) <= 1 {
+		if len(files) == 0 {
+			return nil
+		}
+		return combine(files[0], names[0])
+	}
+	exportedSizes := make(map[int]bool)
+nextFile:
+	for i, file := range files {
+		var combined []byte
+		var missingAll int
+		var lastValidAll int
+		for block := 0; ; block++ {
+			fmt.Printf("Block %d, Base version %q.\n", block+1, names[i])
+			m, err := readAndMap(file, block)
+			if err != nil {
+				return err
+			}
+			if exportedSizes[m.objSize] {
+				fmt.Println("Skipping version", names[i], "as it has already been exported.")
+				continue nextFile
+			}
+		compareFile:
+			for j, other := range files {
+				if i == j {
+					continue
+				}
+				fmt.Printf("Reading version %q.\n", names[j])
+				otherM, err := readAndMap(other, block)
+				if err != nil {
+					fmt.Println(err)
+					continue
+				}
+				if m.objSize != otherM.objSize {
+					continue
+				}
+				var ok int
+				for i, filled := range otherM.filled[:m.size] {
+					if filled == 1 && m.filled[i] == 1 {
+						if m.mapped[i] != otherM.mapped[i] {
+							fmt.Println("Data mismatch at byte", i, "- Disregarding version", names[j])
+							continue compareFile
+						}
+						ok++
+					}
+				}
+
+				// If data+parity matches, combine.
+				if m.parity == otherM.parity && m.data == otherM.data {
+					for k, v := range m.parityData {
+						if otherM.parityData[k] == nil {
+							continue
+						}
+						for i, data := range v {
+							if data != nil || otherM.parityData[k][i] == nil {
+								continue
+							}
+							m.parityData[k][i] = otherM.parityData[k][i]
+						}
+					}
+				}
+
+				fmt.Printf("Data overlaps (%d bytes). Combining with %q.\n", ok, names[j])
+				for i := range otherM.filled {
+					if otherM.filled[i] == 1 {
+						m.filled[i] = 1
+						m.mapped[i] = otherM.mapped[i]
+					}
+				}
+			}
+			lastValid := 0
+			missing := 0
+			for i := range m.filled {
+				if m.filled[i] == 1 {
+					lastValid = i
+				} else {
+					missing++
+				}
+			}
+			if missing > 0 && len(m.parityData) > 0 {
+				fmt.Println("Attempting to reconstruct using parity sets:")
+				for k, v := range m.parityData {
+					if missing == 0 {
+						break
+					}
+					fmt.Println("* Setup: Data shards:", k, "- Parity blocks:", m.shards-k)
+					rs, err := reedsolomon.New(k, m.shards-k)
+					if err != nil {
+						return err
+					}
+					split, err := rs.Split(m.mapped)
+					if err != nil {
+						return err
+					}
+					splitFilled, err := rs.Split(m.filled)
+					if err != nil {
+						return err
+					}
+					ok := len(splitFilled)
+					for i, sh := range splitFilled {
+						for _, v := range sh {
+							if v == 0 {
+								split[i] = nil
+								ok--
+								break
+							}
+						}
+					}
+					hasParity := 0
+					for idx, sh := range v {
+						split[idx] = sh
+						if idx >= k && len(v) > 0 {
+							hasParity++
+						}
+					}
+					fmt.Printf("Have %d complete remapped data shards and %d complete parity shards. ", ok, hasParity)
+
+					if err := rs.ReconstructData(split); err == nil {
+						fmt.Println("Could reconstruct completely")
+						for i, data := range split[:k] {
+							start := i * len(data)
+							copy(m.mapped[start:], data)
+						}
+						lastValid = m.size - 1
+						missing = 0
+					} else {
+						fmt.Println("Could NOT reconstruct:", err)
+					}
+				}
+			}
+			if m.blockOffset != len(combined) {
+				return fmt.Errorf("Block offset mismatch. Expected %d got %d", m.blockOffset, len(combined))
+			}
+			combined = append(combined, m.mapped[:m.size]...)
+			missingAll += missing
+			if lastValid > 0 {
+				lastValidAll = lastValid + m.blockOffset
+			}
+			if m.blocks == block {
+				if len(combined) != m.objSize {
+					fmt.Println("Combined size mismatch. Expected", m.objSize, "got", len(combined))
+				}
+				fmt.Println("Reached block", block+1, "of", m.blocks+1, "for", names[i], ". Done.")
+				break
+			}
+		}
+		if lastValidAll == 0 {
+			return errors.New("no valid data found")
+		}
+		out := names[i] + "-" + baseName
+		if missingAll > 0 {
+			out += ".incomplete"
+			fmt.Println(missingAll, "bytes missing. Truncating", len(combined)-lastValidAll-1, "from end.")
+		} else {
+			out += ".complete"
+			fmt.Println("No bytes missing.")
+		}
+		if missingAll == 0 {
+			exportedSizes[len(combined)] = true
+		}
+		combined = combined[:lastValidAll+1]
+		err := os.WriteFile(out, combined, os.ModePerm)
+		if err != nil {
+			return err
+		}
+		fmt.Println("Wrote output to", out)
+	}
+	return nil
+}
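The merge rule is deliberately conservative: bytes are adopted from another version only where both buffers are marked filled and agree, and a single overlapping mismatch disqualifies the candidate outright. A reduced sketch of that comparison over two mapped/filled pairs:

    package main

    import "fmt"

    // merge folds other into mapped/filled, rejecting on any overlapping
    // mismatch, the way combineCrossVer disregards a version.
    func merge(mapped, filled, otherMapped, otherFilled []byte) bool {
        for i := range otherFilled {
            if otherFilled[i] == 1 && filled[i] == 1 && mapped[i] != otherMapped[i] {
                return false
            }
        }
        for i := range otherFilled {
            if otherFilled[i] == 1 {
                filled[i] = 1
                mapped[i] = otherMapped[i]
            }
        }
        return true
    }

    func main() {
        mapped := []byte{'a', 0, 'c', 0}
        filled := []byte{1, 0, 1, 0}
        otherMapped := []byte{'a', 'b', 0, 'd'}
        otherFilled := []byte{1, 1, 0, 1}
        fmt.Println(merge(mapped, filled, otherMapped, otherFilled)) // true
        fmt.Printf("%q %v\n", mapped, filled)                        // "abcd" [1 1 1 1]
    }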
+
+// bitrot returns a shard beginning at startOffset after doing bitrot checks.
+func bitrot(val []byte, startOffset, shardSize int) ([]byte, error) {
+	var res []byte
+	for len(val) >= 32 {
+		want := val[:32]
+		data := val[32:]
+		if len(data) > shardSize {
+			data = data[:shardSize]
+		}
+
+		const magicHighwayHash256Key = "\x4b\xe7\x34\xfa\x8e\x23\x8a\xcd\x26\x3e\x83\xe6\xbb\x96\x85\x52\x04\x0f\x93\x5d\xa3\x9f\x44\x14\x97\xe0\x9d\x13\x22\xde\x36\xa0"
+
+		hh, _ := highwayhash.New([]byte(magicHighwayHash256Key))
+		hh.Write(data)
+		if !bytes.Equal(want, hh.Sum(nil)) {
+			return res, fmt.Errorf("bitrot detected")
+		}
+		res = append(res, data...)
+		val = val[32+len(data):]
+		if len(res) > startOffset {
+			return res[startOffset:], nil
+		}
+	}
+	return res, fmt.Errorf("bitrot: data too short to get block. len(res)=%d, startOffset=%d", len(res), startOffset)
+}
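bitrot walks MinIO's streaming bitrot framing: a shard file is a sequence of units, each a 32-byte HighwayHash-256 followed by the (at most shardSize) bytes it covers. A round-trip sketch of a single unit, reusing the key constant from the diff:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/minio/highwayhash"
    )

    const magicHighwayHash256Key = "\x4b\xe7\x34\xfa\x8e\x23\x8a\xcd\x26\x3e\x83\xe6\xbb\x96\x85\x52\x04\x0f\x93\x5d\xa3\x9f\x44\x14\x97\xe0\x9d\x13\x22\xde\x36\xa0"

    func main() {
        shard := []byte("example shard bytes")

        // Frame one unit: hash(32) || data, the layout bitrot() consumes.
        hh, _ := highwayhash.New([]byte(magicHighwayHash256Key))
        hh.Write(shard)
        unit := append(hh.Sum(nil), shard...)

        // Verify it the same way bitrot() does.
        want, data := unit[:32], unit[32:]
        hh.Reset()
        hh.Write(data)
        fmt.Println("bitrot ok:", bytes.Equal(want, hh.Sum(nil))) // true
    }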
+
+// shardSize returns the shard size for a given block size and data blocks.
+func shardSize(blockSize, dataBlocks int) (sz int) {
+	if dataBlocks == 0 {
+		// do nothing on invalid input
+		return
+	}
+	// Make denominator positive
+	if dataBlocks < 0 {
+		blockSize = -blockSize
+		dataBlocks = -dataBlocks
+	}
+	sz = blockSize / dataBlocks
+	if blockSize > 0 && blockSize%dataBlocks != 0 {
+		sz++
+	}
+	return
+}
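shardSize is a ceiling division: a 1 MiB erasure block striped over 5 data shards needs 209716-byte shards, since 5 x 209715 would fall one byte short. The same rounding inline:

    package main

    import "fmt"

    func main() {
        blockSize, dataBlocks := 1<<20, 5
        sz := blockSize / dataBlocks
        if blockSize%dataBlocks != 0 {
            sz++ // round up, as shardSize does
        }
        fmt.Println(sz) // 209716
    }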