Merge pull request #2149 from harshavardhana/hash-order

XL/metadata: use new hashOrder algorithm for newXLMeta. (#2147)
2025-07-28 01:40:09 -04:00 · 2016-07-08 15:57:16 -07:00 · 2016-07-08 15:57:16 -07:00 · cb415ef12e
commit cb415ef12e
parent 5ff1203fc0 6266328a85
8 changed files with 55 additions and 25 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -4,9 +4,9 @@ language: go

 os:
 - linux
-## Turning off for now.
-# - osx
-# osx_image: xcode7.2
+- osx
+
+osx_image: xcode7.2

 env:
 - ARCH=x86_64
--- a/xl-v1-common.go
+++ b/xl-v1-common.go
@ -16,7 +16,10 @@

 package main

-import "path"
+import (
+	"path"
+	"time"
+)

 // getLoadBalancedQuorumDisks - fetches load balanced sufficiently
 // randomized quorum disk slice.
@ -29,7 +32,7 @@ func (xl xlObjects) getLoadBalancedQuorumDisks() (disks []StorageAPI) {
 // randomized) disk slice.
 func (xl xlObjects) getLoadBalancedDisks() (disks []StorageAPI) {
 	// Based on the random shuffling return back randomized disks.
-	for _, i := range randInts(len(xl.storageDisks)) {
+	for _, i := range hashOrder(time.Now().UTC().String(), len(xl.storageDisks)) {
 		disks = append(disks, xl.storageDisks[i-1])
 	}
 	return disks
--- a/xl-v1-metadata.go
+++ b/xl-v1-metadata.go
@ -107,9 +107,8 @@ type xlMetaV1 struct {
 	Parts []objectPartInfo `json:"parts,omitempty"`
 }

-// newXLMetaV1 - initializes new xlMetaV1, adds version, allocates a
-// fresh erasure info.
-func newXLMetaV1(dataBlocks, parityBlocks int) (xlMeta xlMetaV1) {
+// newXLMetaV1 - initializes new xlMetaV1, adds version, allocates a fresh erasure info.
+func newXLMetaV1(object string, dataBlocks, parityBlocks int) (xlMeta xlMetaV1) {
 	xlMeta = xlMetaV1{}
 	xlMeta.Version = "1.0.0"
 	xlMeta.Format = "xl"
@ -119,7 +118,7 @@ func newXLMetaV1(dataBlocks, parityBlocks int) (xlMeta xlMetaV1) {
 		DataBlocks:   dataBlocks,
 		ParityBlocks: parityBlocks,
 		BlockSize:    blockSizeV1,
-		Distribution: randInts(dataBlocks + parityBlocks),
+		Distribution: hashOrder(object, dataBlocks+parityBlocks),
 	}
 	return xlMeta
 }
--- a/xl-v1-metadata_test.go
+++ b/xl-v1-metadata_test.go
@ -39,7 +39,7 @@ func TestXLMetaV1(t *testing.T) {
 	}

 	// Create a XLMetaV1 structure to test on.
-	meta := newXLMetaV1(8, 8)
+	meta := newXLMetaV1("minio", 8, 8)

 	// Add 5 parts.
 	for _, test := range testCases {
--- a/xl-v1-multipart.go
+++ b/xl-v1-multipart.go
@ -252,7 +252,7 @@ func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMark
 // all the disks. `uploads.json` carries metadata regarding on going
 // multipart operation on the object.
 func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[string]string) (uploadID string, err error) {
-	xlMeta := newXLMetaV1(xl.dataBlocks, xl.parityBlocks)
+	xlMeta := newXLMetaV1(object, xl.dataBlocks, xl.parityBlocks)
 	// If not set default to "application/octet-stream"
 	if meta["content-type"] == "" {
 		contentType := "application/octet-stream"
--- a/xl-v1-object.go
+++ b/xl-v1-object.go
@ -368,7 +368,7 @@ func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.
 	tempObj := uniqueID

 	// Initialize xl meta.
-	xlMeta := newXLMetaV1(xl.dataBlocks, xl.parityBlocks)
+	xlMeta := newXLMetaV1(object, xl.dataBlocks, xl.parityBlocks)

 	// Read metadata associated with the object from all disks.
 	partsMetadata, errs := xl.readAllXLMetadata(bucket, object)
--- a/xl-v1-utils.go
+++ b/xl-v1-utils.go
@ -18,9 +18,9 @@ package main

 import (
 	"encoding/json"
-	"math/rand"
+	"errors"
+	"hash/crc32"
 	"path"
-	"time"
 )

 // Validates if we have quorum based on the errors with errDiskNotFound.
@ -48,19 +48,19 @@ func diskCount(disks []StorageAPI) int {
 	return diskCount
 }

-// randInts - uses Knuth Fisher-Yates shuffle algorithm for generating uniform shuffling.
-func randInts(count int) []int {
-	rand.Seed(time.Now().UTC().UnixNano()) // Seed with current time.
-	ints := make([]int, count)
-	for i := 0; i < count; i++ {
-		ints[i] = i + 1
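+// hashOrder - returns the integers 1..count as a cyclic rotation whose starting offset is derived from the CRC32 checksum of token; the order is deterministic for a given token and count (count must be positive).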
+func hashOrder(token string, count int) []int {
+	if count < 0 {
+		panic(errors.New("hashOrder count cannot be negative"))
 	}
-	for i := 0; i < count; i++ {
-		// Choose index uniformly in [i, count-1]
-		r := i + rand.Intn(count-i)
-		ints[r], ints[i] = ints[i], ints[r]
+	nums := make([]int, count)
+	tokenCrc := crc32.Checksum([]byte(token), crc32.IEEETable)
+
+	start := int(uint32(tokenCrc)%uint32(count)) | 1
+	for i := 1; i <= count; i++ {
+		nums[i-1] = 1 + ((start + i) % count)
 	}
-	return ints
+	return nums
 }

 // readXLMeta reads `xl.json` and returns back XL metadata structure.
--- a/xl-v1_test.go
+++ b/xl-v1_test.go
@ -19,6 +19,7 @@ package main
 import (
 	"os"
 	"path/filepath"
+	"reflect"
 	"testing"
 )

@ -136,3 +137,30 @@ func TestNewXL(t *testing.T) {
 		t.Fatalf("Unable to initialize erasure, %s", err)
 	}
 }
+
+// TestHashOrder - verifies that hashOrder returns the expected ordering of 1..16 for a set of object names.
+func TestHashOrder(t *testing.T) {
+	testCases := []struct {
+		objectName  string
+		hashedOrder []int
+	}{
+		// cases which should pass the test.
+		// passing in valid object name.
+		{"object", []int{15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}},
+		{"The Shining Script <v1>.pdf", []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}},
+		{"Cost Benefit Analysis (2009-2010).pptx", []int{15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}},
+		{"117Gn8rfHL2ACARPAhaFd0AGzic9pUbIA/5OCn5A", []int{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2}},
+		{"SHØRT", []int{11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}},
+		{"There are far too many object names, and far too few bucket names!", []int{15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}},
+		{"a/b/c/", []int{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2}},
+		{"/a/b/c", []int{7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6}},
+		{string([]byte{0xff, 0xfe, 0xfd}), []int{15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}},
+	}
+
+	for i, testCase := range testCases {
+		hashedOrder := hashOrder(testCase.objectName, 16)
+		if !reflect.DeepEqual(testCase.hashedOrder, hashedOrder) {
+			t.Errorf("Test case %d: Expected \"%#v\" but failed \"%#v\"", i+1, testCase.hashedOrder, hashedOrder)
+		}
+	}
+}