internode lockArgs should use messagepack (#13329)

it would seem like using `bufio.Scan()` is very
slow for heavy concurrent I/O, ie. when r.Body
is slow , instead use a proper
binary exchange format, to marshal and unmarshal
the LockArgs datastructure in a cleaner way.

this PR increases performance of the locking
sub-system for tiny repeated read lock requests
on same object.

```
BenchmarkLockArgs
BenchmarkLockArgs-4              6417609               185.7 ns/op            56 B/op          2 allocs/op
BenchmarkLockArgsOld
BenchmarkLockArgsOld-4           1187368              1015 ns/op            4096 B/op          1 allocs/op
```
This commit is contained in:
Harshavardhana 2021-09-30 11:53:01 -07:00 committed by GitHub
parent d00ff3c453
commit ffd497673f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 545 additions and 83 deletions

View File

@ -22,7 +22,6 @@ import (
"context"
"io"
"net/url"
"strconv"
"github.com/minio/minio/internal/dsync"
"github.com/minio/minio/internal/http"
@ -89,17 +88,13 @@ func (client *lockRESTClient) Close() error {
// restCall makes a call to the lock REST server.
func (client *lockRESTClient) restCall(ctx context.Context, call string, args dsync.LockArgs) (reply bool, err error) {
values := url.Values{}
values.Set(lockRESTUID, args.UID)
values.Set(lockRESTOwner, args.Owner)
values.Set(lockRESTSource, args.Source)
values.Set(lockRESTQuorum, strconv.Itoa(args.Quorum))
var buffer bytes.Buffer
for _, resource := range args.Resources {
buffer.WriteString(resource)
buffer.WriteString("\n")
argsBytes, err := args.MarshalMsg(metaDataPoolGet()[:0])
if err != nil {
return false, err
}
respBody, err := client.callWithContext(ctx, call, values, &buffer, -1)
defer metaDataPoolPut(argsBytes)
body := bytes.NewReader(argsBytes)
respBody, err := client.callWithContext(ctx, call, nil, body, body.Size())
defer http.DrainBody(respBody)
switch err {
case nil:

View File

@ -22,7 +22,7 @@ import (
)
const (
lockRESTVersion = "v6" // Add Refresh API
lockRESTVersion = "v7" // Add msgp for lockArgs
lockRESTVersionPrefix = SlashSeparator + lockRESTVersion
lockRESTPrefix = minioReservedBucketPath + "/lock"
)
@ -35,20 +35,6 @@ const (
lockRESTMethodUnlock = "/unlock"
lockRESTMethodRUnlock = "/runlock"
lockRESTMethodForceUnlock = "/force-unlock"
// lockRESTOwner represents owner UUID
lockRESTOwner = "owner"
// Unique ID of lock/unlock request.
lockRESTUID = "uid"
// Source contains the line number, function and file name of the code
// on the client node that requested the lock.
lockRESTSource = "source"
// Quroum value to be saved along lock requester info, useful
// in verifying stale locks
lockRESTQuorum = "quorum"
)
var (

View File

@ -18,14 +18,13 @@
package cmd
import (
"bufio"
"context"
"errors"
"io"
"net/http"
"sort"
"strconv"
"time"
"github.com/dustin/go-humanize"
"github.com/gorilla/mux"
"github.com/minio/minio/internal/dsync"
)
@ -63,34 +62,12 @@ func (l *lockRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool {
}
func getLockArgs(r *http.Request) (args dsync.LockArgs, err error) {
values := r.Form
quorum, err := strconv.Atoi(values.Get(lockRESTQuorum))
if err != nil {
dec := msgpNewReader(io.LimitReader(r.Body, 1000*humanize.KiByte))
defer readMsgpReaderPool.Put(dec)
err = args.DecodeMsg(dec)
return args, err
}
args = dsync.LockArgs{
Owner: values.Get(lockRESTOwner),
UID: values.Get(lockRESTUID),
Source: values.Get(lockRESTSource),
Quorum: quorum,
}
var resources []string
bio := bufio.NewScanner(r.Body)
for bio.Scan() {
resources = append(resources, bio.Text())
}
if err := bio.Err(); err != nil {
return args, err
}
sort.Strings(resources)
args.Resources = resources
return args, nil
}
// HealthHandler returns success if request is authenticated.
func (l *lockRESTServer) HealthHandler(w http.ResponseWriter, r *http.Request) {
l.IsValid(w, r)

View File

@ -0,0 +1,104 @@
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"bufio"
"bytes"
"io/ioutil"
"net/http"
"net/url"
"sort"
"strconv"
"testing"
"github.com/minio/minio/internal/dsync"
)
func BenchmarkLockArgs(b *testing.B) {
args := dsync.LockArgs{
Owner: "minio",
UID: "uid",
Source: "lockArgs.go",
Quorum: 3,
Resources: []string{"obj.txt"},
}
argBytes, err := args.MarshalMsg(nil)
if err != nil {
b.Fatal(err)
}
req := &http.Request{
Body: ioutil.NopCloser(bytes.NewReader(argBytes)),
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
getLockArgs(req)
}
}
func BenchmarkLockArgsOld(b *testing.B) {
values := url.Values{}
values.Set("owner", "minio")
values.Set("uid", "uid")
values.Set("source", "lockArgs.go")
values.Set("quorum", "3")
req := &http.Request{
Form: values,
Body: ioutil.NopCloser(bytes.NewReader([]byte(`obj.txt`))),
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
getLockArgsOld(req)
}
}
func getLockArgsOld(r *http.Request) (args dsync.LockArgs, err error) {
values := r.Form
quorum, err := strconv.Atoi(values.Get("quorum"))
if err != nil {
return args, err
}
args = dsync.LockArgs{
Owner: values.Get("onwer"),
UID: values.Get("uid"),
Source: values.Get("source"),
Quorum: quorum,
}
var resources []string
bio := bufio.NewScanner(r.Body)
for bio.Scan() {
resources = append(resources, bio.Text())
}
if err := bio.Err(); err != nil {
return args, err
}
sort.Strings(resources)
args.Resources = resources
return args, nil
}

View File

@ -157,7 +157,10 @@ func osErrToFileErr(err error) error {
return errFaultyDisk
}
if isSysErrInvalidArg(err) {
return errUnsupportedDisk
// For some odd calls with O_DIRECT reads
// filesystems can return EINVAL, handle
// these as FileNotFound instead.
return errFileNotFound
}
if isSysErrNoSpace(err) {
return errDiskFull

View File

@ -22,6 +22,7 @@ import (
"errors"
"math/rand"
"os"
"sort"
"sync"
"time"
@ -86,6 +87,7 @@ func isLocked(uid string) bool {
// NewDRWMutex - initializes a new dsync RW mutex.
func NewDRWMutex(clnt *Dsync, names ...string) *DRWMutex {
restClnts, _ := clnt.GetLockers()
sort.Strings(names)
return &DRWMutex{
writeLocks: make([]string, len(restClnts)),
Names: names,
@ -372,7 +374,7 @@ func refresh(ctx context.Context, ds *Dsync, id, source string, quorum int) (boo
}
// lock tries to acquire the distributed lock, returning true or false.
func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, tolerance, quorum int, lockNames ...string) bool {
func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, tolerance, quorum int, names ...string) bool {
for i := range *locks {
(*locks)[i] = ""
}
@ -386,7 +388,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
args := LockArgs{
Owner: owner,
UID: id,
Resources: lockNames,
Resources: names,
Source: source,
Quorum: quorum,
}
@ -468,7 +470,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
if !quorumLocked {
log("dsync: Unable to acquire lock in quorum %#v\n", args)
// Release all acquired locks without quorum.
if !releaseAll(ds, tolerance, owner, locks, isReadLock, restClnts, lockNames...) {
if !releaseAll(ds, tolerance, owner, locks, isReadLock, restClnts, names...) {
log("Unable to release acquired locks, these locks will expire automatically %#v\n", args)
}
}
@ -482,7 +484,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
// release abandoned lock
log("Releasing abandoned lock\n")
sendRelease(ds, restClnts[grantToBeReleased.index],
owner, grantToBeReleased.lockUID, isReadLock, lockNames...)
owner, grantToBeReleased.lockUID, isReadLock, names...)
}
}
}()
@ -530,14 +532,14 @@ func checkQuorumLocked(locks *[]string, quorum int) bool {
}
// releaseAll releases all locks that are marked as locked
func releaseAll(ds *Dsync, tolerance int, owner string, locks *[]string, isReadLock bool, restClnts []NetLocker, lockNames ...string) bool {
func releaseAll(ds *Dsync, tolerance int, owner string, locks *[]string, isReadLock bool, restClnts []NetLocker, names ...string) bool {
var wg sync.WaitGroup
for lockID := range restClnts {
wg.Add(1)
go func(lockID int) {
defer wg.Done()
if isLocked((*locks)[lockID]) {
if sendRelease(ds, restClnts[lockID], owner, (*locks)[lockID], isReadLock, lockNames...) {
if sendRelease(ds, restClnts[lockID], owner, (*locks)[lockID], isReadLock, names...) {
(*locks)[lockID] = ""
}
}

View File

@ -0,0 +1,40 @@
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package dsync
//go:generate msgp -file $GOFILE
// LockArgs is minimal required values for any dsync compatible lock operation.
type LockArgs struct {
// Unique ID of lock/unlock request.
UID string
// Resources contains single or multiple entries to be locked/unlocked.
Resources []string
// Source contains the line number, function and file name of the code
// on the client node that requested the lock.
Source string
// Owner represents unique ID for this instance, an owner who originally requested
// the locked resource, useful primarily in figuring our stale locks.
Owner string
// Quorum represents the expected quorum for this lock type.
Quorum int
}

View File

@ -0,0 +1,250 @@
package dsync
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"github.com/tinylib/msgp/msgp"
)
// DecodeMsg implements msgp.Decodable
func (z *LockArgs) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, err = dc.ReadMapKeyPtr()
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "UID":
z.UID, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "UID")
return
}
case "Resources":
var zb0002 uint32
zb0002, err = dc.ReadArrayHeader()
if err != nil {
err = msgp.WrapError(err, "Resources")
return
}
if cap(z.Resources) >= int(zb0002) {
z.Resources = (z.Resources)[:zb0002]
} else {
z.Resources = make([]string, zb0002)
}
for za0001 := range z.Resources {
z.Resources[za0001], err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "Resources", za0001)
return
}
}
case "Source":
z.Source, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "Source")
return
}
case "Owner":
z.Owner, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "Owner")
return
}
case "Quorum":
z.Quorum, err = dc.ReadInt()
if err != nil {
err = msgp.WrapError(err, "Quorum")
return
}
default:
err = dc.Skip()
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
return
}
// EncodeMsg implements msgp.Encodable
func (z *LockArgs) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 5
// write "UID"
err = en.Append(0x85, 0xa3, 0x55, 0x49, 0x44)
if err != nil {
return
}
err = en.WriteString(z.UID)
if err != nil {
err = msgp.WrapError(err, "UID")
return
}
// write "Resources"
err = en.Append(0xa9, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73)
if err != nil {
return
}
err = en.WriteArrayHeader(uint32(len(z.Resources)))
if err != nil {
err = msgp.WrapError(err, "Resources")
return
}
for za0001 := range z.Resources {
err = en.WriteString(z.Resources[za0001])
if err != nil {
err = msgp.WrapError(err, "Resources", za0001)
return
}
}
// write "Source"
err = en.Append(0xa6, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65)
if err != nil {
return
}
err = en.WriteString(z.Source)
if err != nil {
err = msgp.WrapError(err, "Source")
return
}
// write "Owner"
err = en.Append(0xa5, 0x4f, 0x77, 0x6e, 0x65, 0x72)
if err != nil {
return
}
err = en.WriteString(z.Owner)
if err != nil {
err = msgp.WrapError(err, "Owner")
return
}
// write "Quorum"
err = en.Append(0xa6, 0x51, 0x75, 0x6f, 0x72, 0x75, 0x6d)
if err != nil {
return
}
err = en.WriteInt(z.Quorum)
if err != nil {
err = msgp.WrapError(err, "Quorum")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *LockArgs) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 5
// string "UID"
o = append(o, 0x85, 0xa3, 0x55, 0x49, 0x44)
o = msgp.AppendString(o, z.UID)
// string "Resources"
o = append(o, 0xa9, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73)
o = msgp.AppendArrayHeader(o, uint32(len(z.Resources)))
for za0001 := range z.Resources {
o = msgp.AppendString(o, z.Resources[za0001])
}
// string "Source"
o = append(o, 0xa6, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65)
o = msgp.AppendString(o, z.Source)
// string "Owner"
o = append(o, 0xa5, 0x4f, 0x77, 0x6e, 0x65, 0x72)
o = msgp.AppendString(o, z.Owner)
// string "Quorum"
o = append(o, 0xa6, 0x51, 0x75, 0x6f, 0x72, 0x75, 0x6d)
o = msgp.AppendInt(o, z.Quorum)
return
}
// UnmarshalMsg implements msgp.Unmarshaler
func (z *LockArgs) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, bts, err = msgp.ReadMapKeyZC(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "UID":
z.UID, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "UID")
return
}
case "Resources":
var zb0002 uint32
zb0002, bts, err = msgp.ReadArrayHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Resources")
return
}
if cap(z.Resources) >= int(zb0002) {
z.Resources = (z.Resources)[:zb0002]
} else {
z.Resources = make([]string, zb0002)
}
for za0001 := range z.Resources {
z.Resources[za0001], bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Resources", za0001)
return
}
}
case "Source":
z.Source, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Source")
return
}
case "Owner":
z.Owner, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Owner")
return
}
case "Quorum":
z.Quorum, bts, err = msgp.ReadIntBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Quorum")
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *LockArgs) Msgsize() (s int) {
s = 1 + 4 + msgp.StringPrefixSize + len(z.UID) + 10 + msgp.ArrayHeaderSize
for za0001 := range z.Resources {
s += msgp.StringPrefixSize + len(z.Resources[za0001])
}
s += 7 + msgp.StringPrefixSize + len(z.Source) + 6 + msgp.StringPrefixSize + len(z.Owner) + 7 + msgp.IntSize
return
}

View File

@ -0,0 +1,123 @@
package dsync
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"bytes"
"testing"
"github.com/tinylib/msgp/msgp"
)
func TestMarshalUnmarshalLockArgs(t *testing.T) {
v := LockArgs{}
bts, err := v.MarshalMsg(nil)
if err != nil {
t.Fatal(err)
}
left, err := v.UnmarshalMsg(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left)
}
left, err = msgp.Skip(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after Skip(): %q", len(left), left)
}
}
func BenchmarkMarshalMsgLockArgs(b *testing.B) {
v := LockArgs{}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.MarshalMsg(nil)
}
}
func BenchmarkAppendMsgLockArgs(b *testing.B) {
v := LockArgs{}
bts := make([]byte, 0, v.Msgsize())
bts, _ = v.MarshalMsg(bts[0:0])
b.SetBytes(int64(len(bts)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
bts, _ = v.MarshalMsg(bts[0:0])
}
}
func BenchmarkUnmarshalLockArgs(b *testing.B) {
v := LockArgs{}
bts, _ := v.MarshalMsg(nil)
b.ReportAllocs()
b.SetBytes(int64(len(bts)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := v.UnmarshalMsg(bts)
if err != nil {
b.Fatal(err)
}
}
}
func TestEncodeDecodeLockArgs(t *testing.T) {
v := LockArgs{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
m := v.Msgsize()
if buf.Len() > m {
t.Log("WARNING: TestEncodeDecodeLockArgs Msgsize() is inaccurate")
}
vn := LockArgs{}
err := msgp.Decode(&buf, &vn)
if err != nil {
t.Error(err)
}
buf.Reset()
msgp.Encode(&buf, &v)
err = msgp.NewReader(&buf).Skip()
if err != nil {
t.Error(err)
}
}
func BenchmarkEncodeLockArgs(b *testing.B) {
v := LockArgs{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
en := msgp.NewWriter(msgp.Nowhere)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.EncodeMsg(en)
}
en.Flush()
}
func BenchmarkDecodeLockArgs(b *testing.B) {
v := LockArgs{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
rd := msgp.NewEndlessReader(buf.Bytes(), b)
dc := msgp.NewReader(rd)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := v.DecodeMsg(dc)
if err != nil {
b.Fatal(err)
}
}
}

View File

@ -19,26 +19,6 @@ package dsync
import "context"
// LockArgs is minimal required values for any dsync compatible lock operation.
type LockArgs struct {
// Unique ID of lock/unlock request.
UID string
// Resources contains single or multiple entries to be locked/unlocked.
Resources []string
// Source contains the line number, function and file name of the code
// on the client node that requested the lock.
Source string
// Owner represents unique ID for this instance, an owner who originally requested
// the locked resource, useful primarily in figuring our stale locks.
Owner string
// Quorum represents the expected quorum for this lock type.
Quorum int
}
// NetLocker is dsync compatible locker interface.
type NetLocker interface {
// Do read lock for given LockArgs. It should return

View File

@ -133,7 +133,9 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
}
req.Header.Set("Authorization", "Bearer "+c.newAuthToken(req.URL.RawQuery))
req.Header.Set("X-Minio-Time", time.Now().UTC().Format(time.RFC3339))
if body != nil {
req.Header.Set("Expect", "100-continue")
}
if length > 0 {
req.ContentLength = length
}