2015-10-19 23:11:32 -07:00

707 lines
18 KiB
Go

// mgo - MongoDB driver for Go
//
// Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package mgo
import (
"errors"
"fmt"
"net"
"sync"
"time"
"gopkg.in/mgo.v2/bson"
)
type replyFunc func(err error, reply *replyOp, docNum int, docData []byte)
type mongoSocket struct {
sync.Mutex
server *mongoServer // nil when cached
conn net.Conn
timeout time.Duration
addr string // For debugging only.
nextRequestId uint32
replyFuncs map[uint32]replyFunc
references int
creds []Credential
logout []Credential
cachedNonce string
gotNonce sync.Cond
dead error
serverInfo *mongoServerInfo
}
type queryOpFlags uint32
const (
_ queryOpFlags = 1 << iota
flagTailable
flagSlaveOk
flagLogReplay
flagNoCursorTimeout
flagAwaitData
)
type queryOp struct {
collection string
query interface{}
skip int32
limit int32
selector interface{}
flags queryOpFlags
replyFunc replyFunc
mode Mode
options queryWrapper
hasOptions bool
serverTags []bson.D
}
type queryWrapper struct {
Query interface{} "$query"
OrderBy interface{} "$orderby,omitempty"
Hint interface{} "$hint,omitempty"
Explain bool "$explain,omitempty"
Snapshot bool "$snapshot,omitempty"
ReadPreference bson.D "$readPreference,omitempty"
MaxScan int "$maxScan,omitempty"
MaxTimeMS int "$maxTimeMS,omitempty"
Comment string "$comment,omitempty"
}
func (op *queryOp) finalQuery(socket *mongoSocket) interface{} {
if socket.ServerInfo().Mongos {
var modeName string
if op.flags&flagSlaveOk == 0 {
modeName = "primary"
} else {
switch op.mode {
case Strong:
modeName = "primary"
case Monotonic, Eventual:
modeName = "secondaryPreferred"
case PrimaryPreferred:
modeName = "primaryPreferred"
case Secondary:
modeName = "secondary"
case SecondaryPreferred:
modeName = "secondaryPreferred"
case Nearest:
modeName = "nearest"
default:
panic(fmt.Sprintf("unsupported read mode: %d", op.mode))
}
}
op.hasOptions = true
op.options.ReadPreference = make(bson.D, 0, 2)
op.options.ReadPreference = append(op.options.ReadPreference, bson.DocElem{"mode", modeName})
if len(op.serverTags) > 0 {
op.options.ReadPreference = append(op.options.ReadPreference, bson.DocElem{"tags", op.serverTags})
}
}
if op.hasOptions {
if op.query == nil {
var empty bson.D
op.options.Query = empty
} else {
op.options.Query = op.query
}
debugf("final query is %#v\n", &op.options)
return &op.options
}
return op.query
}
type getMoreOp struct {
collection string
limit int32
cursorId int64
replyFunc replyFunc
}
type replyOp struct {
flags uint32
cursorId int64
firstDoc int32
replyDocs int32
}
type insertOp struct {
collection string // "database.collection"
documents []interface{} // One or more documents to insert
flags uint32
}
type updateOp struct {
Collection string `bson:"-"` // "database.collection"
Selector interface{} `bson:"q"`
Update interface{} `bson:"u"`
Flags uint32 `bson:"-"`
Multi bool `bson:"multi,omitempty"`
Upsert bool `bson:"upsert,omitempty"`
}
type deleteOp struct {
collection string // "database.collection"
selector interface{}
flags uint32
}
type killCursorsOp struct {
cursorIds []int64
}
type requestInfo struct {
bufferPos int
replyFunc replyFunc
}
func newSocket(server *mongoServer, conn net.Conn, timeout time.Duration) *mongoSocket {
socket := &mongoSocket{
conn: conn,
addr: server.Addr,
server: server,
replyFuncs: make(map[uint32]replyFunc),
}
socket.gotNonce.L = &socket.Mutex
if err := socket.InitialAcquire(server.Info(), timeout); err != nil {
panic("newSocket: InitialAcquire returned error: " + err.Error())
}
stats.socketsAlive(+1)
debugf("Socket %p to %s: initialized", socket, socket.addr)
socket.resetNonce()
go socket.readLoop()
return socket
}
// Server returns the server that the socket is associated with.
// It returns nil while the socket is cached in its respective server.
func (socket *mongoSocket) Server() *mongoServer {
socket.Lock()
server := socket.server
socket.Unlock()
return server
}
// ServerInfo returns details for the server at the time the socket
// was initially acquired.
func (socket *mongoSocket) ServerInfo() *mongoServerInfo {
socket.Lock()
serverInfo := socket.serverInfo
socket.Unlock()
return serverInfo
}
// InitialAcquire obtains the first reference to the socket, either
// right after the connection is made or once a recycled socket is
// being put back in use.
func (socket *mongoSocket) InitialAcquire(serverInfo *mongoServerInfo, timeout time.Duration) error {
socket.Lock()
if socket.references > 0 {
panic("Socket acquired out of cache with references")
}
if socket.dead != nil {
dead := socket.dead
socket.Unlock()
return dead
}
socket.references++
socket.serverInfo = serverInfo
socket.timeout = timeout
stats.socketsInUse(+1)
stats.socketRefs(+1)
socket.Unlock()
return nil
}
// Acquire obtains an additional reference to the socket.
// The socket will only be recycled when it's released as many
// times as it's been acquired.
func (socket *mongoSocket) Acquire() (info *mongoServerInfo) {
socket.Lock()
if socket.references == 0 {
panic("Socket got non-initial acquire with references == 0")
}
// We'll track references to dead sockets as well.
// Caller is still supposed to release the socket.
socket.references++
stats.socketRefs(+1)
serverInfo := socket.serverInfo
socket.Unlock()
return serverInfo
}
// Release decrements a socket reference. The socket will be
// recycled once its released as many times as it's been acquired.
func (socket *mongoSocket) Release() {
socket.Lock()
if socket.references == 0 {
panic("socket.Release() with references == 0")
}
socket.references--
stats.socketRefs(-1)
if socket.references == 0 {
stats.socketsInUse(-1)
server := socket.server
socket.Unlock()
socket.LogoutAll()
// If the socket is dead server is nil.
if server != nil {
server.RecycleSocket(socket)
}
} else {
socket.Unlock()
}
}
// SetTimeout changes the timeout used on socket operations.
func (socket *mongoSocket) SetTimeout(d time.Duration) {
socket.Lock()
socket.timeout = d
socket.Unlock()
}
type deadlineType int
const (
readDeadline deadlineType = 1
writeDeadline deadlineType = 2
)
func (socket *mongoSocket) updateDeadline(which deadlineType) {
var when time.Time
if socket.timeout > 0 {
when = time.Now().Add(socket.timeout)
}
whichstr := ""
switch which {
case readDeadline | writeDeadline:
whichstr = "read/write"
socket.conn.SetDeadline(when)
case readDeadline:
whichstr = "read"
socket.conn.SetReadDeadline(when)
case writeDeadline:
whichstr = "write"
socket.conn.SetWriteDeadline(when)
default:
panic("invalid parameter to updateDeadline")
}
debugf("Socket %p to %s: updated %s deadline to %s ahead (%s)", socket, socket.addr, whichstr, socket.timeout, when)
}
// Close terminates the socket use.
func (socket *mongoSocket) Close() {
socket.kill(errors.New("Closed explicitly"), false)
}
func (socket *mongoSocket) kill(err error, abend bool) {
socket.Lock()
if socket.dead != nil {
debugf("Socket %p to %s: killed again: %s (previously: %s)", socket, socket.addr, err.Error(), socket.dead.Error())
socket.Unlock()
return
}
logf("Socket %p to %s: closing: %s (abend=%v)", socket, socket.addr, err.Error(), abend)
socket.dead = err
socket.conn.Close()
stats.socketsAlive(-1)
replyFuncs := socket.replyFuncs
socket.replyFuncs = make(map[uint32]replyFunc)
server := socket.server
socket.server = nil
socket.gotNonce.Broadcast()
socket.Unlock()
for _, replyFunc := range replyFuncs {
logf("Socket %p to %s: notifying replyFunc of closed socket: %s", socket, socket.addr, err.Error())
replyFunc(err, nil, -1, nil)
}
if abend {
server.AbendSocket(socket)
}
}
func (socket *mongoSocket) SimpleQuery(op *queryOp) (data []byte, err error) {
var wait, change sync.Mutex
var replyDone bool
var replyData []byte
var replyErr error
wait.Lock()
op.replyFunc = func(err error, reply *replyOp, docNum int, docData []byte) {
change.Lock()
if !replyDone {
replyDone = true
replyErr = err
if err == nil {
replyData = docData
}
}
change.Unlock()
wait.Unlock()
}
err = socket.Query(op)
if err != nil {
return nil, err
}
wait.Lock()
change.Lock()
data = replyData
err = replyErr
change.Unlock()
return data, err
}
func (socket *mongoSocket) Query(ops ...interface{}) (err error) {
if lops := socket.flushLogout(); len(lops) > 0 {
ops = append(lops, ops...)
}
buf := make([]byte, 0, 256)
// Serialize operations synchronously to avoid interrupting
// other goroutines while we can't really be sending data.
// Also, record id positions so that we can compute request
// ids at once later with the lock already held.
requests := make([]requestInfo, len(ops))
requestCount := 0
for _, op := range ops {
debugf("Socket %p to %s: serializing op: %#v", socket, socket.addr, op)
start := len(buf)
var replyFunc replyFunc
switch op := op.(type) {
case *updateOp:
buf = addHeader(buf, 2001)
buf = addInt32(buf, 0) // Reserved
buf = addCString(buf, op.Collection)
buf = addInt32(buf, int32(op.Flags))
debugf("Socket %p to %s: serializing selector document: %#v", socket, socket.addr, op.Selector)
buf, err = addBSON(buf, op.Selector)
if err != nil {
return err
}
debugf("Socket %p to %s: serializing update document: %#v", socket, socket.addr, op.Update)
buf, err = addBSON(buf, op.Update)
if err != nil {
return err
}
case *insertOp:
buf = addHeader(buf, 2002)
buf = addInt32(buf, int32(op.flags))
buf = addCString(buf, op.collection)
for _, doc := range op.documents {
debugf("Socket %p to %s: serializing document for insertion: %#v", socket, socket.addr, doc)
buf, err = addBSON(buf, doc)
if err != nil {
return err
}
}
case *queryOp:
buf = addHeader(buf, 2004)
buf = addInt32(buf, int32(op.flags))
buf = addCString(buf, op.collection)
buf = addInt32(buf, op.skip)
buf = addInt32(buf, op.limit)
buf, err = addBSON(buf, op.finalQuery(socket))
if err != nil {
return err
}
if op.selector != nil {
buf, err = addBSON(buf, op.selector)
if err != nil {
return err
}
}
replyFunc = op.replyFunc
case *getMoreOp:
buf = addHeader(buf, 2005)
buf = addInt32(buf, 0) // Reserved
buf = addCString(buf, op.collection)
buf = addInt32(buf, op.limit)
buf = addInt64(buf, op.cursorId)
replyFunc = op.replyFunc
case *deleteOp:
buf = addHeader(buf, 2006)
buf = addInt32(buf, 0) // Reserved
buf = addCString(buf, op.collection)
buf = addInt32(buf, int32(op.flags))
debugf("Socket %p to %s: serializing selector document: %#v", socket, socket.addr, op.selector)
buf, err = addBSON(buf, op.selector)
if err != nil {
return err
}
case *killCursorsOp:
buf = addHeader(buf, 2007)
buf = addInt32(buf, 0) // Reserved
buf = addInt32(buf, int32(len(op.cursorIds)))
for _, cursorId := range op.cursorIds {
buf = addInt64(buf, cursorId)
}
default:
panic("internal error: unknown operation type")
}
setInt32(buf, start, int32(len(buf)-start))
if replyFunc != nil {
request := &requests[requestCount]
request.replyFunc = replyFunc
request.bufferPos = start
requestCount++
}
}
// Buffer is ready for the pipe. Lock, allocate ids, and enqueue.
socket.Lock()
if socket.dead != nil {
dead := socket.dead
socket.Unlock()
debugf("Socket %p to %s: failing query, already closed: %s", socket, socket.addr, socket.dead.Error())
// XXX This seems necessary in case the session is closed concurrently
// with a query being performed, but it's not yet tested:
for i := 0; i != requestCount; i++ {
request := &requests[i]
if request.replyFunc != nil {
request.replyFunc(dead, nil, -1, nil)
}
}
return dead
}
wasWaiting := len(socket.replyFuncs) > 0
// Reserve id 0 for requests which should have no responses.
requestId := socket.nextRequestId + 1
if requestId == 0 {
requestId++
}
socket.nextRequestId = requestId + uint32(requestCount)
for i := 0; i != requestCount; i++ {
request := &requests[i]
setInt32(buf, request.bufferPos+4, int32(requestId))
socket.replyFuncs[requestId] = request.replyFunc
requestId++
}
debugf("Socket %p to %s: sending %d op(s) (%d bytes)", socket, socket.addr, len(ops), len(buf))
stats.sentOps(len(ops))
socket.updateDeadline(writeDeadline)
_, err = socket.conn.Write(buf)
if !wasWaiting && requestCount > 0 {
socket.updateDeadline(readDeadline)
}
socket.Unlock()
return err
}
func fill(r net.Conn, b []byte) error {
l := len(b)
n, err := r.Read(b)
for n != l && err == nil {
var ni int
ni, err = r.Read(b[n:])
n += ni
}
return err
}
// Estimated minimum cost per socket: 1 goroutine + memory for the largest
// document ever seen.
func (socket *mongoSocket) readLoop() {
p := make([]byte, 36) // 16 from header + 20 from OP_REPLY fixed fields
s := make([]byte, 4)
conn := socket.conn // No locking, conn never changes.
for {
// XXX Handle timeouts, , etc
err := fill(conn, p)
if err != nil {
socket.kill(err, true)
return
}
totalLen := getInt32(p, 0)
responseTo := getInt32(p, 8)
opCode := getInt32(p, 12)
// Don't use socket.server.Addr here. socket is not
// locked and socket.server may go away.
debugf("Socket %p to %s: got reply (%d bytes)", socket, socket.addr, totalLen)
_ = totalLen
if opCode != 1 {
socket.kill(errors.New("opcode != 1, corrupted data?"), true)
return
}
reply := replyOp{
flags: uint32(getInt32(p, 16)),
cursorId: getInt64(p, 20),
firstDoc: getInt32(p, 28),
replyDocs: getInt32(p, 32),
}
stats.receivedOps(+1)
stats.receivedDocs(int(reply.replyDocs))
socket.Lock()
replyFunc, ok := socket.replyFuncs[uint32(responseTo)]
if ok {
delete(socket.replyFuncs, uint32(responseTo))
}
socket.Unlock()
if replyFunc != nil && reply.replyDocs == 0 {
replyFunc(nil, &reply, -1, nil)
} else {
for i := 0; i != int(reply.replyDocs); i++ {
err := fill(conn, s)
if err != nil {
if replyFunc != nil {
replyFunc(err, nil, -1, nil)
}
socket.kill(err, true)
return
}
b := make([]byte, int(getInt32(s, 0)))
// copy(b, s) in an efficient way.
b[0] = s[0]
b[1] = s[1]
b[2] = s[2]
b[3] = s[3]
err = fill(conn, b[4:])
if err != nil {
if replyFunc != nil {
replyFunc(err, nil, -1, nil)
}
socket.kill(err, true)
return
}
if globalDebug && globalLogger != nil {
m := bson.M{}
if err := bson.Unmarshal(b, m); err == nil {
debugf("Socket %p to %s: received document: %#v", socket, socket.addr, m)
}
}
if replyFunc != nil {
replyFunc(nil, &reply, i, b)
}
// XXX Do bound checking against totalLen.
}
}
socket.Lock()
if len(socket.replyFuncs) == 0 {
// Nothing else to read for now. Disable deadline.
socket.conn.SetReadDeadline(time.Time{})
} else {
socket.updateDeadline(readDeadline)
}
socket.Unlock()
// XXX Do bound checking against totalLen.
}
}
var emptyHeader = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
func addHeader(b []byte, opcode int) []byte {
i := len(b)
b = append(b, emptyHeader...)
// Enough for current opcodes.
b[i+12] = byte(opcode)
b[i+13] = byte(opcode >> 8)
return b
}
func addInt32(b []byte, i int32) []byte {
return append(b, byte(i), byte(i>>8), byte(i>>16), byte(i>>24))
}
func addInt64(b []byte, i int64) []byte {
return append(b, byte(i), byte(i>>8), byte(i>>16), byte(i>>24),
byte(i>>32), byte(i>>40), byte(i>>48), byte(i>>56))
}
func addCString(b []byte, s string) []byte {
b = append(b, []byte(s)...)
b = append(b, 0)
return b
}
func addBSON(b []byte, doc interface{}) ([]byte, error) {
if doc == nil {
return append(b, 5, 0, 0, 0, 0), nil
}
data, err := bson.Marshal(doc)
if err != nil {
return b, err
}
return append(b, data...), nil
}
func setInt32(b []byte, pos int, i int32) {
b[pos] = byte(i)
b[pos+1] = byte(i >> 8)
b[pos+2] = byte(i >> 16)
b[pos+3] = byte(i >> 24)
}
func getInt32(b []byte, pos int) int32 {
return (int32(b[pos+0])) |
(int32(b[pos+1]) << 8) |
(int32(b[pos+2]) << 16) |
(int32(b[pos+3]) << 24)
}
func getInt64(b []byte, pos int) int64 {
return (int64(b[pos+0])) |
(int64(b[pos+1]) << 8) |
(int64(b[pos+2]) << 16) |
(int64(b[pos+3]) << 24) |
(int64(b[pos+4]) << 32) |
(int64(b[pos+5]) << 40) |
(int64(b[pos+6]) << 48) |
(int64(b[pos+7]) << 56)
}