// Copyright 2012-2015 Oliver Eilhard. All rights reserved.
// Use of this source code is governed by a MIT-license.
// See http://olivere.mit-license.org/license.txt for details.

package elastic

import (
	"bytes"
	"encoding/json"
	"errors"
)

// Reindexer simplifies the process of reindexing an index. You typically
// reindex a source index into a target index. You can also specify a query
// so that only matching documents from the source index are bulk indexed
// into the target index. The caller may also specify a different client for
// the target, e.g. when copying indices from one Elasticsearch cluster to
// another.
//
// Internally, the Reindexer uses a scan and scroll operation on the source
// index and bulk indexing to push data into the target index.
//
// By default the reindexer fetches the _source, _parent, and _routing
// attributes from the source index. The ReindexerFunc returned by
// CopyToTargetIndex copies those attributes into the target index.
// This behaviour can be overridden by setting the ScanFields and providing a
// custom ReindexerFunc.
//
// The caller is responsible for setting up and/or clearing the target index
// before starting the reindex process.
//
// See http://www.elastic.co/guide/en/elasticsearch/guide/current/reindex.html
// for more information about reindexing.
type Reindexer struct {
	// Clients used for scanning the source and bulk indexing the target.
	// Do falls back to sourceClient when targetClient is nil.
	sourceClient *Client
	targetClient *Client

	// What to read: the source index, an optional query, and the fields
	// requested from the scan.
	sourceIndex string
	query       Query
	scanFields  []string

	// Tuning. Do applies defaults to bulkSize and scroll when they are unset;
	// size is only passed on to the scan when it is positive.
	bulkSize int
	size     int
	scroll   string

	// Per-hit handler, optional progress callback, and whether failed items
	// are collected in the response.
	reindexerFunc ReindexerFunc
	progress      ReindexerProgressFunc
	statsOnly     bool
}

// A ReindexerFunc receives each hit from the sourceIndex.
// It can choose to add any number of BulkableRequests to the bulkService.
// The Reindexer takes care of sending the queued requests; the function
// only has to add them. Returning a non-nil error makes Reindexer.Do stop
// and return that error.
type ReindexerFunc func(hit *SearchHit, bulkService *BulkService) error

// CopyToTargetIndex returns a ReindexerFunc that copies the SearchHit's
// _source, _parent, and _routing attributes into the targetIndex.
//
// The returned function queues one bulk index request per hit, keeping the
// hit's type and id. _parent and _routing are only set on the request when
// the hit carries a non-empty value for them.
//
// It returns an error, without queueing anything, when the hit is nil, when
// the hit has no _source (e.g. because _source was left out of the scan
// fields), or when the _source cannot be decoded as a JSON object.
func CopyToTargetIndex(targetIndex string) ReindexerFunc {
	return func(hit *SearchHit, bulkService *BulkService) error {
		if hit == nil {
			return errors.New("no search hit")
		}
		if hit.Source == nil {
			return errors.New("search hit has no _source")
		}

		// Decode numbers as json.Number instead of float64 so that large
		// integers are written to the target exactly as they were read.
		dec := json.NewDecoder(bytes.NewReader(*hit.Source))
		dec.UseNumber()
		source := make(map[string]interface{})
		if err := dec.Decode(&source); err != nil {
			return err
		}

		req := NewBulkIndexRequest().Index(targetIndex).Type(hit.Type).Id(hit.Id).Doc(source)
		if hit.Parent != "" {
			req = req.Parent(hit.Parent)
		}
		if hit.Routing != "" {
			req = req.Routing(hit.Routing)
		}
		bulkService.Add(req)
		return nil
	}
}

// ReindexerProgressFunc is the signature of the progress callback accepted
// by Reindexer. It is called with the number of hits handled so far
// (current) and the number of documents counted in the source (total).
type ReindexerProgressFunc func(current int64, total int64)

// ReindexerResponse is returned from the Do func in a Reindexer.
// By default, it returns the number of succeeded and failed bulk operations.
// To return details about all failed items, set StatsOnly to false in
// Reindexer.
type ReindexerResponse struct {
	// Success is the number of bulk items reported as succeeded, summed over
	// all committed bulk requests.
	Success int64
	// Failed is the number of bulk items reported as failed, summed over all
	// committed bulk requests.
	Failed  int64
	// Errors collects the failed bulk items. It is only appended to when
	// StatsOnly is false; otherwise it stays empty.
	Errors  []*BulkResponseItem
}

// NewReindexer creates a Reindexer that reads from the index named source
// via client and passes every hit to reindexerFunc. The new Reindexer
// reports statistics only; use StatsOnly(false) to collect failed items.
func NewReindexer(client *Client, source string, reindexerFunc ReindexerFunc) *Reindexer {
	ix := new(Reindexer)
	ix.sourceClient = client
	ix.sourceIndex = source
	ix.reindexerFunc = reindexerFunc
	ix.statsOnly = true
	return ix
}

// TargetClient specifies a different client for the target. This is
// necessary when the target index is in a different Elasticsearch cluster.
// By default (or when c is nil), the source client is also used for the
// target. It returns the Reindexer so that calls can be chained.
func (ix *Reindexer) TargetClient(c *Client) *Reindexer {
	ix.targetClient = c
	return ix
}

// Query specifies the query to apply to the source. Only documents matching
// the query are passed on for indexing into the target. A nil query does not
// filter out any documents. The same query is used when counting documents
// for progress reporting. It returns the Reindexer so that calls can be
// chained.
func (ix *Reindexer) Query(q Query) *Reindexer {
	ix.query = q
	return ix
}

// ScanFields specifies the fields the scan query should load.
// The default fields are _source, _parent, _routing; calling ScanFields
// without arguments keeps that default. Note that CopyToTargetIndex reads
// the hit's _source, so keep _source in the list when using it.
// It returns the Reindexer so that calls can be chained.
func (ix *Reindexer) ScanFields(scanFields ...string) *Reindexer {
	ix.scanFields = scanFields
	return ix
}

// BulkSize sets the number of bulk actions to queue before sending them to
// Elasticsearch as one chunk. The default is 500; a value <= 0 selects the
// default. It returns the Reindexer so that calls can be chained.
func (ix *Reindexer) BulkSize(bulkSize int) *Reindexer {
	ix.bulkSize = bulkSize
	return ix
}

// Size is the number of results to return per shard, not per request.
// So a size of 10 which hits 5 shards will return a maximum of 50 results
// per scan request. A value <= 0 is ignored, i.e. no size is set on the
// scan. It returns the Reindexer so that calls can be chained.
func (ix *Reindexer) Size(size int) *Reindexer {
	ix.size = size
	return ix
}

// Scroll specifies for how long the scroll operation on the source index
// should be maintained. The default is 5m; an empty string selects the
// default. It returns the Reindexer so that calls can be chained.
func (ix *Reindexer) Scroll(timeout string) *Reindexer {
	ix.scroll = timeout
	return ix
}

// Progress sets a callback that is called once for every hit while
// reindexing, with the number of hits seen so far and the total number of
// documents. When a callback is set, Do first counts the documents in the
// source to determine that total. It returns the Reindexer so that calls
// can be chained.
func (ix *Reindexer) Progress(f ReindexerProgressFunc) *Reindexer {
	ix.progress = f
	return ix
}

// StatsOnly indicates whether the Do method should return only statistics
// or also details about the documents that failed while indexing. It is
// true by default, i.e. only the number of documents that succeeded/failed
// is returned. Set to false if you want the failed items collected in the
// response's Errors field. It returns the Reindexer so that calls can be
// chained.
func (ix *Reindexer) StatsOnly(statsOnly bool) *Reindexer {
	ix.statsOnly = statsOnly
	return ix
}

// Do starts the reindexing process.
//
// It validates the configuration and fills in defaults: the target client
// defaults to the source client, the scan fields to _source, _parent and
// _routing, the bulk size to 500 and the scroll timeout to 5m. It then scans
// the source index and hands every hit to the ReindexerFunc. Queued bulk
// actions are committed whenever their number reaches the bulk size, and
// once more after the scan is exhausted.
//
// If a progress callback is set, the documents in the source are counted
// first and the callback is invoked for every hit before it is processed.
//
// Configuration errors and failures that happen before the scan has started
// return a nil response. Failures after that point return the statistics
// gathered so far together with the error.
func (ix *Reindexer) Do() (*ReindexerResponse, error) {
	if ix.sourceClient == nil {
		return nil, errors.New("no source client")
	}
	if ix.sourceIndex == "" {
		return nil, errors.New("no source index")
	}
	// Without a handler there is nothing to do with the hits; fail early
	// instead of panicking on the first one.
	if ix.reindexerFunc == nil {
		return nil, errors.New("no reindexer func")
	}
	if ix.targetClient == nil {
		ix.targetClient = ix.sourceClient
	}
	if ix.scanFields == nil {
		ix.scanFields = []string{"_source", "_parent", "_routing"}
	}
	if ix.bulkSize <= 0 {
		ix.bulkSize = 500
	}
	if ix.scroll == "" {
		ix.scroll = "5m"
	}

	// Count total to report progress (if necessary)
	var current, total int64
	if ix.progress != nil {
		var err error
		total, err = ix.count()
		if err != nil {
			return nil, err
		}
	}

	// Prepare scan and scroll to iterate through the source index
	scanner := ix.sourceClient.Scan(ix.sourceIndex).Scroll(ix.scroll).Fields(ix.scanFields...)
	if ix.query != nil {
		scanner = scanner.Query(ix.query)
	}
	if ix.size > 0 {
		scanner = scanner.Size(ix.size)
	}
	cursor, err := scanner.Do()
	if err != nil {
		// The cursor is unusable when the scan could not be started.
		return nil, err
	}

	bulk := ix.targetClient.Bulk()

	ret := &ReindexerResponse{
		Errors: make([]*BulkResponseItem, 0),
	}

	// Main loop iterates through the source index and bulk indexes into target.
	for {
		docs, err := cursor.Next()
		if err == EOS {
			break
		}
		if err != nil {
			return ret, err
		}

		if docs.TotalHits() > 0 {
			for _, hit := range docs.Hits.Hits {
				if ix.progress != nil {
					current++
					ix.progress(current, total)
				}

				if err := ix.reindexerFunc(hit, bulk); err != nil {
					return ret, err
				}

				// Send the queued actions once a full chunk has accumulated.
				if bulk.NumberOfActions() >= ix.bulkSize {
					bulk, err = ix.commit(bulk, ret)
					if err != nil {
						return ret, err
					}
				}
			}
		}
	}

	// Final flush of whatever is left over from the last partial chunk.
	if bulk.NumberOfActions() > 0 {
		if _, err := ix.commit(bulk, ret); err != nil {
			return ret, err
		}
	}

	return ret, nil
}

// count asks the source client how many documents the source index holds.
// When a query has been configured, only matching documents are counted.
func (ix *Reindexer) count() (int64, error) {
	counter := ix.sourceClient.Count(ix.sourceIndex)
	if ix.query == nil {
		return counter.Do()
	}
	return counter.Query(ix.query).Do()
}

// commit sends the queued bulk actions, adds the outcome to ret, and hands
// back a new, empty bulk service on the target client. Failed items are
// appended to ret.Errors only when statsOnly is disabled. If sending the
// bulk fails, ret is left untouched and a nil bulk service is returned
// together with the error.
func (ix *Reindexer) commit(bulk *BulkService, ret *ReindexerResponse) (*BulkService, error) {
	resp, err := bulk.Do()
	if err != nil {
		return nil, err
	}

	succeeded, failed := resp.Succeeded(), resp.Failed()
	ret.Success += int64(len(succeeded))
	ret.Failed += int64(len(failed))
	if !ix.statsOnly {
		ret.Errors = append(ret.Errors, failed...)
	}

	return ix.targetClient.Bulk(), nil
}