meditime/vendor/github.com/dgraph-io/badger/y/watermark.go

256 lines
7.5 KiB
Go
Raw Normal View History

/*
* Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"container/heap"
"context"
"sync/atomic"
"golang.org/x/net/trace"
)
type uint64Heap []uint64
func (u uint64Heap) Len() int { return len(u) }
func (u uint64Heap) Less(i, j int) bool { return u[i] < u[j] }
func (u uint64Heap) Swap(i, j int) { u[i], u[j] = u[j], u[i] }
func (u *uint64Heap) Push(x interface{}) { *u = append(*u, x.(uint64)) }
func (u *uint64Heap) Pop() interface{} {
old := *u
n := len(old)
x := old[n-1]
*u = old[0 : n-1]
return x
}
// mark contains one of more indices, along with a done boolean to indicate the
// status of the index: begin or done. It also contains waiters, who could be
// waiting for the watermark to reach >= a certain index.
type mark struct {
// Either this is an (index, waiter) pair or (index, done) or (indices, done).
index uint64
waiter chan struct{}
indices []uint64
done bool // Set to true if the index is done.
}
// WaterMark is used to keep track of the minimum un-finished index. Typically, an index k becomes
// finished or "done" according to a WaterMark once Done(k) has been called
// 1. as many times as Begin(k) has, AND
// 2. a positive number of times.
//
// An index may also become "done" by calling SetDoneUntil at a time such that it is not
// inter-mingled with Begin/Done calls.
//
// Since doneUntil and lastIndex addresses are passed to sync/atomic packages, we ensure that they
// are 64-bit aligned by putting them at the beginning of the structure.
type WaterMark struct {
doneUntil uint64
lastIndex uint64
Name string
markCh chan mark
elog trace.EventLog
}
// Init initializes a WaterMark struct. MUST be called before using it.
func (w *WaterMark) Init(closer *Closer, eventLogging bool) {
w.markCh = make(chan mark, 100)
if eventLogging {
w.elog = trace.NewEventLog("Watermark", w.Name)
} else {
w.elog = NoEventLog
}
go w.process(closer)
}
// Begin sets the last index to the given value.
func (w *WaterMark) Begin(index uint64) {
atomic.StoreUint64(&w.lastIndex, index)
w.markCh <- mark{index: index, done: false}
}
// BeginMany works like Begin but accepts multiple indices.
func (w *WaterMark) BeginMany(indices []uint64) {
atomic.StoreUint64(&w.lastIndex, indices[len(indices)-1])
w.markCh <- mark{index: 0, indices: indices, done: false}
}
// Done sets a single index as done.
func (w *WaterMark) Done(index uint64) {
w.markCh <- mark{index: index, done: true}
}
// DoneMany works like Done but accepts multiple indices.
func (w *WaterMark) DoneMany(indices []uint64) {
w.markCh <- mark{index: 0, indices: indices, done: true}
}
// DoneUntil returns the maximum index that has the property that all indices
// less than or equal to it are done.
func (w *WaterMark) DoneUntil() uint64 {
return atomic.LoadUint64(&w.doneUntil)
}
// SetDoneUntil sets the maximum index that has the property that all indices
// less than or equal to it are done.
func (w *WaterMark) SetDoneUntil(val uint64) {
atomic.StoreUint64(&w.doneUntil, val)
}
// LastIndex returns the last index for which Begin has been called.
func (w *WaterMark) LastIndex() uint64 {
return atomic.LoadUint64(&w.lastIndex)
}
// WaitForMark waits until the given index is marked as done.
func (w *WaterMark) WaitForMark(ctx context.Context, index uint64) error {
if w.DoneUntil() >= index {
return nil
}
waitCh := make(chan struct{})
w.markCh <- mark{index: index, waiter: waitCh}
select {
case <-ctx.Done():
return ctx.Err()
case <-waitCh:
return nil
}
}
// process is used to process the Mark channel. This is not thread-safe,
// so only run one goroutine for process. One is sufficient, because
// all goroutine ops use purely memory and cpu.
// Each index has to emit atleast one begin watermark in serial order otherwise waiters
// can get blocked idefinitely. Example: We had an watermark at 100 and a waiter at 101,
// if no watermark is emitted at index 101 then waiter would get stuck indefinitely as it
// can't decide whether the task at 101 has decided not to emit watermark or it didn't get
// scheduled yet.
func (w *WaterMark) process(closer *Closer) {
defer closer.Done()
var indices uint64Heap
// pending maps raft proposal index to the number of pending mutations for this proposal.
pending := make(map[uint64]int)
waiters := make(map[uint64][]chan struct{})
heap.Init(&indices)
var loop uint64
processOne := func(index uint64, done bool) {
// If not already done, then set. Otherwise, don't undo a done entry.
prev, present := pending[index]
if !present {
heap.Push(&indices, index)
}
delta := 1
if done {
delta = -1
}
pending[index] = prev + delta
loop++
if len(indices) > 0 && loop%10000 == 0 {
min := indices[0]
w.elog.Printf("WaterMark %s: Done entry %4d. Size: %4d Watermark: %-4d Looking for: "+
"%-4d. Value: %d\n", w.Name, index, len(indices), w.DoneUntil(), min, pending[min])
}
// Update mark by going through all indices in order; and checking if they have
// been done. Stop at the first index, which isn't done.
doneUntil := w.DoneUntil()
if doneUntil > index {
AssertTruef(false, "Name: %s doneUntil: %d. Index: %d", w.Name, doneUntil, index)
}
until := doneUntil
loops := 0
for len(indices) > 0 {
min := indices[0]
if done := pending[min]; done > 0 {
break // len(indices) will be > 0.
}
// Even if done is called multiple times causing it to become
// negative, we should still pop the index.
heap.Pop(&indices)
delete(pending, min)
until = min
loops++
}
if until != doneUntil {
AssertTrue(atomic.CompareAndSwapUint64(&w.doneUntil, doneUntil, until))
w.elog.Printf("%s: Done until %d. Loops: %d\n", w.Name, until, loops)
}
notifyAndRemove := func(idx uint64, toNotify []chan struct{}) {
for _, ch := range toNotify {
close(ch)
}
delete(waiters, idx) // Release the memory back.
}
if until-doneUntil <= uint64(len(waiters)) {
// Issue #908 showed that if doneUntil is close to 2^60, while until is zero, this loop
// can hog up CPU just iterating over integers creating a busy-wait loop. So, only do
// this path if until - doneUntil is less than the number of waiters.
for idx := doneUntil + 1; idx <= until; idx++ {
if toNotify, ok := waiters[idx]; ok {
notifyAndRemove(idx, toNotify)
}
}
} else {
for idx, toNotify := range waiters {
if idx <= until {
notifyAndRemove(idx, toNotify)
}
}
} // end of notifying waiters.
}
for {
select {
case <-closer.HasBeenClosed():
return
case mark := <-w.markCh:
if mark.waiter != nil {
doneUntil := atomic.LoadUint64(&w.doneUntil)
if doneUntil >= mark.index {
close(mark.waiter)
} else {
ws, ok := waiters[mark.index]
if !ok {
waiters[mark.index] = []chan struct{}{mark.waiter}
} else {
waiters[mark.index] = append(ws, mark.waiter)
}
}
} else {
if mark.index > 0 {
processOne(mark.index, mark.done)
}
for _, index := range mark.indices {
processOne(index, mark.done)
}
}
}
}
}