255 lines
7.5 KiB
Go
255 lines
7.5 KiB
Go
/*
|
|
* Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package y
|
|
|
|
import (
|
|
"container/heap"
|
|
"context"
|
|
"sync/atomic"
|
|
|
|
"golang.org/x/net/trace"
|
|
)
|
|
|
|
// uint64Heap is a slice of uint64 values that satisfies heap.Interface.
// Because Less orders by "<", the smallest value sits at position 0 once the
// slice is managed through the container/heap functions.
type uint64Heap []uint64

// Len reports how many values are currently stored.
func (u uint64Heap) Len() int {
	return len(u)
}

// Less reports whether the value at position i is smaller than the one at j.
func (u uint64Heap) Less(i, j int) bool {
	return u[i] < u[j]
}

// Swap exchanges the values at positions i and j.
func (u uint64Heap) Swap(i, j int) {
	u[j], u[i] = u[i], u[j]
}

// Push appends x, which must hold a uint64, to the end of the slice.
func (u *uint64Heap) Push(x interface{}) {
	val := x.(uint64)
	*u = append(*u, val)
}

// Pop removes the final element of the slice and returns it.
func (u *uint64Heap) Pop() interface{} {
	cur := *u
	last := len(cur) - 1
	val := cur[last]
	*u = cur[:last]
	return val
}
|
|
|
|
// mark is the message type carried on WaterMark.markCh. It holds one or more
// indices together with a done flag that says whether those indices are being
// begun or finished. It may instead carry a waiter channel for a caller that is
// waiting for the watermark to reach >= index.
//
// A mark is used in one of three shapes:
//   - (index, waiter): register a waiter for index;
//   - (index, done): begin or finish a single index;
//   - (indices, done): begin or finish several indices at once.
type mark struct {
	// index is the single index this mark refers to.
	index uint64
	// waiter, when non-nil, marks this as a wait request for index.
	waiter chan struct{}
	// indices is the batch form used instead of index.
	indices []uint64
	// done is true when the index (or indices) is being marked as done.
	done bool
}
|
|
|
|
// WaterMark is used to keep track of the minimum un-finished index. Typically, an index k becomes
|
|
// finished or "done" according to a WaterMark once Done(k) has been called
|
|
// 1. as many times as Begin(k) has, AND
|
|
// 2. a positive number of times.
|
|
//
|
|
// An index may also become "done" by calling SetDoneUntil at a time such that it is not
|
|
// inter-mingled with Begin/Done calls.
|
|
//
|
|
// Since doneUntil and lastIndex addresses are passed to sync/atomic packages, we ensure that they
|
|
// are 64-bit aligned by putting them at the beginning of the structure.
|
|
type WaterMark struct {
|
|
doneUntil uint64
|
|
lastIndex uint64
|
|
Name string
|
|
markCh chan mark
|
|
elog trace.EventLog
|
|
}
|
|
|
|
// Init initializes a WaterMark struct. MUST be called before using it.
|
|
func (w *WaterMark) Init(closer *Closer, eventLogging bool) {
|
|
w.markCh = make(chan mark, 100)
|
|
if eventLogging {
|
|
w.elog = trace.NewEventLog("Watermark", w.Name)
|
|
} else {
|
|
w.elog = NoEventLog
|
|
}
|
|
go w.process(closer)
|
|
}
|
|
|
|
// Begin sets the last index to the given value.
|
|
func (w *WaterMark) Begin(index uint64) {
|
|
atomic.StoreUint64(&w.lastIndex, index)
|
|
w.markCh <- mark{index: index, done: false}
|
|
}
|
|
|
|
// BeginMany works like Begin but accepts multiple indices.
|
|
func (w *WaterMark) BeginMany(indices []uint64) {
|
|
atomic.StoreUint64(&w.lastIndex, indices[len(indices)-1])
|
|
w.markCh <- mark{index: 0, indices: indices, done: false}
|
|
}
|
|
|
|
// Done sets a single index as done.
|
|
func (w *WaterMark) Done(index uint64) {
|
|
w.markCh <- mark{index: index, done: true}
|
|
}
|
|
|
|
// DoneMany works like Done but accepts multiple indices.
|
|
func (w *WaterMark) DoneMany(indices []uint64) {
|
|
w.markCh <- mark{index: 0, indices: indices, done: true}
|
|
}
|
|
|
|
// DoneUntil returns the maximum index that has the property that all indices
|
|
// less than or equal to it are done.
|
|
func (w *WaterMark) DoneUntil() uint64 {
|
|
return atomic.LoadUint64(&w.doneUntil)
|
|
}
|
|
|
|
// SetDoneUntil sets the maximum index that has the property that all indices
|
|
// less than or equal to it are done.
|
|
func (w *WaterMark) SetDoneUntil(val uint64) {
|
|
atomic.StoreUint64(&w.doneUntil, val)
|
|
}
|
|
|
|
// LastIndex returns the last index for which Begin has been called.
|
|
func (w *WaterMark) LastIndex() uint64 {
|
|
return atomic.LoadUint64(&w.lastIndex)
|
|
}
|
|
|
|
// WaitForMark waits until the given index is marked as done.
|
|
func (w *WaterMark) WaitForMark(ctx context.Context, index uint64) error {
|
|
if w.DoneUntil() >= index {
|
|
return nil
|
|
}
|
|
waitCh := make(chan struct{})
|
|
w.markCh <- mark{index: index, waiter: waitCh}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-waitCh:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// process is used to process the Mark channel. This is not thread-safe,
|
|
// so only run one goroutine for process. One is sufficient, because
|
|
// all goroutine ops use purely memory and cpu.
|
|
// Each index has to emit atleast one begin watermark in serial order otherwise waiters
|
|
// can get blocked idefinitely. Example: We had an watermark at 100 and a waiter at 101,
|
|
// if no watermark is emitted at index 101 then waiter would get stuck indefinitely as it
|
|
// can't decide whether the task at 101 has decided not to emit watermark or it didn't get
|
|
// scheduled yet.
|
|
func (w *WaterMark) process(closer *Closer) {
|
|
defer closer.Done()
|
|
|
|
var indices uint64Heap
|
|
// pending maps raft proposal index to the number of pending mutations for this proposal.
|
|
pending := make(map[uint64]int)
|
|
waiters := make(map[uint64][]chan struct{})
|
|
|
|
heap.Init(&indices)
|
|
var loop uint64
|
|
|
|
processOne := func(index uint64, done bool) {
|
|
// If not already done, then set. Otherwise, don't undo a done entry.
|
|
prev, present := pending[index]
|
|
if !present {
|
|
heap.Push(&indices, index)
|
|
}
|
|
|
|
delta := 1
|
|
if done {
|
|
delta = -1
|
|
}
|
|
pending[index] = prev + delta
|
|
|
|
loop++
|
|
if len(indices) > 0 && loop%10000 == 0 {
|
|
min := indices[0]
|
|
w.elog.Printf("WaterMark %s: Done entry %4d. Size: %4d Watermark: %-4d Looking for: "+
|
|
"%-4d. Value: %d\n", w.Name, index, len(indices), w.DoneUntil(), min, pending[min])
|
|
}
|
|
|
|
// Update mark by going through all indices in order; and checking if they have
|
|
// been done. Stop at the first index, which isn't done.
|
|
doneUntil := w.DoneUntil()
|
|
if doneUntil > index {
|
|
AssertTruef(false, "Name: %s doneUntil: %d. Index: %d", w.Name, doneUntil, index)
|
|
}
|
|
|
|
until := doneUntil
|
|
loops := 0
|
|
|
|
for len(indices) > 0 {
|
|
min := indices[0]
|
|
if done := pending[min]; done > 0 {
|
|
break // len(indices) will be > 0.
|
|
}
|
|
// Even if done is called multiple times causing it to become
|
|
// negative, we should still pop the index.
|
|
heap.Pop(&indices)
|
|
delete(pending, min)
|
|
until = min
|
|
loops++
|
|
}
|
|
|
|
if until != doneUntil {
|
|
AssertTrue(atomic.CompareAndSwapUint64(&w.doneUntil, doneUntil, until))
|
|
w.elog.Printf("%s: Done until %d. Loops: %d\n", w.Name, until, loops)
|
|
}
|
|
|
|
notifyAndRemove := func(idx uint64, toNotify []chan struct{}) {
|
|
for _, ch := range toNotify {
|
|
close(ch)
|
|
}
|
|
delete(waiters, idx) // Release the memory back.
|
|
}
|
|
|
|
if until-doneUntil <= uint64(len(waiters)) {
|
|
// Issue #908 showed that if doneUntil is close to 2^60, while until is zero, this loop
|
|
// can hog up CPU just iterating over integers creating a busy-wait loop. So, only do
|
|
// this path if until - doneUntil is less than the number of waiters.
|
|
for idx := doneUntil + 1; idx <= until; idx++ {
|
|
if toNotify, ok := waiters[idx]; ok {
|
|
notifyAndRemove(idx, toNotify)
|
|
}
|
|
}
|
|
} else {
|
|
for idx, toNotify := range waiters {
|
|
if idx <= until {
|
|
notifyAndRemove(idx, toNotify)
|
|
}
|
|
}
|
|
} // end of notifying waiters.
|
|
}
|
|
|
|
for {
|
|
select {
|
|
case <-closer.HasBeenClosed():
|
|
return
|
|
case mark := <-w.markCh:
|
|
if mark.waiter != nil {
|
|
doneUntil := atomic.LoadUint64(&w.doneUntil)
|
|
if doneUntil >= mark.index {
|
|
close(mark.waiter)
|
|
} else {
|
|
ws, ok := waiters[mark.index]
|
|
if !ok {
|
|
waiters[mark.index] = []chan struct{}{mark.waiter}
|
|
} else {
|
|
waiters[mark.index] = append(ws, mark.waiter)
|
|
}
|
|
}
|
|
} else {
|
|
if mark.index > 0 {
|
|
processOne(mark.index, mark.done)
|
|
}
|
|
for _, index := range mark.indices {
|
|
processOne(index, mark.done)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|