Add pushover notifications, this should be a super basic MVP
This commit is contained in:
parent
ed13a5994f
commit
d9917ab8b0
505 changed files with 195741 additions and 9 deletions
vendor/github.com/dgraph-io/badger/table
69
vendor/github.com/dgraph-io/badger/table/README.md
generated
vendored
Normal file
69
vendor/github.com/dgraph-io/badger/table/README.md
generated
vendored
Normal file
|
@ -0,0 +1,69 @@
|
|||
Size of table is 122,173,606 bytes for all benchmarks.
|
||||
|
||||
# BenchmarkRead
|
||||
```
|
||||
$ go test -bench ^BenchmarkRead$ -run ^$ -count 3
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/dgraph-io/badger/table
|
||||
BenchmarkRead-16 10 153281932 ns/op
|
||||
BenchmarkRead-16 10 153454443 ns/op
|
||||
BenchmarkRead-16 10 155349696 ns/op
|
||||
PASS
|
||||
ok github.com/dgraph-io/badger/table 23.549s
|
||||
```
|
||||
|
||||
Size of table is 122,173,606 bytes, which is ~117MB.
|
||||
|
||||
The rate is ~750MB/s using LoadToRAM (when table is in RAM).
|
||||
|
||||
To read a 64MB table, this would take ~0.0853s, which is negligible.
|
||||
|
||||
# BenchmarkReadAndBuild
|
||||
```
|
||||
$ go test -bench BenchmarkReadAndBuild -run ^$ -count 3
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/dgraph-io/badger/table
|
||||
BenchmarkReadAndBuild-16 2 945041628 ns/op
|
||||
BenchmarkReadAndBuild-16 2 947120893 ns/op
|
||||
BenchmarkReadAndBuild-16 2 954909506 ns/op
|
||||
PASS
|
||||
ok github.com/dgraph-io/badger/table 26.856s
|
||||
```
|
||||
|
||||
The rate is ~122MB/s. To build a 64MB table, this would take ~0.52s. Note that this
|
||||
does NOT include the flushing of the table to disk. All we are doing above is
|
||||
reading one table (which is in RAM) and writing one table in memory.
|
||||
|
||||
The table building itself therefore takes about 0.52s - 0.0853s ≈ 0.4347s.
|
||||
|
||||
# BenchmarkReadMerged
|
||||
Below, we merge 5 tables. The total size remains unchanged at ~122M.
|
||||
|
||||
```
|
||||
$ go test -bench ReadMerged -run ^$ -count 3
|
||||
BenchmarkReadMerged-16 2 954475788 ns/op
|
||||
BenchmarkReadMerged-16 2 955252462 ns/op
|
||||
BenchmarkReadMerged-16 2 956857353 ns/op
|
||||
PASS
|
||||
ok github.com/dgraph-io/badger/table 33.327s
|
||||
```
|
||||
|
||||
The rate is ~122MB/s. To read a 64MB table using merge iterator, this would take ~0.52s.
|
||||
|
||||
# BenchmarkRandomRead
|
||||
|
||||
```
|
||||
$ go test -bench BenchmarkRandomRead$ -run ^$ -count 3
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/dgraph-io/badger/table
|
||||
BenchmarkRandomRead-16 300000 3596 ns/op
|
||||
BenchmarkRandomRead-16 300000 3621 ns/op
|
||||
BenchmarkRandomRead-16 300000 3596 ns/op
|
||||
PASS
|
||||
ok github.com/dgraph-io/badger/table 44.727s
|
||||
```
|
||||
|
||||
For random read benchmarking, we are randomly reading a key and verifying its value.
|
236
vendor/github.com/dgraph-io/badger/table/builder.go
generated
vendored
Normal file
236
vendor/github.com/dgraph-io/badger/table/builder.go
generated
vendored
Normal file
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
* Copyright 2017 Dgraph Labs, Inc. and Contributors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package table
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"math"
|
||||
|
||||
"github.com/AndreasBriese/bbloom"
|
||||
"github.com/dgraph-io/badger/y"
|
||||
)
|
||||
|
||||
// restartInterval is the number of keys Builder.Add writes into a block
// before it starts a new one. It counts keys, not bytes; basing it on total
// size instead is a possible future change.
var restartInterval = 100
|
||||
|
||||
// newBuffer returns an empty bytes.Buffer whose capacity has been grown to
// hold at least sz bytes.
func newBuffer(sz int) *bytes.Buffer {
	var buf bytes.Buffer
	buf.Grow(sz)
	return &buf
}
|
||||
|
||||
// header is the fixed-size record written in front of every key-value entry
// of a block. All fields are serialized big-endian, in declaration order.
type header struct {
	plen uint16 // Overlap with base key.
	klen uint16 // Length of the diff.
	vlen uint16 // Length of value.
	prev uint32 // Offset for the previous key-value pair. The offset is relative to block base offset.
}

// Encode writes the header into the first 10 bytes of b
// (plen, klen, vlen as 2 bytes each, then prev as 4 bytes).
func (h header) Encode(b []byte) {
	be := binary.BigEndian
	be.PutUint16(b[:2], h.plen)
	be.PutUint16(b[2:4], h.klen)
	be.PutUint16(b[4:6], h.vlen)
	be.PutUint32(b[6:10], h.prev)
}

// Decode fills h from the first 10 bytes of buf, using the layout written by
// Encode, and returns the number of bytes consumed.
func (h *header) Decode(buf []byte) int {
	be := binary.BigEndian
	h.plen = be.Uint16(buf[:2])
	h.klen = be.Uint16(buf[2:4])
	h.vlen = be.Uint16(buf[4:6])
	h.prev = be.Uint32(buf[6:10])
	return h.Size()
}

// Size returns the encoded size of the header in bytes. It is currently a
// constant.
func (h header) Size() int {
	return 10
}
|
||||
|
||||
// Builder accumulates key-value pairs into the in-memory image of a single
// table. Entries are grouped into blocks; within a block each key is stored
// as a diff against the block's base key.
type Builder struct {
	// counter is the number of keys written for the current block.
	counter int

	// buf holds the table being built. Typically tens or hundreds of
	// megabytes; this is for one single file.
	buf *bytes.Buffer

	// baseKey is the base key for the current block.
	baseKey []byte
	// baseOffset is the offset in buf at which the current block starts.
	baseOffset uint32

	// restarts lists the base offsets of every block.
	restarts []uint32

	// prevOffset tracks the offset of the previous key-value pair, relative
	// to the block base offset.
	prevOffset uint32

	// keyBuf collects length-prefixed keys that Finish feeds into the bloom
	// filter; keyCount is how many keys were collected.
	keyBuf   *bytes.Buffer
	keyCount int
}
|
||||
|
||||
// NewTableBuilder makes a new TableBuilder.
|
||||
func NewTableBuilder() *Builder {
|
||||
return &Builder{
|
||||
keyBuf: newBuffer(1 << 20),
|
||||
buf: newBuffer(1 << 20),
|
||||
prevOffset: math.MaxUint32, // Used for the first element!
|
||||
}
|
||||
}
|
||||
|
||||
// Close closes the TableBuilder.
|
||||
func (b *Builder) Close() {}
|
||||
|
||||
// Empty returns whether it's empty.
|
||||
func (b *Builder) Empty() bool { return b.buf.Len() == 0 }
|
||||
|
||||
// keyDiff returns a suffix of newKey that is different from b.baseKey.
|
||||
func (b Builder) keyDiff(newKey []byte) []byte {
|
||||
var i int
|
||||
for i = 0; i < len(newKey) && i < len(b.baseKey); i++ {
|
||||
if newKey[i] != b.baseKey[i] {
|
||||
break
|
||||
}
|
||||
}
|
||||
return newKey[i:]
|
||||
}
|
||||
|
||||
func (b *Builder) addHelper(key []byte, v y.ValueStruct) {
|
||||
// Add key to bloom filter.
|
||||
if len(key) > 0 {
|
||||
var klen [2]byte
|
||||
keyNoTs := y.ParseKey(key)
|
||||
binary.BigEndian.PutUint16(klen[:], uint16(len(keyNoTs)))
|
||||
b.keyBuf.Write(klen[:])
|
||||
b.keyBuf.Write(keyNoTs)
|
||||
b.keyCount++
|
||||
}
|
||||
|
||||
// diffKey stores the difference of key with baseKey.
|
||||
var diffKey []byte
|
||||
if len(b.baseKey) == 0 {
|
||||
// Make a copy. Builder should not keep references. Otherwise, caller has to be very careful
|
||||
// and will have to make copies of keys every time they add to builder, which is even worse.
|
||||
b.baseKey = append(b.baseKey[:0], key...)
|
||||
diffKey = key
|
||||
} else {
|
||||
diffKey = b.keyDiff(key)
|
||||
}
|
||||
|
||||
h := header{
|
||||
plen: uint16(len(key) - len(diffKey)),
|
||||
klen: uint16(len(diffKey)),
|
||||
vlen: uint16(v.EncodedSize()),
|
||||
prev: b.prevOffset, // prevOffset is the location of the last key-value added.
|
||||
}
|
||||
b.prevOffset = uint32(b.buf.Len()) - b.baseOffset // Remember current offset for the next Add call.
|
||||
|
||||
// Layout: header, diffKey, value.
|
||||
var hbuf [10]byte
|
||||
h.Encode(hbuf[:])
|
||||
b.buf.Write(hbuf[:])
|
||||
b.buf.Write(diffKey) // We only need to store the key difference.
|
||||
|
||||
v.EncodeTo(b.buf)
|
||||
b.counter++ // Increment number of keys added for this current block.
|
||||
}
|
||||
|
||||
func (b *Builder) finishBlock() {
|
||||
// When we are at the end of the block and Valid=false, and the user wants to do a Prev,
|
||||
// we need a dummy header to tell us the offset of the previous key-value pair.
|
||||
b.addHelper([]byte{}, y.ValueStruct{})
|
||||
}
|
||||
|
||||
// Add adds a key-value pair to the block.
|
||||
// If doNotRestart is true, we will not restart even if b.counter >= restartInterval.
|
||||
func (b *Builder) Add(key []byte, value y.ValueStruct) {
|
||||
if b.counter >= restartInterval {
|
||||
b.finishBlock()
|
||||
// Start a new block. Initialize the block.
|
||||
b.restarts = append(b.restarts, uint32(b.buf.Len()))
|
||||
b.counter = 0
|
||||
b.baseKey = []byte{}
|
||||
b.baseOffset = uint32(b.buf.Len())
|
||||
b.prevOffset = math.MaxUint32 // First key-value pair of block has header.prev=MaxInt.
|
||||
}
|
||||
b.addHelper(key, value)
|
||||
}
|
||||
|
||||
// TODO: vvv this was the comment on ReachedCapacity.
|
||||
// FinalSize returns the *rough* final size of the array, counting the header which is
|
||||
// not yet written.
|
||||
// TODO: Look into why there is a discrepancy. I suspect it is because of Write(empty, empty)
|
||||
// at the end. The diff can vary.
|
||||
|
||||
// ReachedCapacity returns true if we... roughly (?) reached capacity?
|
||||
func (b *Builder) ReachedCapacity(cap int64) bool {
|
||||
estimateSz := b.buf.Len() + 8 /* empty header */ + 4*len(b.restarts) +
|
||||
8 /* 8 = end of buf offset + len(restarts) */
|
||||
return int64(estimateSz) > cap
|
||||
}
|
||||
|
||||
// blockIndex generates the block index for the table.
|
||||
// It is mainly a list of all the block base offsets.
|
||||
func (b *Builder) blockIndex() []byte {
|
||||
// Store the end offset, so we know the length of the final block.
|
||||
b.restarts = append(b.restarts, uint32(b.buf.Len()))
|
||||
|
||||
// Add 4 because we want to write out number of restarts at the end.
|
||||
sz := 4*len(b.restarts) + 4
|
||||
out := make([]byte, sz)
|
||||
buf := out
|
||||
for _, r := range b.restarts {
|
||||
binary.BigEndian.PutUint32(buf[:4], r)
|
||||
buf = buf[4:]
|
||||
}
|
||||
binary.BigEndian.PutUint32(buf[:4], uint32(len(b.restarts)))
|
||||
return out
|
||||
}
|
||||
|
||||
// Finish finishes the table by appending the index.
|
||||
func (b *Builder) Finish() []byte {
|
||||
bf := bbloom.New(float64(b.keyCount), 0.01)
|
||||
var klen [2]byte
|
||||
key := make([]byte, 1024)
|
||||
for {
|
||||
if _, err := b.keyBuf.Read(klen[:]); err == io.EOF {
|
||||
break
|
||||
} else if err != nil {
|
||||
y.Check(err)
|
||||
}
|
||||
kl := int(binary.BigEndian.Uint16(klen[:]))
|
||||
if cap(key) < kl {
|
||||
key = make([]byte, 2*int(kl)) // 2 * uint16 will overflow
|
||||
}
|
||||
key = key[:kl]
|
||||
y.Check2(b.keyBuf.Read(key))
|
||||
bf.Add(key)
|
||||
}
|
||||
|
||||
b.finishBlock() // This will never start a new block.
|
||||
index := b.blockIndex()
|
||||
b.buf.Write(index)
|
||||
|
||||
// Write bloom filter.
|
||||
bdata := bf.JSONMarshal()
|
||||
n, err := b.buf.Write(bdata)
|
||||
y.Check(err)
|
||||
var buf [4]byte
|
||||
binary.BigEndian.PutUint32(buf[:], uint32(n))
|
||||
b.buf.Write(buf[:])
|
||||
|
||||
return b.buf.Bytes()
|
||||
}
|
557
vendor/github.com/dgraph-io/badger/table/iterator.go
generated
vendored
Normal file
557
vendor/github.com/dgraph-io/badger/table/iterator.go
generated
vendored
Normal file
|
@ -0,0 +1,557 @@
|
|||
/*
|
||||
* Copyright 2017 Dgraph Labs, Inc. and Contributors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package table
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/dgraph-io/badger/y"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
type blockIterator struct {
|
||||
data []byte
|
||||
pos uint32
|
||||
err error
|
||||
baseKey []byte
|
||||
|
||||
key []byte
|
||||
val []byte
|
||||
init bool
|
||||
|
||||
last header // The last header we saw.
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Reset() {
|
||||
itr.pos = 0
|
||||
itr.err = nil
|
||||
itr.baseKey = []byte{}
|
||||
itr.key = []byte{}
|
||||
itr.val = []byte{}
|
||||
itr.init = false
|
||||
itr.last = header{}
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Init() {
|
||||
if !itr.init {
|
||||
itr.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Valid() bool {
|
||||
return itr != nil && itr.err == nil
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Error() error {
|
||||
return itr.err
|
||||
}
|
||||
|
||||
// Close is a no-op.
func (itr *blockIterator) Close() {
}
|
||||
|
||||
// Values accepted as the whence argument of the seek functions in this file.
var (
	// origin makes the seek reset the iterator before scanning.
	origin = 0
	// current makes the seek continue from the iterator's present position.
	current = 1
)
|
||||
|
||||
// Seek brings us to the first block element that is >= input key.
|
||||
func (itr *blockIterator) Seek(key []byte, whence int) {
|
||||
itr.err = nil
|
||||
|
||||
switch whence {
|
||||
case origin:
|
||||
itr.Reset()
|
||||
case current:
|
||||
}
|
||||
|
||||
var done bool
|
||||
for itr.Init(); itr.Valid(); itr.Next() {
|
||||
k := itr.Key()
|
||||
if y.CompareKeys(k, key) >= 0 {
|
||||
// We are done as k is >= key.
|
||||
done = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !done {
|
||||
itr.err = io.EOF
|
||||
}
|
||||
}
|
||||
|
||||
func (itr *blockIterator) SeekToFirst() {
|
||||
itr.err = nil
|
||||
itr.Init()
|
||||
}
|
||||
|
||||
// SeekToLast brings us to the last element. Valid should return true.
|
||||
func (itr *blockIterator) SeekToLast() {
|
||||
itr.err = nil
|
||||
for itr.Init(); itr.Valid(); itr.Next() {
|
||||
}
|
||||
itr.Prev()
|
||||
}
|
||||
|
||||
// parseKV would allocate a new byte slice for key and for value.
|
||||
func (itr *blockIterator) parseKV(h header) {
|
||||
if cap(itr.key) < int(h.plen+h.klen) {
|
||||
sz := int(h.plen) + int(h.klen) // Convert to int before adding to avoid uint16 overflow.
|
||||
itr.key = make([]byte, 2*sz)
|
||||
}
|
||||
itr.key = itr.key[:h.plen+h.klen]
|
||||
copy(itr.key, itr.baseKey[:h.plen])
|
||||
copy(itr.key[h.plen:], itr.data[itr.pos:itr.pos+uint32(h.klen)])
|
||||
itr.pos += uint32(h.klen)
|
||||
|
||||
if itr.pos+uint32(h.vlen) > uint32(len(itr.data)) {
|
||||
itr.err = errors.Errorf("Value exceeded size of block: %d %d %d %d %v",
|
||||
itr.pos, h.klen, h.vlen, len(itr.data), h)
|
||||
return
|
||||
}
|
||||
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:itr.pos+uint32(h.vlen)])
|
||||
itr.pos += uint32(h.vlen)
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Next() {
|
||||
itr.init = true
|
||||
itr.err = nil
|
||||
if itr.pos >= uint32(len(itr.data)) {
|
||||
itr.err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
var h header
|
||||
itr.pos += uint32(h.Decode(itr.data[itr.pos:]))
|
||||
itr.last = h // Store the last header.
|
||||
|
||||
if h.klen == 0 && h.plen == 0 {
|
||||
// Last entry in the table.
|
||||
itr.err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
// Populate baseKey if it isn't set yet. This would only happen for the first Next.
|
||||
if len(itr.baseKey) == 0 {
|
||||
// This should be the first Next() for this block. Hence, prefix length should be zero.
|
||||
y.AssertTrue(h.plen == 0)
|
||||
itr.baseKey = itr.data[itr.pos : itr.pos+uint32(h.klen)]
|
||||
}
|
||||
itr.parseKV(h)
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Prev() {
|
||||
if !itr.init {
|
||||
return
|
||||
}
|
||||
itr.err = nil
|
||||
if itr.last.prev == math.MaxUint32 {
|
||||
// This is the first element of the block!
|
||||
itr.err = io.EOF
|
||||
itr.pos = 0
|
||||
return
|
||||
}
|
||||
|
||||
// Move back using current header's prev.
|
||||
itr.pos = itr.last.prev
|
||||
|
||||
var h header
|
||||
y.AssertTruef(itr.pos < uint32(len(itr.data)), "%d %d", itr.pos, len(itr.data))
|
||||
itr.pos += uint32(h.Decode(itr.data[itr.pos:]))
|
||||
itr.parseKV(h)
|
||||
itr.last = h
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Key() []byte {
|
||||
if itr.err != nil {
|
||||
return nil
|
||||
}
|
||||
return itr.key
|
||||
}
|
||||
|
||||
func (itr *blockIterator) Value() []byte {
|
||||
if itr.err != nil {
|
||||
return nil
|
||||
}
|
||||
return itr.val
|
||||
}
|
||||
|
||||
// Iterator is an iterator for a Table.
|
||||
type Iterator struct {
|
||||
t *Table
|
||||
bpos int
|
||||
bi *blockIterator
|
||||
err error
|
||||
|
||||
// Internally, Iterator is bidirectional. However, we only expose the
|
||||
// unidirectional functionality for now.
|
||||
reversed bool
|
||||
}
|
||||
|
||||
// NewIterator returns a new iterator of the Table
|
||||
func (t *Table) NewIterator(reversed bool) *Iterator {
|
||||
t.IncrRef() // Important.
|
||||
ti := &Iterator{t: t, reversed: reversed}
|
||||
ti.next()
|
||||
return ti
|
||||
}
|
||||
|
||||
// Close closes the iterator (and it must be called).
|
||||
func (itr *Iterator) Close() error {
|
||||
return itr.t.DecrRef()
|
||||
}
|
||||
|
||||
func (itr *Iterator) reset() {
|
||||
itr.bpos = 0
|
||||
itr.err = nil
|
||||
}
|
||||
|
||||
// Valid follows the y.Iterator interface
|
||||
func (itr *Iterator) Valid() bool {
|
||||
return itr.err == nil
|
||||
}
|
||||
|
||||
func (itr *Iterator) seekToFirst() {
|
||||
numBlocks := len(itr.t.blockIndex)
|
||||
if numBlocks == 0 {
|
||||
itr.err = io.EOF
|
||||
return
|
||||
}
|
||||
itr.bpos = 0
|
||||
block, err := itr.t.block(itr.bpos)
|
||||
if err != nil {
|
||||
itr.err = err
|
||||
return
|
||||
}
|
||||
itr.bi = block.NewIterator()
|
||||
itr.bi.SeekToFirst()
|
||||
itr.err = itr.bi.Error()
|
||||
}
|
||||
|
||||
func (itr *Iterator) seekToLast() {
|
||||
numBlocks := len(itr.t.blockIndex)
|
||||
if numBlocks == 0 {
|
||||
itr.err = io.EOF
|
||||
return
|
||||
}
|
||||
itr.bpos = numBlocks - 1
|
||||
block, err := itr.t.block(itr.bpos)
|
||||
if err != nil {
|
||||
itr.err = err
|
||||
return
|
||||
}
|
||||
itr.bi = block.NewIterator()
|
||||
itr.bi.SeekToLast()
|
||||
itr.err = itr.bi.Error()
|
||||
}
|
||||
|
||||
func (itr *Iterator) seekHelper(blockIdx int, key []byte) {
|
||||
itr.bpos = blockIdx
|
||||
block, err := itr.t.block(blockIdx)
|
||||
if err != nil {
|
||||
itr.err = err
|
||||
return
|
||||
}
|
||||
itr.bi = block.NewIterator()
|
||||
itr.bi.Seek(key, origin)
|
||||
itr.err = itr.bi.Error()
|
||||
}
|
||||
|
||||
// seekFrom brings us to a key that is >= input key.
|
||||
func (itr *Iterator) seekFrom(key []byte, whence int) {
|
||||
itr.err = nil
|
||||
switch whence {
|
||||
case origin:
|
||||
itr.reset()
|
||||
case current:
|
||||
}
|
||||
|
||||
idx := sort.Search(len(itr.t.blockIndex), func(idx int) bool {
|
||||
ko := itr.t.blockIndex[idx]
|
||||
return y.CompareKeys(ko.key, key) > 0
|
||||
})
|
||||
if idx == 0 {
|
||||
// The smallest key in our table is already strictly > key. We can return that.
|
||||
// This is like a SeekToFirst.
|
||||
itr.seekHelper(0, key)
|
||||
return
|
||||
}
|
||||
|
||||
// block[idx].smallest is > key.
|
||||
// Since idx>0, we know block[idx-1].smallest is <= key.
|
||||
// There are two cases.
|
||||
// 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first
|
||||
// element of block[idx].
|
||||
// 2) Some element in block[idx-1] is >= key. We should go to that element.
|
||||
itr.seekHelper(idx-1, key)
|
||||
if itr.err == io.EOF {
|
||||
// Case 1. Need to visit block[idx].
|
||||
if idx == len(itr.t.blockIndex) {
|
||||
// If idx == len(itr.t.blockIndex), then input key is greater than ANY element of table.
|
||||
// There's nothing we can do. Valid() should return false as we seek to end of table.
|
||||
return
|
||||
}
|
||||
// Since block[idx].smallest is > key. This is essentially a block[idx].SeekToFirst.
|
||||
itr.seekHelper(idx, key)
|
||||
}
|
||||
// Case 2: No need to do anything. We already did the seek in block[idx-1].
|
||||
}
|
||||
|
||||
// seek will reset iterator and seek to >= key.
|
||||
func (itr *Iterator) seek(key []byte) {
|
||||
itr.seekFrom(key, origin)
|
||||
}
|
||||
|
||||
// seekForPrev will reset iterator and seek to <= key.
|
||||
func (itr *Iterator) seekForPrev(key []byte) {
|
||||
// TODO: Optimize this. We shouldn't have to take a Prev step.
|
||||
itr.seekFrom(key, origin)
|
||||
if !bytes.Equal(itr.Key(), key) {
|
||||
itr.prev()
|
||||
}
|
||||
}
|
||||
|
||||
func (itr *Iterator) next() {
|
||||
itr.err = nil
|
||||
|
||||
if itr.bpos >= len(itr.t.blockIndex) {
|
||||
itr.err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
if itr.bi == nil {
|
||||
block, err := itr.t.block(itr.bpos)
|
||||
if err != nil {
|
||||
itr.err = err
|
||||
return
|
||||
}
|
||||
itr.bi = block.NewIterator()
|
||||
itr.bi.SeekToFirst()
|
||||
itr.err = itr.bi.Error()
|
||||
return
|
||||
}
|
||||
|
||||
itr.bi.Next()
|
||||
if !itr.bi.Valid() {
|
||||
itr.bpos++
|
||||
itr.bi = nil
|
||||
itr.next()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (itr *Iterator) prev() {
|
||||
itr.err = nil
|
||||
if itr.bpos < 0 {
|
||||
itr.err = io.EOF
|
||||
return
|
||||
}
|
||||
|
||||
if itr.bi == nil {
|
||||
block, err := itr.t.block(itr.bpos)
|
||||
if err != nil {
|
||||
itr.err = err
|
||||
return
|
||||
}
|
||||
itr.bi = block.NewIterator()
|
||||
itr.bi.SeekToLast()
|
||||
itr.err = itr.bi.Error()
|
||||
return
|
||||
}
|
||||
|
||||
itr.bi.Prev()
|
||||
if !itr.bi.Valid() {
|
||||
itr.bpos--
|
||||
itr.bi = nil
|
||||
itr.prev()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Key follows the y.Iterator interface.
|
||||
// Returns the key with timestamp.
|
||||
func (itr *Iterator) Key() []byte {
|
||||
return itr.bi.Key()
|
||||
}
|
||||
|
||||
// Value follows the y.Iterator interface
|
||||
func (itr *Iterator) Value() (ret y.ValueStruct) {
|
||||
ret.Decode(itr.bi.Value())
|
||||
return
|
||||
}
|
||||
|
||||
// Next follows the y.Iterator interface
|
||||
func (itr *Iterator) Next() {
|
||||
if !itr.reversed {
|
||||
itr.next()
|
||||
} else {
|
||||
itr.prev()
|
||||
}
|
||||
}
|
||||
|
||||
// Rewind follows the y.Iterator interface
|
||||
func (itr *Iterator) Rewind() {
|
||||
if !itr.reversed {
|
||||
itr.seekToFirst()
|
||||
} else {
|
||||
itr.seekToLast()
|
||||
}
|
||||
}
|
||||
|
||||
// Seek follows the y.Iterator interface
|
||||
func (itr *Iterator) Seek(key []byte) {
|
||||
if !itr.reversed {
|
||||
itr.seek(key)
|
||||
} else {
|
||||
itr.seekForPrev(key)
|
||||
}
|
||||
}
|
||||
|
||||
// ConcatIterator concatenates the sequences defined by several iterators. (It only works with
|
||||
// TableIterators, probably just because it's faster to not be so generic.)
|
||||
type ConcatIterator struct {
|
||||
idx int // Which iterator is active now.
|
||||
cur *Iterator
|
||||
iters []*Iterator // Corresponds to tables.
|
||||
tables []*Table // Disregarding reversed, this is in ascending order.
|
||||
reversed bool
|
||||
}
|
||||
|
||||
// NewConcatIterator creates a new concatenated iterator
|
||||
func NewConcatIterator(tbls []*Table, reversed bool) *ConcatIterator {
|
||||
iters := make([]*Iterator, len(tbls))
|
||||
for i := 0; i < len(tbls); i++ {
|
||||
// Increment the reference count. Since, we're not creating the iterator right now.
|
||||
// Here, We'll hold the reference of the tables, till the lifecycle of the iterator.
|
||||
tbls[i].IncrRef()
|
||||
|
||||
// Save cycles by not initializing the iterators until needed.
|
||||
// iters[i] = tbls[i].NewIterator(reversed)
|
||||
}
|
||||
return &ConcatIterator{
|
||||
reversed: reversed,
|
||||
iters: iters,
|
||||
tables: tbls,
|
||||
idx: -1, // Not really necessary because s.it.Valid()=false, but good to have.
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ConcatIterator) setIdx(idx int) {
|
||||
s.idx = idx
|
||||
if idx < 0 || idx >= len(s.iters) {
|
||||
s.cur = nil
|
||||
return
|
||||
}
|
||||
if s.iters[idx] == nil {
|
||||
s.iters[idx] = s.tables[idx].NewIterator(s.reversed)
|
||||
}
|
||||
s.cur = s.iters[s.idx]
|
||||
}
|
||||
|
||||
// Rewind implements y.Interface
|
||||
func (s *ConcatIterator) Rewind() {
|
||||
if len(s.iters) == 0 {
|
||||
return
|
||||
}
|
||||
if !s.reversed {
|
||||
s.setIdx(0)
|
||||
} else {
|
||||
s.setIdx(len(s.iters) - 1)
|
||||
}
|
||||
s.cur.Rewind()
|
||||
}
|
||||
|
||||
// Valid implements y.Interface
|
||||
func (s *ConcatIterator) Valid() bool {
|
||||
return s.cur != nil && s.cur.Valid()
|
||||
}
|
||||
|
||||
// Key implements y.Interface
|
||||
func (s *ConcatIterator) Key() []byte {
|
||||
return s.cur.Key()
|
||||
}
|
||||
|
||||
// Value implements y.Interface
|
||||
func (s *ConcatIterator) Value() y.ValueStruct {
|
||||
return s.cur.Value()
|
||||
}
|
||||
|
||||
// Seek brings us to element >= key if reversed is false. Otherwise, <= key.
|
||||
func (s *ConcatIterator) Seek(key []byte) {
|
||||
var idx int
|
||||
if !s.reversed {
|
||||
idx = sort.Search(len(s.tables), func(i int) bool {
|
||||
return y.CompareKeys(s.tables[i].Biggest(), key) >= 0
|
||||
})
|
||||
} else {
|
||||
n := len(s.tables)
|
||||
idx = n - 1 - sort.Search(n, func(i int) bool {
|
||||
return y.CompareKeys(s.tables[n-1-i].Smallest(), key) <= 0
|
||||
})
|
||||
}
|
||||
if idx >= len(s.tables) || idx < 0 {
|
||||
s.setIdx(-1)
|
||||
return
|
||||
}
|
||||
// For reversed=false, we know s.tables[i-1].Biggest() < key. Thus, the
|
||||
// previous table cannot possibly contain key.
|
||||
s.setIdx(idx)
|
||||
s.cur.Seek(key)
|
||||
}
|
||||
|
||||
// Next advances our concat iterator.
|
||||
func (s *ConcatIterator) Next() {
|
||||
s.cur.Next()
|
||||
if s.cur.Valid() {
|
||||
// Nothing to do. Just stay with the current table.
|
||||
return
|
||||
}
|
||||
for { // In case there are empty tables.
|
||||
if !s.reversed {
|
||||
s.setIdx(s.idx + 1)
|
||||
} else {
|
||||
s.setIdx(s.idx - 1)
|
||||
}
|
||||
if s.cur == nil {
|
||||
// End of list. Valid will become false.
|
||||
return
|
||||
}
|
||||
s.cur.Rewind()
|
||||
if s.cur.Valid() {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close implements y.Interface.
|
||||
func (s *ConcatIterator) Close() error {
|
||||
for _, t := range s.tables {
|
||||
// DeReference the tables while closing the iterator.
|
||||
if err := t.DecrRef(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, it := range s.iters {
|
||||
if it == nil {
|
||||
continue
|
||||
}
|
||||
if err := it.Close(); err != nil {
|
||||
return errors.Wrap(err, "ConcatIterator")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
229
vendor/github.com/dgraph-io/badger/table/merge_iterator.go
generated
vendored
Normal file
229
vendor/github.com/dgraph-io/badger/table/merge_iterator.go
generated
vendored
Normal file
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
* Copyright 2019 Dgraph Labs, Inc. and Contributors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package table
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/dgraph-io/badger/y"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// MergeIterator merges multiple iterators.
|
||||
// NOTE: MergeIterator owns the array of iterators and is responsible for closing them.
|
||||
type MergeIterator struct {
|
||||
left node
|
||||
right node
|
||||
small *node
|
||||
|
||||
curKey []byte
|
||||
reverse bool
|
||||
}
|
||||
|
||||
type node struct {
|
||||
valid bool
|
||||
key []byte
|
||||
iter y.Iterator
|
||||
|
||||
// The two iterators are type asserted from `y.Iterator`, used to inline more function calls.
|
||||
// Calling functions on concrete types is much faster (about 25-30%) than calling the
|
||||
// interface's function.
|
||||
merge *MergeIterator
|
||||
concat *ConcatIterator
|
||||
}
|
||||
|
||||
func (n *node) setIterator(iter y.Iterator) {
|
||||
n.iter = iter
|
||||
// It's okay if the type assertion below fails and n.merge/n.concat are set to nil.
|
||||
// We handle the nil values of merge and concat in all the methods.
|
||||
n.merge, _ = iter.(*MergeIterator)
|
||||
n.concat, _ = iter.(*ConcatIterator)
|
||||
}
|
||||
|
||||
func (n *node) setKey() {
|
||||
if n.merge != nil {
|
||||
n.valid = n.merge.small.valid
|
||||
if n.valid {
|
||||
n.key = n.merge.small.key
|
||||
}
|
||||
} else if n.concat != nil {
|
||||
n.valid = n.concat.Valid()
|
||||
if n.valid {
|
||||
n.key = n.concat.Key()
|
||||
}
|
||||
} else {
|
||||
n.valid = n.iter.Valid()
|
||||
if n.valid {
|
||||
n.key = n.iter.Key()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *node) next() {
|
||||
if n.merge != nil {
|
||||
n.merge.Next()
|
||||
} else if n.concat != nil {
|
||||
n.concat.Next()
|
||||
} else {
|
||||
n.iter.Next()
|
||||
}
|
||||
n.setKey()
|
||||
}
|
||||
|
||||
func (n *node) rewind() {
|
||||
n.iter.Rewind()
|
||||
n.setKey()
|
||||
}
|
||||
|
||||
func (n *node) seek(key []byte) {
|
||||
n.iter.Seek(key)
|
||||
n.setKey()
|
||||
}
|
||||
|
||||
func (mi *MergeIterator) fix() {
|
||||
if !mi.bigger().valid {
|
||||
return
|
||||
}
|
||||
if !mi.small.valid {
|
||||
mi.swapSmall()
|
||||
return
|
||||
}
|
||||
cmp := y.CompareKeys(mi.small.key, mi.bigger().key)
|
||||
// Both the keys are equal.
|
||||
if cmp == 0 {
|
||||
// In case of same keys, move the right iterator ahead.
|
||||
mi.right.next()
|
||||
if &mi.right == mi.small {
|
||||
mi.swapSmall()
|
||||
}
|
||||
return
|
||||
} else if cmp < 0 { // Small is less than bigger().
|
||||
if mi.reverse {
|
||||
mi.swapSmall()
|
||||
} else {
|
||||
// we don't need to do anything. Small already points to the smallest.
|
||||
}
|
||||
return
|
||||
} else { // bigger() is less than small.
|
||||
if mi.reverse {
|
||||
// Do nothing since we're iterating in reverse. Small currently points to
|
||||
// the bigger key and that's okay in reverse iteration.
|
||||
} else {
|
||||
mi.swapSmall()
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (mi *MergeIterator) bigger() *node {
|
||||
if mi.small == &mi.left {
|
||||
return &mi.right
|
||||
}
|
||||
return &mi.left
|
||||
}
|
||||
|
||||
func (mi *MergeIterator) swapSmall() {
|
||||
if mi.small == &mi.left {
|
||||
mi.small = &mi.right
|
||||
return
|
||||
}
|
||||
if mi.small == &mi.right {
|
||||
mi.small = &mi.left
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Next returns the next element. If it is the same as the current key, ignore it.
|
||||
func (mi *MergeIterator) Next() {
|
||||
for mi.Valid() {
|
||||
if !bytes.Equal(mi.small.key, mi.curKey) {
|
||||
break
|
||||
}
|
||||
mi.small.next()
|
||||
mi.fix()
|
||||
}
|
||||
mi.setCurrent()
|
||||
}
|
||||
|
||||
func (mi *MergeIterator) setCurrent() {
|
||||
mi.curKey = append(mi.curKey[:0], mi.small.key...)
|
||||
}
|
||||
|
||||
// Rewind seeks to first element (or last element for reverse iterator).
|
||||
func (mi *MergeIterator) Rewind() {
|
||||
mi.left.rewind()
|
||||
mi.right.rewind()
|
||||
mi.fix()
|
||||
mi.setCurrent()
|
||||
}
|
||||
|
||||
// Seek brings us to element with key >= given key.
|
||||
func (mi *MergeIterator) Seek(key []byte) {
|
||||
mi.left.seek(key)
|
||||
mi.right.seek(key)
|
||||
mi.fix()
|
||||
mi.setCurrent()
|
||||
}
|
||||
|
||||
// Valid returns whether the MergeIterator is at a valid element.
|
||||
func (mi *MergeIterator) Valid() bool {
|
||||
return mi.small.valid
|
||||
}
|
||||
|
||||
// Key returns the key associated with the current iterator.
|
||||
func (mi *MergeIterator) Key() []byte {
|
||||
return mi.small.key
|
||||
}
|
||||
|
||||
// Value returns the value associated with the iterator.
|
||||
func (mi *MergeIterator) Value() y.ValueStruct {
|
||||
return mi.small.iter.Value()
|
||||
}
|
||||
|
||||
// Close implements y.Iterator.
|
||||
func (mi *MergeIterator) Close() error {
|
||||
err1 := mi.left.iter.Close()
|
||||
err2 := mi.right.iter.Close()
|
||||
if err1 != nil {
|
||||
return errors.Wrap(err1, "MergeIterator")
|
||||
}
|
||||
return errors.Wrap(err2, "MergeIterator")
|
||||
}
|
||||
|
||||
// NewMergeIterator creates a merge iterator.
|
||||
func NewMergeIterator(iters []y.Iterator, reverse bool) y.Iterator {
|
||||
if len(iters) == 0 {
|
||||
return nil
|
||||
} else if len(iters) == 1 {
|
||||
return iters[0]
|
||||
} else if len(iters) == 2 {
|
||||
mi := &MergeIterator{
|
||||
reverse: reverse,
|
||||
}
|
||||
mi.left.setIterator(iters[0])
|
||||
mi.right.setIterator(iters[1])
|
||||
// Assign left iterator randomly. This will be fixed when user calls rewind/seek.
|
||||
mi.small = &mi.left
|
||||
return mi
|
||||
}
|
||||
mid := len(iters) / 2
|
||||
return NewMergeIterator(
|
||||
[]y.Iterator{
|
||||
NewMergeIterator(iters[:mid], reverse),
|
||||
NewMergeIterator(iters[mid:], reverse),
|
||||
}, reverse)
|
||||
}
|
362
vendor/github.com/dgraph-io/badger/table/table.go
generated
vendored
Normal file
362
vendor/github.com/dgraph-io/badger/table/table.go
generated
vendored
Normal file
|
@ -0,0 +1,362 @@
|
|||
/*
|
||||
* Copyright 2017 Dgraph Labs, Inc. and Contributors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package table
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/AndreasBriese/bbloom"
|
||||
"github.com/dgraph-io/badger/options"
|
||||
"github.com/dgraph-io/badger/y"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// fileSuffix is the extension of table files: ParseFileID requires it and
// IDToFilename appends it.
const fileSuffix = ".sst"
|
||||
|
||||
// keyOffset is one entry of a table's block index, built by readIndex.
type keyOffset struct {
	// key is a copy of the key read right after the header at the start of
	// the block.
	key []byte
	// offset is the position of the block within the table file.
	offset int
	// len is the size of the block in bytes.
	len int
}
|
||||
|
||||
// TableInterface exposes the key range and bloom filter lookup of a table. It
// is useful for testing.
type TableInterface interface {
	// Smallest returns the smallest key in the table.
	Smallest() []byte
	// Biggest returns the biggest key in the table.
	Biggest() []byte
	// DoesNotHave returns true if (but not "only if") the table does not have
	// the key.
	DoesNotHave(key []byte) bool
}
|
||||
|
||||
// Table represents a loaded table file with the info we have about it
|
||||
type Table struct {
|
||||
sync.Mutex
|
||||
|
||||
fd *os.File // Own fd.
|
||||
tableSize int // Initialized in OpenTable, using fd.Stat().
|
||||
|
||||
blockIndex []keyOffset
|
||||
ref int32 // For file garbage collection. Atomic.
|
||||
|
||||
loadingMode options.FileLoadingMode
|
||||
mmap []byte // Memory mapped.
|
||||
|
||||
// The following are initialized once and const.
|
||||
smallest, biggest []byte // Smallest and largest keys (with timestamps).
|
||||
id uint64 // file id, part of filename
|
||||
|
||||
bf bbloom.Bloom
|
||||
|
||||
Checksum []byte
|
||||
}
|
||||
|
||||
// IncrRef increments the refcount (having to do with whether the file should be deleted)
|
||||
func (t *Table) IncrRef() {
|
||||
atomic.AddInt32(&t.ref, 1)
|
||||
}
|
||||
|
||||
// DecrRef decrements the refcount and possibly deletes the table
|
||||
func (t *Table) DecrRef() error {
|
||||
newRef := atomic.AddInt32(&t.ref, -1)
|
||||
if newRef == 0 {
|
||||
// We can safely delete this file, because for all the current files, we always have
|
||||
// at least one reference pointing to them.
|
||||
|
||||
// It's necessary to delete windows files
|
||||
if t.loadingMode == options.MemoryMap {
|
||||
if err := y.Munmap(t.mmap); err != nil {
|
||||
return err
|
||||
}
|
||||
t.mmap = nil
|
||||
}
|
||||
if err := t.fd.Truncate(0); err != nil {
|
||||
// This is very important to let the FS know that the file is deleted.
|
||||
return err
|
||||
}
|
||||
filename := t.fd.Name()
|
||||
if err := t.fd.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Remove(filename); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// block is a single data block of a table, as returned by Table.block.
type block struct {
	// offset is the position of the block within the table file.
	offset int
	// data holds the bytes of the block.
	data []byte
}
|
||||
|
||||
func (b block) NewIterator() *blockIterator {
|
||||
return &blockIterator{data: b.data}
|
||||
}
|
||||
|
||||
// OpenTable assumes file has only one table and opens it. Takes ownership of fd upon function
|
||||
// entry. Returns a table with one reference count on it (decrementing which may delete the file!
|
||||
// -- consider t.Close() instead). The fd has to writeable because we call Truncate on it before
|
||||
// deleting.
|
||||
func OpenTable(fd *os.File, mode options.FileLoadingMode, cksum []byte) (*Table, error) {
|
||||
fileInfo, err := fd.Stat()
|
||||
if err != nil {
|
||||
// It's OK to ignore fd.Close() errs in this function because we have only read
|
||||
// from the file.
|
||||
_ = fd.Close()
|
||||
return nil, y.Wrap(err)
|
||||
}
|
||||
|
||||
filename := fileInfo.Name()
|
||||
id, ok := ParseFileID(filename)
|
||||
if !ok {
|
||||
_ = fd.Close()
|
||||
return nil, errors.Errorf("Invalid filename: %s", filename)
|
||||
}
|
||||
t := &Table{
|
||||
fd: fd,
|
||||
ref: 1, // Caller is given one reference.
|
||||
id: id,
|
||||
loadingMode: mode,
|
||||
}
|
||||
|
||||
t.tableSize = int(fileInfo.Size())
|
||||
|
||||
// We first load to RAM, so we can read the index and do checksum.
|
||||
if err := t.loadToRAM(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Enforce checksum before we read index. Otherwise, if the file was
|
||||
// truncated, we'd end up with panics in readIndex.
|
||||
if len(cksum) > 0 && !bytes.Equal(t.Checksum, cksum) {
|
||||
return nil, fmt.Errorf(
|
||||
"CHECKSUM_MISMATCH: Table checksum does not match checksum in MANIFEST."+
|
||||
" NOT including table %s. This would lead to missing data."+
|
||||
"\n sha256 %x Expected\n sha256 %x Found\n", filename, cksum, t.Checksum)
|
||||
}
|
||||
if err := t.readIndex(); err != nil {
|
||||
return nil, y.Wrap(err)
|
||||
}
|
||||
|
||||
it := t.NewIterator(false)
|
||||
defer it.Close()
|
||||
it.Rewind()
|
||||
if it.Valid() {
|
||||
t.smallest = it.Key()
|
||||
}
|
||||
|
||||
it2 := t.NewIterator(true)
|
||||
defer it2.Close()
|
||||
it2.Rewind()
|
||||
if it2.Valid() {
|
||||
t.biggest = it2.Key()
|
||||
}
|
||||
|
||||
switch mode {
|
||||
case options.LoadToRAM:
|
||||
// No need to do anything. t.mmap is already filled.
|
||||
case options.MemoryMap:
|
||||
t.mmap, err = y.Mmap(fd, false, fileInfo.Size())
|
||||
if err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, y.Wrapf(err, "Unable to map file: %q", fileInfo.Name())
|
||||
}
|
||||
case options.FileIO:
|
||||
t.mmap = nil
|
||||
default:
|
||||
panic(fmt.Sprintf("Invalid loading mode: %v", mode))
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// Close closes the open table. (Releases resources back to the OS.)
|
||||
func (t *Table) Close() error {
|
||||
if t.loadingMode == options.MemoryMap {
|
||||
if err := y.Munmap(t.mmap); err != nil {
|
||||
return err
|
||||
}
|
||||
t.mmap = nil
|
||||
}
|
||||
|
||||
return t.fd.Close()
|
||||
}
|
||||
|
||||
func (t *Table) read(off, sz int) ([]byte, error) {
|
||||
if len(t.mmap) > 0 {
|
||||
if len(t.mmap[off:]) < sz {
|
||||
return nil, y.ErrEOF
|
||||
}
|
||||
return t.mmap[off : off+sz], nil
|
||||
}
|
||||
|
||||
res := make([]byte, sz)
|
||||
nbr, err := t.fd.ReadAt(res, int64(off))
|
||||
y.NumReads.Add(1)
|
||||
y.NumBytesRead.Add(int64(nbr))
|
||||
return res, err
|
||||
}
|
||||
|
||||
func (t *Table) readNoFail(off, sz int) []byte {
|
||||
res, err := t.read(off, sz)
|
||||
y.Check(err)
|
||||
return res
|
||||
}
|
||||
|
||||
func (t *Table) readIndex() error {
|
||||
if len(t.mmap) != t.tableSize {
|
||||
panic("Table size does not match the read bytes")
|
||||
}
|
||||
readPos := t.tableSize
|
||||
|
||||
// Read bloom filter.
|
||||
readPos -= 4
|
||||
buf := t.readNoFail(readPos, 4)
|
||||
bloomLen := int(binary.BigEndian.Uint32(buf))
|
||||
readPos -= bloomLen
|
||||
data := t.readNoFail(readPos, bloomLen)
|
||||
t.bf = bbloom.JSONUnmarshal(data)
|
||||
|
||||
readPos -= 4
|
||||
buf = t.readNoFail(readPos, 4)
|
||||
restartsLen := int(binary.BigEndian.Uint32(buf))
|
||||
|
||||
readPos -= 4 * restartsLen
|
||||
buf = t.readNoFail(readPos, 4*restartsLen)
|
||||
|
||||
offsets := make([]int, restartsLen)
|
||||
for i := 0; i < restartsLen; i++ {
|
||||
offsets[i] = int(binary.BigEndian.Uint32(buf[:4]))
|
||||
buf = buf[4:]
|
||||
}
|
||||
|
||||
// The last offset stores the end of the last block.
|
||||
for i := 0; i < len(offsets); i++ {
|
||||
var o int
|
||||
if i == 0 {
|
||||
o = 0
|
||||
} else {
|
||||
o = offsets[i-1]
|
||||
}
|
||||
|
||||
ko := keyOffset{
|
||||
offset: o,
|
||||
len: offsets[i] - o,
|
||||
}
|
||||
t.blockIndex = append(t.blockIndex, ko)
|
||||
}
|
||||
|
||||
// Execute this index read serially, because we already have table data in memory.
|
||||
var h header
|
||||
for idx := range t.blockIndex {
|
||||
ko := &t.blockIndex[idx]
|
||||
|
||||
hbuf := t.readNoFail(ko.offset, h.Size())
|
||||
h.Decode(hbuf)
|
||||
y.AssertTrue(h.plen == 0)
|
||||
|
||||
key := t.readNoFail(ko.offset+len(hbuf), int(h.klen))
|
||||
ko.key = append([]byte{}, key...)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Table) block(idx int) (block, error) {
|
||||
y.AssertTruef(idx >= 0, "idx=%d", idx)
|
||||
if idx >= len(t.blockIndex) {
|
||||
return block{}, errors.New("block out of index")
|
||||
}
|
||||
|
||||
ko := t.blockIndex[idx]
|
||||
blk := block{
|
||||
offset: ko.offset,
|
||||
}
|
||||
var err error
|
||||
blk.data, err = t.read(blk.offset, ko.len)
|
||||
return blk, err
|
||||
}
|
||||
|
||||
// Size is its file size in bytes
|
||||
func (t *Table) Size() int64 { return int64(t.tableSize) }
|
||||
|
||||
// Smallest is its smallest key, or nil if there are none
|
||||
func (t *Table) Smallest() []byte { return t.smallest }
|
||||
|
||||
// Biggest is its biggest key, or nil if there are none
|
||||
func (t *Table) Biggest() []byte { return t.biggest }
|
||||
|
||||
// Filename is NOT the file name. Just kidding, it is.
|
||||
func (t *Table) Filename() string { return t.fd.Name() }
|
||||
|
||||
// ID is the table's ID number (used to make the file name).
|
||||
func (t *Table) ID() uint64 { return t.id }
|
||||
|
||||
// DoesNotHave returns true if (but not "only if") the table does not have the key. It does a
|
||||
// bloom filter lookup.
|
||||
func (t *Table) DoesNotHave(key []byte) bool { return !t.bf.Has(key) }
|
||||
|
||||
// ParseFileID reads the file id out of a filename.
|
||||
func ParseFileID(name string) (uint64, bool) {
|
||||
name = path.Base(name)
|
||||
if !strings.HasSuffix(name, fileSuffix) {
|
||||
return 0, false
|
||||
}
|
||||
// suffix := name[len(fileSuffix):]
|
||||
name = strings.TrimSuffix(name, fileSuffix)
|
||||
id, err := strconv.Atoi(name)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
y.AssertTrue(id >= 0)
|
||||
return uint64(id), true
|
||||
}
|
||||
|
||||
// IDToFilename does the inverse of ParseFileID
|
||||
func IDToFilename(id uint64) string {
|
||||
return fmt.Sprintf("%06d", id) + fileSuffix
|
||||
}
|
||||
|
||||
// NewFilename should be named TableFilepath -- it combines the dir with the ID to make a table
|
||||
// filepath.
|
||||
func NewFilename(id uint64, dir string) string {
|
||||
return filepath.Join(dir, IDToFilename(id))
|
||||
}
|
||||
|
||||
func (t *Table) loadToRAM() error {
|
||||
if _, err := t.fd.Seek(0, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
t.mmap = make([]byte, t.tableSize)
|
||||
sum := sha256.New()
|
||||
tee := io.TeeReader(t.fd, sum)
|
||||
read, err := tee.Read(t.mmap)
|
||||
if err != nil || read != t.tableSize {
|
||||
return y.Wrapf(err, "Unable to load file in memory. Table file: %s", t.Filename())
|
||||
}
|
||||
t.Checksum = sum.Sum(nil)
|
||||
y.NumReads.Add(1)
|
||||
y.NumBytesRead.Add(int64(read))
|
||||
return nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue