521 lines
13 KiB
Go
521 lines
13 KiB
Go
|
/*
|
||
|
* Copyright 2020 Dgraph Labs, Inc. and Contributors
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package z
|
||
|
|
||
|
import (
|
||
|
"encoding/binary"
|
||
|
"fmt"
|
||
|
"io/ioutil"
|
||
|
"log"
|
||
|
"math"
|
||
|
"os"
|
||
|
"sort"
|
||
|
"sync/atomic"
|
||
|
|
||
|
"github.com/pkg/errors"
|
||
|
)
|
||
|
|
||
|
// Buffer is equivalent of bytes.Buffer without the ability to read. It is NOT thread-safe.
//
// In UseCalloc mode, z.Calloc is used to allocate memory, which depending upon how the code is
// compiled could use jemalloc for allocations.
//
// In UseMmap mode, Buffer uses file mmap to allocate memory. This allows us to store big data
// structures without using physical memory.
//
// MaxSize can be set to limit the memory usage.
type Buffer struct {
	padding       uint64     // Bytes reserved at the start so user data stays 8-byte aligned.
	offset        uint64     // Next write position (includes padding); read atomically by Len*/Bytes.
	buf           []byte     // Backing storage: Calloc'd memory or an mmap'd region.
	curSz         int        // Currently backed size of buf (Calloc size, or backing file size).
	maxSz         int        // Upper bound on how large the buffer may grow.
	fd            *os.File   // Backing temp file; set only in UseMmap mode.
	bufType       BufferType // Allocation strategy: UseCalloc, UseMmap or UseInvalid.
	autoMmapAfter int        // If >0, Grow switches from Calloc to mmap once curSz exceeds this.
	dir           string     // Directory in which the mmap temp file is created.
}
|
||
|
|
||
|
// BufferType selects the allocation strategy backing a Buffer.
type BufferType int

// String returns a human-readable name for the buffer type.
func (t BufferType) String() string {
	if t == UseCalloc {
		return "UseCalloc"
	}
	if t == UseMmap {
		return "UseMmap"
	}
	return "invalid"
}

const (
	UseCalloc BufferType = iota
	UseMmap
	UseInvalid
)
|
||
|
|
||
|
// smallBufferSize is an initial allocation minimal capacity.
// NewBufferWithDir falls back to it when the caller passes sz == 0.
const smallBufferSize = 64
|
||
|
|
||
|
// NewBuffer is a helper utility, which creates a virtually unlimited Buffer in UseCalloc mode.
|
||
|
func NewBuffer(sz int) *Buffer {
|
||
|
buf, err := NewBufferWithDir(sz, MaxBufferSize, UseCalloc, "")
|
||
|
if err != nil {
|
||
|
log.Fatalf("while creating buffer: %v", err)
|
||
|
}
|
||
|
return buf
|
||
|
}
|
||
|
|
||
|
// NewBufferWith would allocate a buffer of size sz upfront, with the total size of the buffer not
|
||
|
// exceeding maxSz. Both sz and maxSz can be set to zero, in which case reasonable defaults would be
|
||
|
// used. Buffer can't be used without initialization via NewBuffer.
|
||
|
func NewBufferWith(sz, maxSz int, bufType BufferType) (*Buffer, error) {
|
||
|
buf, err := NewBufferWithDir(sz, maxSz, bufType, "")
|
||
|
return buf, err
|
||
|
}
|
||
|
|
||
|
func BufferFrom(data []byte) *Buffer {
|
||
|
return &Buffer{
|
||
|
offset: uint64(len(data)),
|
||
|
padding: 0,
|
||
|
buf: data,
|
||
|
bufType: UseInvalid,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (b *Buffer) doMmap() error {
|
||
|
curBuf := b.buf
|
||
|
fd, err := ioutil.TempFile(b.dir, "buffer")
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
if err := fd.Truncate(int64(b.curSz)); err != nil {
|
||
|
return errors.Wrapf(err, "while truncating %s to size: %d", fd.Name(), b.curSz)
|
||
|
}
|
||
|
|
||
|
buf, err := Mmap(fd, true, int64(b.maxSz)) // Mmap up to max size.
|
||
|
if err != nil {
|
||
|
return errors.Wrapf(err, "while mmapping %s with size: %d", fd.Name(), b.maxSz)
|
||
|
}
|
||
|
if len(curBuf) > 0 {
|
||
|
assert(int(b.offset) == copy(buf, curBuf[:b.offset]))
|
||
|
Free(curBuf)
|
||
|
}
|
||
|
b.buf = buf
|
||
|
b.bufType = UseMmap
|
||
|
b.fd = fd
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// NewBufferWithDir would allocate a buffer of size sz upfront, with the total size of the buffer
|
||
|
// not exceeding maxSz. Both sz and maxSz can be set to zero, in which case reasonable defaults
|
||
|
// would be used. Buffer can't be used without initialization via NewBuffer. The buffer is created
|
||
|
// inside dir. The caller should take care of existence of dir.
|
||
|
func NewBufferWithDir(sz, maxSz int, bufType BufferType, dir string) (*Buffer, error) {
|
||
|
if sz == 0 {
|
||
|
sz = smallBufferSize
|
||
|
}
|
||
|
if maxSz == 0 {
|
||
|
maxSz = math.MaxInt32
|
||
|
}
|
||
|
if len(dir) == 0 {
|
||
|
dir = tmpDir
|
||
|
}
|
||
|
b := &Buffer{
|
||
|
padding: 8,
|
||
|
offset: 8, // Use 8 bytes of padding so that the elements are aligned.
|
||
|
curSz: sz,
|
||
|
maxSz: maxSz,
|
||
|
bufType: UseCalloc, // by default.
|
||
|
dir: dir,
|
||
|
}
|
||
|
|
||
|
switch bufType {
|
||
|
case UseCalloc:
|
||
|
b.buf = Calloc(sz)
|
||
|
case UseMmap:
|
||
|
if err := b.doMmap(); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
default:
|
||
|
log.Fatalf("Invalid bufType: %q\n", bufType)
|
||
|
}
|
||
|
|
||
|
b.buf[0] = 0x00
|
||
|
return b, nil
|
||
|
}
|
||
|
|
||
|
func (b *Buffer) IsEmpty() bool {
|
||
|
return int(b.offset) == b.StartOffset()
|
||
|
}
|
||
|
|
||
|
// LenWithPadding would return the number of bytes written to the buffer so far
|
||
|
// plus the padding at the start of the buffer.
|
||
|
func (b *Buffer) LenWithPadding() int {
|
||
|
return int(atomic.LoadUint64(&b.offset))
|
||
|
}
|
||
|
|
||
|
// LenNoPadding would return the number of bytes written to the buffer so far
|
||
|
// (without the padding).
|
||
|
func (b *Buffer) LenNoPadding() int {
|
||
|
return int(atomic.LoadUint64(&b.offset) - b.padding)
|
||
|
}
|
||
|
|
||
|
// Bytes would return all the written bytes as a slice.
|
||
|
func (b *Buffer) Bytes() []byte {
|
||
|
off := atomic.LoadUint64(&b.offset)
|
||
|
return b.buf[b.padding:off]
|
||
|
}
|
||
|
|
||
|
// AutoMmapAfter sets a size threshold: once Grow pushes curSz past it (in
// UseCalloc mode), the buffer switches to an mmap'd file.
func (b *Buffer) AutoMmapAfter(size int) {
	b.autoMmapAfter = size
}
|
||
|
|
||
|
// Grow would grow the buffer to have at least n more bytes. In case the buffer is at capacity, it
// would reallocate twice the size of current capacity + n, to ensure n bytes can be written to the
// buffer without further allocation. In UseMmap mode, this might result in underlying file
// expansion.
func (b *Buffer) Grow(n int) {
	// In this case, len and cap are the same.
	if b.buf == nil {
		panic("z.Buffer needs to be initialized before using")
	}
	// Refuse to grow past the configured hard limit.
	if b.maxSz-int(b.offset) < n {
		panic(fmt.Sprintf("Buffer max size exceeded: %d."+
			" Offset: %d. Grow: %d", b.maxSz, b.offset, n))
	}
	// Enough headroom already; nothing to do.
	if b.curSz-int(b.offset) > n {
		return
	}

	// Grow by roughly current size + n, capped at 1GB per growth step.
	growBy := b.curSz + n
	if growBy > 1<<30 {
		growBy = 1 << 30
	}
	if n > growBy {
		// Always at least allocate n, even if it exceeds the 1GB limit above.
		growBy = n
	}
	b.curSz += growBy

	switch b.bufType {
	case UseCalloc:
		// Once past the auto-mmap threshold, move the buffer onto a file mapping.
		if b.autoMmapAfter > 0 && b.curSz > b.autoMmapAfter {
			// This would do copy as well.
			check(b.doMmap())

		} else {
			// Allocate a bigger region, copy the written prefix, free the old one.
			newBuf := Calloc(b.curSz)
			copy(newBuf, b.buf[:b.offset])
			Free(b.buf)
			b.buf = newBuf
		}
	case UseMmap:
		// The mapping already covers maxSz; only the backing file needs extending.
		if err := b.fd.Truncate(int64(b.curSz)); err != nil {
			log.Fatalf("While trying to truncate file %s to size: %d error: %v\n",
				b.fd.Name(), b.curSz, err)
		}
	default:
		panic("Invalid bufType")
	}
}
|
||
|
|
||
|
// Allocate is a way to get a slice of size n back from the buffer. This slice can be directly
|
||
|
// written to. Warning: Allocate is not thread-safe. The byte slice returned MUST be used before
|
||
|
// further calls to Buffer.
|
||
|
func (b *Buffer) Allocate(n int) []byte {
|
||
|
b.Grow(n)
|
||
|
off := b.offset
|
||
|
b.offset += uint64(n)
|
||
|
return b.buf[off:int(b.offset)]
|
||
|
}
|
||
|
|
||
|
// AllocateOffset works the same way as allocate, but instead of returning a byte slice, it returns
|
||
|
// the offset of the allocation.
|
||
|
func (b *Buffer) AllocateOffset(n int) int {
|
||
|
b.Grow(n)
|
||
|
b.offset += uint64(n)
|
||
|
return int(b.offset) - n
|
||
|
}
|
||
|
|
||
|
func (b *Buffer) writeLen(sz int) {
|
||
|
buf := b.Allocate(4)
|
||
|
binary.BigEndian.PutUint32(buf, uint32(sz))
|
||
|
}
|
||
|
|
||
|
// SliceAllocate would encode the size provided into the buffer, followed by a call to Allocate,
|
||
|
// hence returning the slice of size sz. This can be used to allocate a lot of small buffers into
|
||
|
// this big buffer.
|
||
|
// Note that SliceAllocate should NOT be mixed with normal calls to Write.
|
||
|
func (b *Buffer) SliceAllocate(sz int) []byte {
|
||
|
b.Grow(4 + sz)
|
||
|
b.writeLen(sz)
|
||
|
return b.Allocate(sz)
|
||
|
}
|
||
|
|
||
|
// StartOffset returns the offset of the first user byte, i.e. the padding size.
func (b *Buffer) StartOffset() int {
	return int(b.padding)
}
|
||
|
|
||
|
func (b *Buffer) WriteSlice(slice []byte) {
|
||
|
dst := b.SliceAllocate(len(slice))
|
||
|
copy(dst, slice)
|
||
|
}
|
||
|
|
||
|
func (b *Buffer) SliceIterate(f func(slice []byte) error) error {
|
||
|
if b.IsEmpty() {
|
||
|
return nil
|
||
|
}
|
||
|
slice, next := []byte{}, b.StartOffset()
|
||
|
for next >= 0 {
|
||
|
slice, next = b.Slice(next)
|
||
|
if len(slice) == 0 {
|
||
|
continue
|
||
|
}
|
||
|
if err := f(slice); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// LessFunc compares two encoded slices and reports whether a sorts before b.
type LessFunc func(a, b []byte) bool

// sortHelper carries the state needed to merge-sort the slices stored in a Buffer.
type sortHelper struct {
	offsets []int    // Checkpoint offsets into b, one per 1024 slices (see SortSliceBetween).
	b       *Buffer  // The buffer being sorted in place.
	tmp     *Buffer  // Scratch buffer used by sortSmall and merge.
	less    LessFunc // User-supplied ordering.
	small   []int    // Reusable scratch holding per-run slice offsets.
}
|
||
|
|
||
|
// sortSmall sorts the run of encoded slices in s.b.buf[start:end] in place: it
// collects each slice's offset into s.small, sorts the offsets by s.less,
// writes the slices into s.tmp in sorted order, then copies the result back.
func (s *sortHelper) sortSmall(start, end int) {
	s.tmp.Reset()
	s.small = s.small[:0]
	next := start
	// Collect the offset of every slice in [start, end).
	for next >= 0 && next < end {
		s.small = append(s.small, next)
		_, next = s.b.Slice(next)
	}

	// We are sorting the slices pointed to by s.small offsets, but only moving the offsets around.
	sort.Slice(s.small, func(i, j int) bool {
		left, _ := s.b.Slice(s.small[i])
		right, _ := s.b.Slice(s.small[j])
		return s.less(left, right)
	})
	// Now we iterate over the s.small offsets and copy over the slices. The result is now in order.
	for _, off := range s.small {
		s.tmp.Write(rawSlice(s.b.buf[off:]))
	}
	assert(end-start == copy(s.b.buf[start:end], s.tmp.Bytes()))
}
|
||
|
|
||
|
func assert(b bool) {
|
||
|
if !b {
|
||
|
log.Fatalf("%+v", errors.Errorf("Assertion failure"))
|
||
|
}
|
||
|
}
|
||
|
// check terminates the process if err is non-nil.
func check(err error) {
	if err == nil {
		return
	}
	log.Fatalf("%+v", err)
}
|
||
|
// check2 discards the first value and terminates the process if err is
// non-nil. Convenient for wrapping two-valued calls such as Write.
func check2(_ interface{}, err error) {
	check(err)
}
|
||
|
|
||
|
// merge merges the two sorted adjacent runs left and right back into
// s.b.buf[start:end]. left is copied into s.tmp first; right is merged in
// place. NOTE(review): the tmp copy of left appears to be what prevents the
// in-place writes from clobbering unread right data — confirm before changing.
func (s *sortHelper) merge(left, right []byte, start, end int) {
	if len(left) == 0 || len(right) == 0 {
		return
	}
	s.tmp.Reset()
	check2(s.tmp.Write(left))
	// From here on, left refers to the tmp copy, not the destination region.
	left = s.tmp.Bytes()

	var ls, rs []byte

	// copyLeft writes the current left record at start and advances both cursors.
	copyLeft := func() {
		assert(len(ls) == copy(s.b.buf[start:], ls))
		left = left[len(ls):]
		start += len(ls)
	}
	// copyRight writes the current right record at start and advances both cursors.
	copyRight := func() {
		assert(len(rs) == copy(s.b.buf[start:], rs))
		right = right[len(rs):]
		start += len(rs)
	}

	for start < end {
		// If one side is exhausted, copy the remainder of the other and stop.
		if len(left) == 0 {
			assert(len(right) == copy(s.b.buf[start:end], right))
			return
		}
		if len(right) == 0 {
			assert(len(left) == copy(s.b.buf[start:end], left))
			return
		}
		ls = rawSlice(left)
		rs = rawSlice(right)

		// We skip the first 4 bytes in the rawSlice, because that stores the length.
		if s.less(ls[4:], rs[4:]) {
			copyLeft()
		} else {
			copyRight()
		}
	}
}
|
||
|
|
||
|
// sort recursively merge-sorts the region between checkpoint indices lo and hi
// (indices into s.offsets, not buffer offsets) and returns the sorted region
// of the underlying buffer.
func (s *sortHelper) sort(lo, hi int) []byte {
	assert(lo <= hi)

	mid := lo + (hi-lo)/2
	loff, hoff := s.offsets[lo], s.offsets[hi]
	if lo == mid {
		// A single checkpoint run was already sorted by sortSmall; just return it.
		return s.b.buf[loff:hoff]
	}

	// lo, mid would sort from [offset[lo], offset[mid]) .
	left := s.sort(lo, mid)
	// Typically we'd use mid+1, but here mid represents an offset in the buffer. Each offset
	// contains a thousand entries. So, if we do mid+1, we'd skip over those entries.
	right := s.sort(mid, hi)

	s.merge(left, right, loff, hoff)
	return s.b.buf[loff:hoff]
}
|
||
|
|
||
|
// SortSlice is like SortSliceBetween but sorting over the entire buffer.
// less receives the slice contents without their 4-byte length prefixes.
func (b *Buffer) SortSlice(less func(left, right []byte) bool) {
	b.SortSliceBetween(b.StartOffset(), int(b.offset), less)
}
|
||
|
// SortSliceBetween sorts the length-prefixed slices stored between offsets
// start and end using less. It records a checkpoint offset every 1024 slices,
// sorts each checkpoint-to-checkpoint run with sortSmall, and then
// merge-sorts the runs.
func (b *Buffer) SortSliceBetween(start, end int, less LessFunc) {
	if start >= end {
		return
	}
	if start == 0 {
		panic("start can never be zero")
	}

	// Walk the records once, checkpointing every 1024th offset.
	var offsets []int
	next, count := start, 0
	for next >= 0 && next < end {
		if count%1024 == 0 {
			offsets = append(offsets, next)
		}
		_, next = b.Slice(next)
		count++
	}
	assert(len(offsets) > 0)
	// Ensure the final checkpoint is exactly end, so the last run is bounded.
	if offsets[len(offsets)-1] != end {
		offsets = append(offsets, end)
	}

	// Scratch buffer sized at roughly half the data plus 10% slack.
	szTmp := int(float64((end-start)/2) * 1.1)
	s := &sortHelper{
		offsets: offsets,
		b:       b,
		less:    less,
		small:   make([]int, 0, 1024),
		tmp:     NewBuffer(szTmp),
	}
	defer s.tmp.Release()

	// Sort each run of up to 1024 slices in place first.
	left := offsets[0]
	for _, off := range offsets[1:] {
		s.sortSmall(left, off)
		left = off
	}
	// Then merge the sorted runs.
	s.sort(0, len(offsets)-1)
}
|
||
|
|
||
|
func rawSlice(buf []byte) []byte {
|
||
|
sz := binary.BigEndian.Uint32(buf)
|
||
|
return buf[:4+int(sz)]
|
||
|
}
|
||
|
|
||
|
// Slice would return the slice written at offset.
|
||
|
func (b *Buffer) Slice(offset int) ([]byte, int) {
|
||
|
if offset >= int(b.offset) {
|
||
|
return nil, -1
|
||
|
}
|
||
|
|
||
|
sz := binary.BigEndian.Uint32(b.buf[offset:])
|
||
|
start := offset + 4
|
||
|
next := start + int(sz)
|
||
|
res := b.buf[start:next]
|
||
|
if next >= int(b.offset) {
|
||
|
next = -1
|
||
|
}
|
||
|
return res, next
|
||
|
}
|
||
|
|
||
|
// SliceOffsets is an expensive function. Use sparingly.
|
||
|
func (b *Buffer) SliceOffsets() []int {
|
||
|
next := b.StartOffset()
|
||
|
var offsets []int
|
||
|
for next >= 0 {
|
||
|
offsets = append(offsets, next)
|
||
|
_, next = b.Slice(next)
|
||
|
}
|
||
|
return offsets
|
||
|
}
|
||
|
|
||
|
func (b *Buffer) Data(offset int) []byte {
|
||
|
if offset > b.curSz {
|
||
|
panic("offset beyond current size")
|
||
|
}
|
||
|
return b.buf[offset:b.curSz]
|
||
|
}
|
||
|
|
||
|
// Write would write p bytes to the buffer.
|
||
|
func (b *Buffer) Write(p []byte) (n int, err error) {
|
||
|
b.Grow(len(p))
|
||
|
n = copy(b.buf[b.offset:], p)
|
||
|
b.offset += uint64(n)
|
||
|
return n, nil
|
||
|
}
|
||
|
|
||
|
// Reset would reset the buffer to be reused. The write offset moves back to
// just after the padding; the underlying allocation is kept.
func (b *Buffer) Reset() {
	b.offset = uint64(b.StartOffset())
}
|
||
|
|
||
|
// Release would free up the memory allocated by the buffer. Once the usage of buffer is done, it is
|
||
|
// important to call Release, otherwise a memory leak can happen.
|
||
|
func (b *Buffer) Release() error {
|
||
|
switch b.bufType {
|
||
|
case UseCalloc:
|
||
|
Free(b.buf)
|
||
|
|
||
|
case UseMmap:
|
||
|
fname := b.fd.Name()
|
||
|
if err := Munmap(b.buf); err != nil {
|
||
|
return errors.Wrapf(err, "while munmap file %s", fname)
|
||
|
}
|
||
|
if err := b.fd.Truncate(0); err != nil {
|
||
|
return errors.Wrapf(err, "while truncating file %s", fname)
|
||
|
}
|
||
|
if err := b.fd.Close(); err != nil {
|
||
|
return errors.Wrapf(err, "while closing file %s", fname)
|
||
|
}
|
||
|
if err := os.Remove(b.fd.Name()); err != nil {
|
||
|
return errors.Wrapf(err, "while deleting file %s", fname)
|
||
|
}
|
||
|
}
|
||
|
return nil
|
||
|
}
|