Add pushover notifications; this should be a super basic MVP
This commit is contained in:
parent ed13a5994f
commit d9917ab8b0
505 changed files with 195741 additions and 9 deletions
vendor/github.com/dgraph-io/badger/skl/README.md (113 additions; generated, vendored, normal file)
@@ -0,0 +1,113 @@
This is much better than `skiplist` and `slist`.

```
BenchmarkReadWrite/frac_0-8      3000000     537 ns/op
BenchmarkReadWrite/frac_1-8      3000000     503 ns/op
BenchmarkReadWrite/frac_2-8      3000000     492 ns/op
BenchmarkReadWrite/frac_3-8      3000000     475 ns/op
BenchmarkReadWrite/frac_4-8      3000000     440 ns/op
BenchmarkReadWrite/frac_5-8      5000000     442 ns/op
BenchmarkReadWrite/frac_6-8      5000000     380 ns/op
BenchmarkReadWrite/frac_7-8      5000000     338 ns/op
BenchmarkReadWrite/frac_8-8      5000000     294 ns/op
BenchmarkReadWrite/frac_9-8     10000000     268 ns/op
BenchmarkReadWrite/frac_10-8   100000000      26.3 ns/op
```

And even better than a simple map with read-write lock:

```
BenchmarkReadWriteMap/frac_0-8     2000000     774 ns/op
BenchmarkReadWriteMap/frac_1-8     2000000     647 ns/op
BenchmarkReadWriteMap/frac_2-8     3000000     605 ns/op
BenchmarkReadWriteMap/frac_3-8     3000000     603 ns/op
BenchmarkReadWriteMap/frac_4-8     3000000     556 ns/op
BenchmarkReadWriteMap/frac_5-8     3000000     472 ns/op
BenchmarkReadWriteMap/frac_6-8     3000000     476 ns/op
BenchmarkReadWriteMap/frac_7-8     3000000     457 ns/op
BenchmarkReadWriteMap/frac_8-8     5000000     444 ns/op
BenchmarkReadWriteMap/frac_9-8     5000000     361 ns/op
BenchmarkReadWriteMap/frac_10-8   10000000     212 ns/op
```
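For reference, here is a minimal sketch of how a mixed read/write benchmark with a configurable read fraction could be structured in Go. This is an illustration only, not badger's actual benchmark code; the `store` interface, the `mapStore` baseline, and the function names are assumptions made for the sketch.

```go
package skl_test

import (
    "encoding/binary"
    "fmt"
    "math/rand"
    "sync"
    "sync/atomic"
    "testing"
)

// store is a stand-in for whatever structure is being measured
// (a skiplist, a map guarded by a RWMutex, ...). Hypothetical interface.
type store interface {
    Put(key, val []byte)
    Get(key []byte) []byte
}

// mapStore is the "simple map with read-write lock" baseline mentioned above.
type mapStore struct {
    mu sync.RWMutex
    m  map[string][]byte
}

func newMapStore() *mapStore { return &mapStore{m: make(map[string][]byte)} }

func (s *mapStore) Put(key, val []byte) {
    s.mu.Lock()
    s.m[string(key)] = val
    s.mu.Unlock()
}

func (s *mapStore) Get(key []byte) []byte {
    s.mu.RLock()
    defer s.mu.RUnlock()
    return s.m[string(key)]
}

// benchmarkReadWrite runs concurrent operations where readFrac of them are reads.
func benchmarkReadWrite(b *testing.B, s store, readFrac float32) {
    var counter uint64
    b.RunParallel(func(pb *testing.PB) {
        rng := rand.New(rand.NewSource(rand.Int63()))
        readBuf := make([]byte, 8)
        for pb.Next() {
            if rng.Float32() < readFrac {
                // Read a random key; misses are fine for the benchmark.
                binary.BigEndian.PutUint64(readBuf, rng.Uint64())
                _ = s.Get(readBuf)
            } else {
                // Write a fresh, unique key.
                key := make([]byte, 8)
                binary.BigEndian.PutUint64(key, atomic.AddUint64(&counter, 1))
                s.Put(key, key)
            }
        }
    })
}

// BenchmarkReadWriteMap sweeps the read fraction from 0/10 to 10/10,
// mirroring the frac_0 ... frac_10 sub-benchmark names above.
func BenchmarkReadWriteMap(b *testing.B) {
    for i := 0; i <= 10; i++ {
        frac := float32(i) / 10
        b.Run(fmt.Sprintf("frac_%d", i), func(b *testing.B) {
            benchmarkReadWrite(b, newMapStore(), frac)
        })
    }
}
```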
# Node Pooling

Command used

```
rm -Rf tmp && /usr/bin/time -l ./populate -keys_mil 10
```

For pprof results, we run without using /usr/bin/time. There are four runs below.

Results seem to vary quite a bit between runs.
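The heap numbers in the next two sections come from Go's pprof tooling. As a hedged illustration (the actual profiling hookup inside the `populate` tool is not shown in this diff), a load generator can dump a heap profile with `runtime/pprof` and the profile can then be inspected with `go tool pprof`:

```go
package main

import (
    "log"
    "os"
    "runtime"
    "runtime/pprof"
)

// writeHeapProfile dumps a heap profile to path so it can be examined with
// `go tool pprof`. Sketch only; not the actual populate implementation.
func writeHeapProfile(path string) {
    f, err := os.Create(path)
    if err != nil {
        log.Fatalf("could not create heap profile: %v", err)
    }
    defer f.Close()

    // WriteHeapProfile reports statistics as of the last GC, so force one first.
    runtime.GC()
    if err := pprof.WriteHeapProfile(f); err != nil {
        log.Fatalf("could not write heap profile: %v", err)
    }
}

func main() {
    // ... run the key-loading workload here ...
    writeHeapProfile("heap.pprof")
}
```

Something like `go tool pprof ./populate heap.pprof` followed by `top10` then prints tables in the format shown below.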
## Before node pooling

```
1311.53MB of 1338.69MB total (97.97%)
Dropped 30 nodes (cum <= 6.69MB)
Showing top 10 nodes out of 37 (cum >= 12.50MB)
      flat  flat%   sum%        cum   cum%
  523.04MB 39.07% 39.07%   523.04MB 39.07%  github.com/dgraph-io/badger/skl.(*Skiplist).Put
  184.51MB 13.78% 52.85%   184.51MB 13.78%  runtime.stringtoslicebyte
  166.01MB 12.40% 65.25%   689.04MB 51.47%  github.com/dgraph-io/badger/mem.(*Table).Put
     165MB 12.33% 77.58%      165MB 12.33%  runtime.convT2E
  116.92MB  8.73% 86.31%   116.92MB  8.73%  bytes.makeSlice
   62.50MB  4.67% 90.98%    62.50MB  4.67%  main.newValue
   34.50MB  2.58% 93.56%    34.50MB  2.58%  github.com/dgraph-io/badger/table.(*BlockIterator).parseKV
   25.50MB  1.90% 95.46%   100.06MB  7.47%  github.com/dgraph-io/badger/y.(*MergeIterator).Next
   21.06MB  1.57% 97.04%    21.06MB  1.57%  github.com/dgraph-io/badger/table.(*Table).read
   12.50MB  0.93% 97.97%    12.50MB  0.93%  github.com/dgraph-io/badger/table.header.Encode

      128.31 real       329.37 user        17.11 sys
  3355660288  maximum resident set size
           0  average shared memory size
           0  average unshared data size
           0  average unshared stack size
     2203080  page reclaims
         764  page faults
           0  swaps
         275  block input operations
          76  block output operations
           0  messages sent
           0  messages received
           0  signals received
       49173  voluntary context switches
      599922  involuntary context switches
```

## After node pooling

```
1963.13MB of 2026.09MB total (96.89%)
Dropped 29 nodes (cum <= 10.13MB)
Showing top 10 nodes out of 41 (cum >= 185.62MB)
      flat  flat%   sum%        cum   cum%
  658.05MB 32.48% 32.48%   658.05MB 32.48%  github.com/dgraph-io/badger/skl.glob..func1
  297.51MB 14.68% 47.16%   297.51MB 14.68%  runtime.convT2E
  257.51MB 12.71% 59.87%   257.51MB 12.71%  runtime.stringtoslicebyte
  249.01MB 12.29% 72.16%  1007.06MB 49.70%  github.com/dgraph-io/badger/mem.(*Table).Put
  142.43MB  7.03% 79.19%   142.43MB  7.03%  bytes.makeSlice
     100MB  4.94% 84.13%   758.05MB 37.41%  github.com/dgraph-io/badger/skl.newNode
   99.50MB  4.91% 89.04%    99.50MB  4.91%  main.newValue
      75MB  3.70% 92.74%       75MB  3.70%  github.com/dgraph-io/badger/table.(*BlockIterator).parseKV
   44.62MB  2.20% 94.94%    44.62MB  2.20%  github.com/dgraph-io/badger/table.(*Table).read
   39.50MB  1.95% 96.89%   185.62MB  9.16%  github.com/dgraph-io/badger/y.(*MergeIterator).Next

      135.58 real       374.29 user        17.65 sys
  3740614656  maximum resident set size
           0  average shared memory size
           0  average unshared data size
           0  average unshared stack size
     2276566  page reclaims
         770  page faults
           0  swaps
         128  block input operations
          90  block output operations
           0  messages sent
           0  messages received
           0  signals received
       46434  voluntary context switches
      597049  involuntary context switches
```
vendor/github.com/dgraph-io/badger/skl/arena.go (136 additions; generated, vendored, normal file)
@@ -0,0 +1,136 @@
/*
 * Copyright 2017 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package skl

import (
    "sync/atomic"
    "unsafe"

    "github.com/dgraph-io/badger/y"
)

const (
    offsetSize = int(unsafe.Sizeof(uint32(0)))

    // Always align nodes on 64-bit boundaries, even on 32-bit architectures,
    // so that the node.value field is 64-bit aligned. This is necessary because
    // node.getValueOffset uses atomic.LoadUint64, which expects its input
    // pointer to be 64-bit aligned.
    nodeAlign = int(unsafe.Sizeof(uint64(0))) - 1
)

// Arena should be lock-free.
type Arena struct {
    n   uint32
    buf []byte
}

// newArena returns a new arena.
func newArena(n int64) *Arena {
    // Don't store data at position 0 in order to reserve offset=0 as a kind
    // of nil pointer.
    out := &Arena{
        n:   1,
        buf: make([]byte, n),
    }
    return out
}

func (s *Arena) size() int64 {
    return int64(atomic.LoadUint32(&s.n))
}

func (s *Arena) reset() {
    atomic.StoreUint32(&s.n, 0)
}

// putNode allocates a node in the arena. The node is aligned on a pointer-sized
// boundary. The arena offset of the node is returned.
func (s *Arena) putNode(height int) uint32 {
    // Compute the amount of the tower that will never be used, since the height
    // is less than maxHeight.
    unusedSize := (maxHeight - height) * offsetSize

    // Pad the allocation with enough bytes to ensure pointer alignment.
    l := uint32(MaxNodeSize - unusedSize + nodeAlign)
    n := atomic.AddUint32(&s.n, l)
    y.AssertTruef(int(n) <= len(s.buf),
        "Arena too small, toWrite:%d newTotal:%d limit:%d",
        l, n, len(s.buf))

    // Return the aligned offset.
    m := (n - l + uint32(nodeAlign)) & ^uint32(nodeAlign)
    return m
}

// Put will *copy* val into arena. To make better use of this, reuse your input
// val buffer. Returns an offset into buf. User is responsible for remembering
// size of val. We could also store this size inside arena but the encoding and
// decoding will incur some overhead.
func (s *Arena) putVal(v y.ValueStruct) uint32 {
    l := uint32(v.EncodedSize())
    n := atomic.AddUint32(&s.n, l)
    y.AssertTruef(int(n) <= len(s.buf),
        "Arena too small, toWrite:%d newTotal:%d limit:%d",
        l, n, len(s.buf))
    m := n - l
    v.Encode(s.buf[m:])
    return m
}

func (s *Arena) putKey(key []byte) uint32 {
    l := uint32(len(key))
    n := atomic.AddUint32(&s.n, l)
    y.AssertTruef(int(n) <= len(s.buf),
        "Arena too small, toWrite:%d newTotal:%d limit:%d",
        l, n, len(s.buf))
    m := n - l
    y.AssertTrue(len(key) == copy(s.buf[m:n], key))
    return m
}

// getNode returns a pointer to the node located at offset. If the offset is
// zero, then the nil node pointer is returned.
func (s *Arena) getNode(offset uint32) *node {
    if offset == 0 {
        return nil
    }

    return (*node)(unsafe.Pointer(&s.buf[offset]))
}

// getKey returns byte slice at offset.
func (s *Arena) getKey(offset uint32, size uint16) []byte {
    return s.buf[offset : offset+uint32(size)]
}

// getVal returns byte slice at offset. The given size should be just the value
// size and should NOT include the meta bytes.
func (s *Arena) getVal(offset uint32, size uint16) (ret y.ValueStruct) {
    ret.Decode(s.buf[offset : offset+uint32(size)])
    return
}

// getNodeOffset returns the offset of node in the arena. If the node pointer is
// nil, then the zero offset is returned.
func (s *Arena) getNodeOffset(nd *node) uint32 {
    if nd == nil {
        return 0
    }

    return uint32(uintptr(unsafe.Pointer(nd)) - uintptr(unsafe.Pointer(&s.buf[0])))
}
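A short aside on the alignment arithmetic in putNode above: adding nodeAlign and masking with its complement rounds an allocation's start offset up to the next 8-byte boundary. The following standalone snippet illustrates the same expression with made-up offsets; it is not part of the vendored package.

```go
package main

import "fmt"

func main() {
    const nodeAlign = 8 - 1 // same idea as in arena.go: align to 8 bytes

    // Round an arbitrary start offset up to the next 8-byte boundary,
    // mirroring: m := (n - l + uint32(nodeAlign)) & ^uint32(nodeAlign)
    for _, start := range []uint32{1, 7, 8, 13, 16} {
        aligned := (start + nodeAlign) &^ nodeAlign
        fmt.Printf("start=%2d -> aligned=%2d\n", start, aligned)
    }
}
```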
vendor/github.com/dgraph-io/badger/skl/skl.go (517 additions; generated, vendored, normal file)
@@ -0,0 +1,517 @@
/*
 * Copyright 2017 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
Adapted from RocksDB inline skiplist.

Key differences:
- No optimization for sequential inserts (no "prev").
- No custom comparator.
- Support overwrites. This requires care when we see the same key when inserting.
  For RocksDB or LevelDB, overwrites are implemented as a newer sequence number in the key, so
  there is no need for values. We don't intend to support versioning. In-place updates of values
  would be more efficient.
- We discard all non-concurrent code.
- We do not support Splices. This simplifies the code a lot.
- No AllocateNode or other pointer arithmetic.
- We combine the findLessThan, findGreaterOrEqual, etc into one function.
*/

package skl

import (
    "math"
    "sync/atomic"
    "unsafe"

    "github.com/dgraph-io/badger/y"
    "github.com/dgraph-io/ristretto/z"
)

const (
    maxHeight      = 20
    heightIncrease = math.MaxUint32 / 3
)

// MaxNodeSize is the memory footprint of a node of maximum height.
const MaxNodeSize = int(unsafe.Sizeof(node{}))

type node struct {
    // Multiple parts of the value are encoded as a single uint64 so that it
    // can be atomically loaded and stored:
    //   value offset: uint32 (bits 0-31)
    //   value size  : uint16 (bits 32-47)
    value uint64

    // A byte slice is 24 bytes. We are trying to save space here.
    keyOffset uint32 // Immutable. No need to lock to access key.
    keySize   uint16 // Immutable. No need to lock to access key.

    // Height of the tower.
    height uint16

    // Most nodes do not need to use the full height of the tower, since the
    // probability of each successive level decreases exponentially. Because
    // these elements are never accessed, they do not need to be allocated.
    // Therefore, when a node is allocated in the arena, its memory footprint
    // is deliberately truncated to not include unneeded tower elements.
    //
    // All accesses to elements should use CAS operations, with no need to lock.
    tower [maxHeight]uint32
}

// Skiplist maps keys to values (in memory)
type Skiplist struct {
    height int32 // Current height. 1 <= height <= kMaxHeight. CAS.
    head   *node
    ref    int32
    arena  *Arena
}

// IncrRef increases the refcount
func (s *Skiplist) IncrRef() {
    atomic.AddInt32(&s.ref, 1)
}

// DecrRef decrements the refcount, deallocating the Skiplist when done using it
func (s *Skiplist) DecrRef() {
    newRef := atomic.AddInt32(&s.ref, -1)
    if newRef > 0 {
        return
    }

    s.arena.reset()
    // Indicate we are closed. Good for testing. Also, lets GC reclaim memory. Race condition
    // here would suggest we are accessing skiplist when we are supposed to have no reference!
    s.arena = nil
    // Since the head references the arena's buf, as long as the head is kept around
    // GC can't release the buf.
    s.head = nil
}

func newNode(arena *Arena, key []byte, v y.ValueStruct, height int) *node {
    // The base level is already allocated in the node struct.
    offset := arena.putNode(height)
    node := arena.getNode(offset)
    node.keyOffset = arena.putKey(key)
    node.keySize = uint16(len(key))
    node.height = uint16(height)
    node.value = encodeValue(arena.putVal(v), v.EncodedSize())
    return node
}

func encodeValue(valOffset uint32, valSize uint16) uint64 {
    return uint64(valSize)<<32 | uint64(valOffset)
}

func decodeValue(value uint64) (valOffset uint32, valSize uint16) {
    valOffset = uint32(value)
    valSize = uint16(value >> 32)
    return
}

// NewSkiplist makes a new empty skiplist, with a given arena size
func NewSkiplist(arenaSize int64) *Skiplist {
    arena := newArena(arenaSize)
    head := newNode(arena, nil, y.ValueStruct{}, maxHeight)
    return &Skiplist{
        height: 1,
        head:   head,
        arena:  arena,
        ref:    1,
    }
}

func (s *node) getValueOffset() (uint32, uint16) {
    value := atomic.LoadUint64(&s.value)
    return decodeValue(value)
}

func (s *node) key(arena *Arena) []byte {
    return arena.getKey(s.keyOffset, s.keySize)
}

func (s *node) setValue(arena *Arena, v y.ValueStruct) {
    valOffset := arena.putVal(v)
    value := encodeValue(valOffset, v.EncodedSize())
    atomic.StoreUint64(&s.value, value)
}

func (s *node) getNextOffset(h int) uint32 {
    return atomic.LoadUint32(&s.tower[h])
}

func (s *node) casNextOffset(h int, old, val uint32) bool {
    return atomic.CompareAndSwapUint32(&s.tower[h], old, val)
}

// Returns true if key is strictly > n.key.
// If n is nil, this is an "end" marker and we return false.
//func (s *Skiplist) keyIsAfterNode(key []byte, n *node) bool {
//	y.AssertTrue(n != s.head)
//	return n != nil && y.CompareKeys(key, n.key) > 0
//}

func (s *Skiplist) randomHeight() int {
    h := 1
    for h < maxHeight && z.FastRand() <= heightIncrease {
        h++
    }
    return h
}

func (s *Skiplist) getNext(nd *node, height int) *node {
    return s.arena.getNode(nd.getNextOffset(height))
}

// findNear finds the node near to key.
// If less=true, it finds rightmost node such that node.key < key (if allowEqual=false) or
// node.key <= key (if allowEqual=true).
// If less=false, it finds leftmost node such that node.key > key (if allowEqual=false) or
// node.key >= key (if allowEqual=true).
// Returns the node found. The bool returned is true if the node has key equal to given key.
func (s *Skiplist) findNear(key []byte, less bool, allowEqual bool) (*node, bool) {
    x := s.head
    level := int(s.getHeight() - 1)
    for {
        // Assume x.key < key.
        next := s.getNext(x, level)
        if next == nil {
            // x.key < key < END OF LIST
            if level > 0 {
                // Can descend further to iterate closer to the end.
                level--
                continue
            }
            // Level=0. Cannot descend further. Let's return something that makes sense.
            if !less {
                return nil, false
            }
            // Try to return x. Make sure it is not a head node.
            if x == s.head {
                return nil, false
            }
            return x, false
        }

        nextKey := next.key(s.arena)
        cmp := y.CompareKeys(key, nextKey)
        if cmp > 0 {
            // x.key < next.key < key. We can continue to move right.
            x = next
            continue
        }
        if cmp == 0 {
            // x.key < key == next.key.
            if allowEqual {
                return next, true
            }
            if !less {
                // We want >, so go to base level to grab the next bigger node.
                return s.getNext(next, 0), false
            }
            // We want <. If not base level, we should go closer in the next level.
            if level > 0 {
                level--
                continue
            }
            // On base level. Return x.
            if x == s.head {
                return nil, false
            }
            return x, false
        }
        // cmp < 0. In other words, x.key < key < next.
        if level > 0 {
            level--
            continue
        }
        // At base level. Need to return something.
        if !less {
            return next, false
        }
        // Try to return x. Make sure it is not a head node.
        if x == s.head {
            return nil, false
        }
        return x, false
    }
}

// findSpliceForLevel returns (outBefore, outAfter) with outBefore.key <= key <= outAfter.key.
// The input "before" tells us where to start looking.
// If we found a node with the same key, then we return outBefore = outAfter.
// Otherwise, outBefore.key < key < outAfter.key.
func (s *Skiplist) findSpliceForLevel(key []byte, before *node, level int) (*node, *node) {
    for {
        // Assume before.key < key.
        next := s.getNext(before, level)
        if next == nil {
            return before, next
        }
        nextKey := next.key(s.arena)
        cmp := y.CompareKeys(key, nextKey)
        if cmp == 0 {
            // Equality case.
            return next, next
        }
        if cmp < 0 {
            // before.key < key < next.key. We are done for this level.
            return before, next
        }
        before = next // Keep moving right on this level.
    }
}

func (s *Skiplist) getHeight() int32 {
    return atomic.LoadInt32(&s.height)
}

// Put inserts the key-value pair.
func (s *Skiplist) Put(key []byte, v y.ValueStruct) {
    // Since we allow overwrite, we may not need to create a new node. We might not even need to
    // increase the height. Let's defer these actions.

    listHeight := s.getHeight()
    var prev [maxHeight + 1]*node
    var next [maxHeight + 1]*node
    prev[listHeight] = s.head
    next[listHeight] = nil
    for i := int(listHeight) - 1; i >= 0; i-- {
        // Use higher level to speed up for current level.
        prev[i], next[i] = s.findSpliceForLevel(key, prev[i+1], i)
        if prev[i] == next[i] {
            prev[i].setValue(s.arena, v)
            return
        }
    }

    // We do need to create a new node.
    height := s.randomHeight()
    x := newNode(s.arena, key, v, height)

    // Try to increase s.height via CAS.
    listHeight = s.getHeight()
    for height > int(listHeight) {
        if atomic.CompareAndSwapInt32(&s.height, listHeight, int32(height)) {
            // Successfully increased skiplist.height.
            break
        }
        listHeight = s.getHeight()
    }

    // We always insert from the base level and up. After you add a node in base level, we cannot
    // create a node in the level above because it would have discovered the node in the base level.
    for i := 0; i < height; i++ {
        for {
            if prev[i] == nil {
                y.AssertTrue(i > 1) // This cannot happen in base level.
                // We haven't computed prev, next for this level because height exceeds old listHeight.
                // For these levels, we expect the lists to be sparse, so we can just search from head.
                prev[i], next[i] = s.findSpliceForLevel(key, s.head, i)
                // Someone adds the exact same key before we are able to do so. This can only happen on
                // the base level. But we know we are not on the base level.
                y.AssertTrue(prev[i] != next[i])
            }
            nextOffset := s.arena.getNodeOffset(next[i])
            x.tower[i] = nextOffset
            if prev[i].casNextOffset(i, nextOffset, s.arena.getNodeOffset(x)) {
                // Managed to insert x between prev[i] and next[i]. Go to the next level.
                break
            }
            // CAS failed. We need to recompute prev and next.
            // It is unlikely to be helpful to try to use a different level as we redo the search,
            // because it is unlikely that lots of nodes are inserted between prev[i] and next[i].
            prev[i], next[i] = s.findSpliceForLevel(key, prev[i], i)
            if prev[i] == next[i] {
                y.AssertTruef(i == 0, "Equality can happen only on base level: %d", i)
                prev[i].setValue(s.arena, v)
                return
            }
        }
    }
}

// Empty returns true if the Skiplist is empty.
func (s *Skiplist) Empty() bool {
    return s.findLast() == nil
}

// findLast returns the last element. If head (empty list), we return nil. All the find functions
// will NEVER return the head nodes.
func (s *Skiplist) findLast() *node {
    n := s.head
    level := int(s.getHeight()) - 1
    for {
        next := s.getNext(n, level)
        if next != nil {
            n = next
            continue
        }
        if level == 0 {
            if n == s.head {
                return nil
            }
            return n
        }
        level--
    }
}

// Get gets the value associated with the key. It returns a valid value if it finds equal or earlier
// version of the same key.
func (s *Skiplist) Get(key []byte) y.ValueStruct {
    n, _ := s.findNear(key, false, true) // findGreaterOrEqual.
    if n == nil {
        return y.ValueStruct{}
    }

    nextKey := s.arena.getKey(n.keyOffset, n.keySize)
    if !y.SameKey(key, nextKey) {
        return y.ValueStruct{}
    }

    valOffset, valSize := n.getValueOffset()
    vs := s.arena.getVal(valOffset, valSize)
    vs.Version = y.ParseTs(nextKey)
    return vs
}

// NewIterator returns a skiplist iterator. You have to Close() the iterator.
func (s *Skiplist) NewIterator() *Iterator {
    s.IncrRef()
    return &Iterator{list: s}
}

// MemSize returns the size of the Skiplist in terms of how much memory is used within its internal
// arena.
func (s *Skiplist) MemSize() int64 { return s.arena.size() }

// Iterator is an iterator over skiplist object. For new objects, you just
// need to initialize Iterator.list.
type Iterator struct {
    list *Skiplist
    n    *node
}

// Close frees the resources held by the iterator
func (s *Iterator) Close() error {
    s.list.DecrRef()
    return nil
}

// Valid returns true iff the iterator is positioned at a valid node.
func (s *Iterator) Valid() bool { return s.n != nil }

// Key returns the key at the current position.
func (s *Iterator) Key() []byte {
    return s.list.arena.getKey(s.n.keyOffset, s.n.keySize)
}

// Value returns value.
func (s *Iterator) Value() y.ValueStruct {
    valOffset, valSize := s.n.getValueOffset()
    return s.list.arena.getVal(valOffset, valSize)
}

// Next advances to the next position.
func (s *Iterator) Next() {
    y.AssertTrue(s.Valid())
    s.n = s.list.getNext(s.n, 0)
}

// Prev advances to the previous position.
func (s *Iterator) Prev() {
    y.AssertTrue(s.Valid())
    s.n, _ = s.list.findNear(s.Key(), true, false) // find <. No equality allowed.
}

// Seek advances to the first entry with a key >= target.
func (s *Iterator) Seek(target []byte) {
    s.n, _ = s.list.findNear(target, false, true) // find >=.
}

// SeekForPrev finds an entry with key <= target.
func (s *Iterator) SeekForPrev(target []byte) {
    s.n, _ = s.list.findNear(target, true, true) // find <=.
}

// SeekToFirst seeks position at the first entry in list.
// Final state of iterator is Valid() iff list is not empty.
func (s *Iterator) SeekToFirst() {
    s.n = s.list.getNext(s.list.head, 0)
}

// SeekToLast seeks position at the last entry in list.
// Final state of iterator is Valid() iff list is not empty.
func (s *Iterator) SeekToLast() {
    s.n = s.list.findLast()
}

// UniIterator is a unidirectional memtable iterator. It is a thin wrapper around
// Iterator. We like to keep Iterator as before, because it is more powerful and
// we might support bidirectional iterators in the future.
type UniIterator struct {
    iter     *Iterator
    reversed bool
}

// NewUniIterator returns a UniIterator.
func (s *Skiplist) NewUniIterator(reversed bool) *UniIterator {
    return &UniIterator{
        iter:     s.NewIterator(),
        reversed: reversed,
    }
}

// Next implements y.Interface
func (s *UniIterator) Next() {
    if !s.reversed {
        s.iter.Next()
    } else {
        s.iter.Prev()
    }
}

// Rewind implements y.Interface
func (s *UniIterator) Rewind() {
    if !s.reversed {
        s.iter.SeekToFirst()
    } else {
        s.iter.SeekToLast()
    }
}

// Seek implements y.Interface
func (s *UniIterator) Seek(key []byte) {
    if !s.reversed {
        s.iter.Seek(key)
    } else {
        s.iter.SeekForPrev(key)
    }
}

// Key implements y.Interface
func (s *UniIterator) Key() []byte { return s.iter.Key() }

// Value implements y.Interface
func (s *UniIterator) Value() y.ValueStruct { return s.iter.Value() }

// Valid implements y.Interface
func (s *UniIterator) Valid() bool { return s.iter.Valid() }

// Close implements y.Interface (and frees up the iter's resources)
func (s *UniIterator) Close() error { return s.iter.Close() }
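To make the vendored API above a bit more concrete, here is a hedged usage sketch of the exported surface (NewSkiplist, Put, Get and the iterator). The arena size, keys and values are illustrative, and the sketch assumes the vendored y package's KeyWithTs helper, since CompareKeys and ParseTs in this package treat the trailing bytes of a key as a version timestamp.

```go
package main

import (
    "fmt"

    "github.com/dgraph-io/badger/skl"
    "github.com/dgraph-io/badger/y"
)

func main() {
    list := skl.NewSkiplist(1 << 20) // 1 MB arena backing all nodes, keys and values
    defer list.DecrRef()             // release the initial reference when done

    // Keys carry a version timestamp suffix (assumed helper y.KeyWithTs).
    key := y.KeyWithTs([]byte("user42"), 1)
    list.Put(key, y.ValueStruct{Value: []byte("hello")})

    vs := list.Get(key)
    fmt.Printf("value=%q version=%d\n", vs.Value, vs.Version)

    // Iterate in key order; the iterator takes its own reference on the list.
    // Note that Key() returns the full key including the 8-byte timestamp suffix.
    it := list.NewIterator()
    defer it.Close()
    for it.SeekToFirst(); it.Valid(); it.Next() {
        fmt.Printf("%q -> %q\n", it.Key(), it.Value().Value)
    }
}
```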