aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/syndtr/goleveldb/leveldb/table.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/syndtr/goleveldb/leveldb/table.go')
-rw-r--r--vendor/github.com/syndtr/goleveldb/leveldb/table.go529
1 files changed, 529 insertions, 0 deletions
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
new file mode 100644
index 0000000..81d18a5
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go
@@ -0,0 +1,529 @@
+// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
+// All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "sort"
+ "sync/atomic"
+
+ "github.com/syndtr/goleveldb/leveldb/cache"
+ "github.com/syndtr/goleveldb/leveldb/iterator"
+ "github.com/syndtr/goleveldb/leveldb/opt"
+ "github.com/syndtr/goleveldb/leveldb/storage"
+ "github.com/syndtr/goleveldb/leveldb/table"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+// tFile holds basic information about a table.
+type tFile struct {
+ fd storage.FileDesc
+ seekLeft int32
+ size int64
+ imin, imax internalKey
+}
+
+// Returns true if given key is after largest key of this table.
+func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0
+}
+
+// Returns true if given key is before smallest key of this table.
+func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
+ return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0
+}
+
+// Returns true if given key range overlaps with this table key range.
+func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
+ return !t.after(icmp, umin) && !t.before(icmp, umax)
+}
+
+// Cosumes one seek and return current seeks left.
+func (t *tFile) consumeSeek() int32 {
+ return atomic.AddInt32(&t.seekLeft, -1)
+}
+
+// Creates new tFile.
+func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
+ f := &tFile{
+ fd: fd,
+ size: size,
+ imin: imin,
+ imax: imax,
+ }
+
+ // We arrange to automatically compact this file after
+ // a certain number of seeks. Let's assume:
+ // (1) One seek costs 10ms
+ // (2) Writing or reading 1MB costs 10ms (100MB/s)
+ // (3) A compaction of 1MB does 25MB of IO:
+ // 1MB read from this level
+ // 10-12MB read from next level (boundaries may be misaligned)
+ // 10-12MB written to next level
+ // This implies that 25 seeks cost the same as the compaction
+ // of 1MB of data. I.e., one seek costs approximately the
+ // same as the compaction of 40KB of data. We are a little
+ // conservative and allow approximately one seek for every 16KB
+ // of data before triggering a compaction.
+ f.seekLeft = int32(size / 16384)
+ if f.seekLeft < 100 {
+ f.seekLeft = 100
+ }
+
+ return f
+}
+
+func tableFileFromRecord(r atRecord) *tFile {
+ return newTableFile(storage.FileDesc{storage.TypeTable, r.num}, r.size, r.imin, r.imax)
+}
+
+// tFiles hold multiple tFile.
+type tFiles []*tFile
+
+func (tf tFiles) Len() int { return len(tf) }
+func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
+
+func (tf tFiles) nums() string {
+ x := "[ "
+ for i, f := range tf {
+ if i != 0 {
+ x += ", "
+ }
+ x += fmt.Sprint(f.fd.Num)
+ }
+ x += " ]"
+ return x
+}
+
+// Returns true if i smallest key is less than j.
+// This used for sort by key in ascending order.
+func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
+ a, b := tf[i], tf[j]
+ n := icmp.Compare(a.imin, b.imin)
+ if n == 0 {
+ return a.fd.Num < b.fd.Num
+ }
+ return n < 0
+}
+
+// Returns true if i file number is greater than j.
+// This used for sort by file number in descending order.
+func (tf tFiles) lessByNum(i, j int) bool {
+ return tf[i].fd.Num > tf[j].fd.Num
+}
+
+// Sorts tables by key in ascending order.
+func (tf tFiles) sortByKey(icmp *iComparer) {
+ sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
+}
+
+// Sorts tables by file number in descending order.
+func (tf tFiles) sortByNum() {
+ sort.Sort(&tFilesSortByNum{tFiles: tf})
+}
+
+// Returns sum of all tables size.
+func (tf tFiles) size() (sum int64) {
+ for _, t := range tf {
+ sum += t.size
+ }
+ return sum
+}
+
+// Searches smallest index of tables whose its smallest
+// key is after or equal with given key.
+func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imin, ikey) >= 0
+ })
+}
+
+// Searches smallest index of tables whose its largest
+// key is after or equal with given key.
+func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
+ return sort.Search(len(tf), func(i int) bool {
+ return icmp.Compare(tf[i].imax, ikey) >= 0
+ })
+}
+
+// Returns true if given key range overlaps with one or more
+// tables key range. If unsorted is true then binary search will not be used.
+func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
+ if unsorted {
+ // Check against all files.
+ for _, t := range tf {
+ if t.overlaps(icmp, umin, umax) {
+ return true
+ }
+ }
+ return false
+ }
+
+ i := 0
+ if len(umin) > 0 {
+ // Find the earliest possible internal key for min.
+ i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
+ }
+ if i >= len(tf) {
+ // Beginning of range is after all files, so no overlap.
+ return false
+ }
+ return !tf[i].before(icmp, umax)
+}
+
+// Returns tables whose its key range overlaps with given key range.
+// Range will be expanded if ukey found hop across tables.
+// If overlapped is true then the search will be restarted if umax
+// expanded.
+// The dst content will be overwritten.
+func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
+ dst = dst[:0]
+ for i := 0; i < len(tf); {
+ t := tf[i]
+ if t.overlaps(icmp, umin, umax) {
+ if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
+ umin = t.imin.ukey()
+ dst = dst[:0]
+ i = 0
+ continue
+ } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
+ umax = t.imax.ukey()
+ // Restart search if it is overlapped.
+ if overlapped {
+ dst = dst[:0]
+ i = 0
+ continue
+ }
+ }
+
+ dst = append(dst, t)
+ }
+ i++
+ }
+
+ return dst
+}
+
+// Returns tables key range.
+func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
+ for i, t := range tf {
+ if i == 0 {
+ imin, imax = t.imin, t.imax
+ continue
+ }
+ if icmp.Compare(t.imin, imin) < 0 {
+ imin = t.imin
+ }
+ if icmp.Compare(t.imax, imax) > 0 {
+ imax = t.imax
+ }
+ }
+
+ return
+}
+
+// Creates iterator index from tables.
+func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
+ if slice != nil {
+ var start, limit int
+ if slice.Start != nil {
+ start = tf.searchMax(icmp, internalKey(slice.Start))
+ }
+ if slice.Limit != nil {
+ limit = tf.searchMin(icmp, internalKey(slice.Limit))
+ } else {
+ limit = tf.Len()
+ }
+ tf = tf[start:limit]
+ }
+ return iterator.NewArrayIndexer(&tFilesArrayIndexer{
+ tFiles: tf,
+ tops: tops,
+ icmp: icmp,
+ slice: slice,
+ ro: ro,
+ })
+}
+
+// Tables iterator index.
+type tFilesArrayIndexer struct {
+ tFiles
+ tops *tOps
+ icmp *iComparer
+ slice *util.Range
+ ro *opt.ReadOptions
+}
+
+func (a *tFilesArrayIndexer) Search(key []byte) int {
+ return a.searchMax(a.icmp, internalKey(key))
+}
+
+func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
+ if i == 0 || i == a.Len()-1 {
+ return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
+ }
+ return a.tops.newIterator(a.tFiles[i], nil, a.ro)
+}
+
+// Helper type for sortByKey.
+type tFilesSortByKey struct {
+ tFiles
+ icmp *iComparer
+}
+
+func (x *tFilesSortByKey) Less(i, j int) bool {
+ return x.lessByKey(x.icmp, i, j)
+}
+
+// Helper type for sortByNum.
+type tFilesSortByNum struct {
+ tFiles
+}
+
+func (x *tFilesSortByNum) Less(i, j int) bool {
+ return x.lessByNum(i, j)
+}
+
+// Table operations.
+type tOps struct {
+ s *session
+ noSync bool
+ cache *cache.Cache
+ bcache *cache.Cache
+ bpool *util.BufferPool
+}
+
+// Creates an empty table and returns table writer.
+func (t *tOps) create() (*tWriter, error) {
+ fd := storage.FileDesc{storage.TypeTable, t.s.allocFileNum()}
+ fw, err := t.s.stor.Create(fd)
+ if err != nil {
+ return nil, err
+ }
+ return &tWriter{
+ t: t,
+ fd: fd,
+ w: fw,
+ tw: table.NewWriter(fw, t.s.o.Options),
+ }, nil
+}
+
+// Builds table from src iterator.
+func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
+ w, err := t.create()
+ if err != nil {
+ return
+ }
+
+ defer func() {
+ if err != nil {
+ w.drop()
+ }
+ }()
+
+ for src.Next() {
+ err = w.append(src.Key(), src.Value())
+ if err != nil {
+ return
+ }
+ }
+ err = src.Error()
+ if err != nil {
+ return
+ }
+
+ n = w.tw.EntriesLen()
+ f, err = w.finish()
+ return
+}
+
+// Opens table. It returns a cache handle, which should
+// be released after use.
+func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
+ ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
+ var r storage.Reader
+ r, err = t.s.stor.Open(f.fd)
+ if err != nil {
+ return 0, nil
+ }
+
+ var bcache *cache.NamespaceGetter
+ if t.bcache != nil {
+ bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
+ }
+
+ var tr *table.Reader
+ tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
+ if err != nil {
+ r.Close()
+ return 0, nil
+ }
+ return 1, tr
+
+ })
+ if ch == nil && err == nil {
+ err = ErrClosed
+ }
+ return
+}
+
+// Finds key/value pair whose key is greater than or equal to the
+// given key.
+func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).Find(key, true, ro)
+}
+
+// Finds key that is greater than or equal to the given key.
+func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return nil, err
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).FindKey(key, true, ro)
+}
+
+// Returns approximate offset of the given key.
+func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
+ ch, err := t.open(f)
+ if err != nil {
+ return
+ }
+ defer ch.Release()
+ return ch.Value().(*table.Reader).OffsetOf(key)
+}
+
+// Creates an iterator from the given table.
+func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+ ch, err := t.open(f)
+ if err != nil {
+ return iterator.NewEmptyIterator(err)
+ }
+ iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
+ iter.SetReleaser(ch)
+ return iter
+}
+
+// Removes table from persistent storage. It waits until
+// no one use the the table.
+func (t *tOps) remove(f *tFile) {
+ t.cache.Delete(0, uint64(f.fd.Num), func() {
+ if err := t.s.stor.Remove(f.fd); err != nil {
+ t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
+ } else {
+ t.s.logf("table@remove removed @%d", f.fd.Num)
+ }
+ if t.bcache != nil {
+ t.bcache.EvictNS(uint64(f.fd.Num))
+ }
+ })
+}
+
+// Closes the table ops instance. It will close all tables,
+// regadless still used or not.
+func (t *tOps) close() {
+ t.bpool.Close()
+ t.cache.Close()
+ if t.bcache != nil {
+ t.bcache.CloseWeak()
+ }
+}
+
+// Creates new initialized table ops instance.
+func newTableOps(s *session) *tOps {
+ var (
+ cacher cache.Cacher
+ bcache *cache.Cache
+ bpool *util.BufferPool
+ )
+ if s.o.GetOpenFilesCacheCapacity() > 0 {
+ cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
+ }
+ if !s.o.GetDisableBlockCache() {
+ var bcacher cache.Cacher
+ if s.o.GetBlockCacheCapacity() > 0 {
+ bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
+ }
+ bcache = cache.NewCache(bcacher)
+ }
+ if !s.o.GetDisableBufferPool() {
+ bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
+ }
+ return &tOps{
+ s: s,
+ noSync: s.o.GetNoSync(),
+ cache: cache.NewCache(cacher),
+ bcache: bcache,
+ bpool: bpool,
+ }
+}
+
+// tWriter wraps the table writer. It keep track of file descriptor
+// and added key range.
+type tWriter struct {
+ t *tOps
+
+ fd storage.FileDesc
+ w storage.Writer
+ tw *table.Writer
+
+ first, last []byte
+}
+
+// Append key/value pair to the table.
+func (w *tWriter) append(key, value []byte) error {
+ if w.first == nil {
+ w.first = append([]byte{}, key...)
+ }
+ w.last = append(w.last[:0], key...)
+ return w.tw.Append(key, value)
+}
+
+// Returns true if the table is empty.
+func (w *tWriter) empty() bool {
+ return w.first == nil
+}
+
+// Closes the storage.Writer.
+func (w *tWriter) close() {
+ if w.w != nil {
+ w.w.Close()
+ w.w = nil
+ }
+}
+
+// Finalizes the table and returns table file.
+func (w *tWriter) finish() (f *tFile, err error) {
+ defer w.close()
+ err = w.tw.Close()
+ if err != nil {
+ return
+ }
+ if !w.t.noSync {
+ err = w.w.Sync()
+ if err != nil {
+ return
+ }
+ }
+ f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
+ return
+}
+
+// Drops the table.
+func (w *tWriter) drop() {
+ w.close()
+ w.t.s.stor.Remove(w.fd)
+ w.t.s.reuseFileNum(w.fd.Num)
+ w.tw = nil
+ w.first = nil
+ w.last = nil
+}