syncthing/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go

397 lines
10 KiB
Go
Raw Normal View History

2014-07-06 14:46:48 +02:00
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"errors"
"io"
"os"
"sync"
"sync/atomic"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
// session represent a persistent database session.
type session struct {
// Need 64-bit alignment.
stFileNum uint64 // current unused file number
stJournalNum uint64 // current journal file number; need external synchronization
stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb
stSeq uint64 // last mem compacted seq; need external synchronization
stTempFileNum uint64
stor storage.Storage
storLock util.Releaser
o *opt.Options
2014-07-06 23:13:10 +02:00
icmp *iComparer
2014-07-06 14:46:48 +02:00
tops *tOps
manifest *journal.Writer
manifestWriter storage.Writer
manifestFile storage.File
2014-07-23 08:31:36 +02:00
stCptrs [kNumLevels]iKey // compact pointers; need external synchronization
2014-07-06 14:46:48 +02:00
stVersion *version // current version
vmu sync.Mutex
}
2014-07-23 08:31:36 +02:00
// Creates new initialized session instance.
2014-07-06 14:46:48 +02:00
func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
if stor == nil {
return nil, os.ErrInvalid
}
storLock, err := stor.Lock()
if err != nil {
return
}
s = &session{
stor: stor,
storLock: storLock,
}
s.setOptions(o)
s.tops = newTableOps(s, s.o.GetCachedOpenFiles())
2014-07-06 14:46:48 +02:00
s.setVersion(&version{s: s})
s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock D·DeletedEntry L·Level Q·SeqNum T·TimeElapsed")
return
}
// Close session.
func (s *session) close() {
	s.tops.close()

	// Drop any cached blocks belonging to this database, if a block
	// cache is configured.
	if cache := s.o.GetBlockCache(); cache != nil {
		cache.Purge(nil)
	}

	// Shut down the manifest journal and its underlying writer.
	if s.manifest != nil {
		s.manifest.Close()
	}
	if s.manifestWriter != nil {
		s.manifestWriter.Close()
	}

	// Clear remaining state so accidental reuse fails fast.
	s.manifest = nil
	s.manifestWriter = nil
	s.manifestFile = nil
	s.stVersion = nil
}
2014-07-23 08:31:36 +02:00
// Release session lock.
2014-07-06 14:46:48 +02:00
func (s *session) release() {
s.storLock.Release()
}
// Create a new database session; need external synchronization.
func (s *session) create() error {
	// A fresh database needs nothing beyond an empty manifest journal.
	return s.newManifest(nil, nil)
}
// Recover a database session; need external synchronization.
func (s *session) recover() (err error) {
defer func() {
if os.IsNotExist(err) {
// Don't return os.ErrNotExist if the underlying storage contains
// other files that belong to LevelDB. So the DB won't get trashed.
if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 {
err = ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest file missing")}
}
}
}()
file, err := s.stor.GetManifest()
if err != nil {
return
}
reader, err := file.Open()
if err != nil {
return
}
defer reader.Close()
strict := s.o.GetStrict(opt.StrictManifest)
jr := journal.NewReader(reader, dropper{s, file}, strict, true)
staging := s.version_NB().newStaging()
rec := &sessionRecord{}
for {
var r io.Reader
r, err = jr.Next()
if err != nil {
if err == io.EOF {
err = nil
break
}
return
}
err = rec.decode(r)
if err == nil {
// save compact pointers
2014-07-23 08:31:36 +02:00
for _, r := range rec.compactionPointers {
s.stCptrs[r.level] = iKey(r.ikey)
2014-07-06 14:46:48 +02:00
}
// commit record to version staging
staging.commit(rec)
} else if strict {
return ErrCorrupted{Type: CorruptedManifest, Err: err}
} else {
s.logf("manifest error: %v (skipped)", err)
}
rec.resetCompactionPointers()
rec.resetAddedTables()
rec.resetDeletedTables()
}
switch {
case !rec.has(recComparer):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing comparer name")}
2014-07-06 23:13:10 +02:00
case rec.comparer != s.icmp.uName():
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: comparer mismatch, " + "want '" + s.icmp.uName() + "', " + "got '" + rec.comparer + "'")}
2014-07-06 14:46:48 +02:00
case !rec.has(recNextNum):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing next file number")}
case !rec.has(recJournalNum):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing journal file number")}
case !rec.has(recSeq):
return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing seq number")}
}
s.manifestFile = file
s.setVersion(staging.finish())
s.setFileNum(rec.nextNum)
s.recordCommited(rec)
return nil
}
// Commit session; need external synchronization.
func (s *session) commit(r *sessionRecord) (err error) {
	// Derive the candidate version from the current one plus this record.
	next := s.version_NB().spawn(r)

	// Persist the record, creating the manifest journal on first use.
	if s.manifest == nil {
		err = s.newManifest(r, next)
	} else {
		err = s.flushManifest(r)
	}

	// Install the candidate version only when persisting succeeded.
	if err == nil {
		s.setVersion(next)
	}
	return
}
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version_NB()
var level int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
2014-07-23 08:31:36 +02:00
cptr := s.stCptrs[level]
tables := v.tables[level]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
2014-07-06 14:46:48 +02:00
t0 = append(t0, t)
break
}
}
if len(t0) == 0 {
2014-07-23 08:31:36 +02:00
t0 = append(t0, tables[0])
2014-07-06 14:46:48 +02:00
}
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
level = ts.level
t0 = append(t0, ts.table)
} else {
return nil
}
}
2014-07-23 08:31:36 +02:00
c := &compaction{s: s, v: v, level: level}
2014-07-06 14:46:48 +02:00
if level == 0 {
2014-07-23 08:31:36 +02:00
imin, imax := t0.getRange(s.icmp)
t0 = v.tables[0].getOverlaps(t0[:0], s.icmp, imin.ukey(), imax.ukey(), true)
2014-07-06 14:46:48 +02:00
}
c.tables[0] = t0
c.expand()
return c
}
// Create compaction from given level and range; need external synchronization.
2014-07-23 08:31:36 +02:00
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
2014-07-06 14:46:48 +02:00
v := s.version_NB()
2014-07-23 08:31:36 +02:00
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
2014-07-06 14:46:48 +02:00
if len(t0) == 0 {
return nil
}
// Avoid compacting too much in one shot in case the range is large.
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(kMaxTableSize)
total := uint64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1)
t0 = t0[:i+1]
break
}
}
}
2014-07-23 08:31:36 +02:00
c := &compaction{s: s, v: v, level: level}
2014-07-06 14:46:48 +02:00
c.tables[0] = t0
c.expand()
return c
}
2014-07-23 08:31:36 +02:00
// compaction represent a compaction state.
2014-07-06 14:46:48 +02:00
type compaction struct {
2014-07-23 08:31:36 +02:00
s *session
v *version
2014-07-06 14:46:48 +02:00
level int
tables [2]tFiles
gp tFiles
gpidx int
seenKey bool
overlappedBytes uint64
2014-07-23 08:31:36 +02:00
imin, imax iKey
2014-07-06 14:46:48 +02:00
tPtrs [kNumLevels]int
}
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
level := c.level
2014-07-23 08:31:36 +02:00
vt0, vt1 := c.v.tables[level], c.v.tables[level+1]
2014-07-06 14:46:48 +02:00
t0, t1 := c.tables[0], c.tables[1]
2014-07-23 08:31:36 +02:00
imin, imax := t0.getRange(c.s.icmp)
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
2014-07-06 14:46:48 +02:00
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
if len(t1) > 0 {
2014-07-23 08:31:36 +02:00
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), level == 0)
2014-07-06 14:46:48 +02:00
if len(exp0) > len(t0) && t1.size()+exp0.size() < kExpCompactionMaxBytes {
2014-07-23 08:31:36 +02:00
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
2014-07-06 14:46:48 +02:00
if len(exp1) == len(t1) {
2014-07-23 08:31:36 +02:00
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
2014-07-06 14:46:48 +02:00
level, level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
2014-07-23 08:31:36 +02:00
imin, imax = xmin, xmax
2014-07-06 14:46:48 +02:00
t0, t1 = exp0, exp1
2014-07-23 08:31:36 +02:00
amin, amax = append(t0, t1...).getRange(c.s.icmp)
2014-07-06 14:46:48 +02:00
}
}
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if level+2 < kNumLevels {
2014-07-23 08:31:36 +02:00
c.gp = c.v.tables[level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
2014-07-06 14:46:48 +02:00
}
c.tables[0], c.tables[1] = t0, t1
2014-07-23 08:31:36 +02:00
c.imin, c.imax = imin, imax
2014-07-06 14:46:48 +02:00
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
	// A single input table, nothing at level+1, and little enough
	// grandparent overlap means the table can simply be moved down.
	singleInput := len(c.tables[0]) == 1 && len(c.tables[1]) == 0
	return singleInput && c.gp.size() <= kMaxGrandParentOverlapBytes
}
2014-07-23 08:31:36 +02:00
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level, tables := range c.v.tables[c.level+2:] {
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
// We've advanced far enough.
if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
// Key falls in this file's range, so definitely not base level.
2014-07-06 14:46:48 +02:00
return false
}
break
}
c.tPtrs[level]++
}
}
return true
}
2014-07-23 08:31:36 +02:00
func (c *compaction) shouldStopBefore(ikey iKey) bool {
2014-07-06 14:46:48 +02:00
for ; c.gpidx < len(c.gp); c.gpidx++ {
gp := c.gp[c.gpidx]
2014-07-23 08:31:36 +02:00
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
2014-07-06 14:46:48 +02:00
break
}
if c.seenKey {
c.overlappedBytes += gp.size
}
}
c.seenKey = true
if c.overlappedBytes > kMaxGrandParentOverlapBytes {
2014-07-23 08:31:36 +02:00
// Too much overlap for current output; start new output.
2014-07-06 14:46:48 +02:00
c.overlappedBytes = 0
return true
}
return false
}
2014-07-23 08:31:36 +02:00
// Creates an iterator.
2014-07-06 14:46:48 +02:00
func (c *compaction) newIterator() iterator.Iterator {
2014-07-23 08:31:36 +02:00
// Creates iterator slice.
icap := len(c.tables)
2014-07-06 14:46:48 +02:00
if c.level == 0 {
2014-07-23 08:31:36 +02:00
// Special case for level-0
2014-07-06 14:46:48 +02:00
icap = len(c.tables[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
2014-07-23 08:31:36 +02:00
// Options.
2014-07-06 14:46:48 +02:00
ro := &opt.ReadOptions{
DontFillCache: true,
}
2014-07-23 08:31:36 +02:00
strict := c.s.o.GetStrict(opt.StrictIterator)
2014-07-06 14:46:48 +02:00
2014-07-23 08:31:36 +02:00
for i, tables := range c.tables {
if len(tables) == 0 {
2014-07-06 14:46:48 +02:00
continue
}
2014-07-23 08:31:36 +02:00
// Level-0 is not sorted and may overlaps each other.
if c.level+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
2014-07-06 14:46:48 +02:00
}
} else {
2014-07-23 08:31:36 +02:00
it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict, true)
2014-07-06 14:46:48 +02:00
its = append(its, it)
}
}
2014-07-23 08:31:36 +02:00
return iterator.NewMergedIterator(its, c.s.icmp, true)
2014-07-06 14:46:48 +02:00
}