65aaa607ab
Change made by: - running "gvt fetch" on each of the packages mentioned in Godeps/Godeps.json - `rm -rf Godeps` - tweaking the build scripts to not mention Godeps - tweaking the build scripts to test `./lib/...`, `./cmd/...` explicitly (to avoid testing vendor) - tweaking the build scripts to not juggle GOPATH for Godeps and instead set GO15VENDOREXPERIMENT. This also results in some updated packages at the same time I bet. Building with Go 1.3 and 1.4 still *works* but won't use our vendored dependencies - the user needs to have the actual packages in their GOPATH then, which they'll get with a normal "go get". Building with Go 1.6+ will get our vendored dependencies by default even when not using our build script, which is nice. By doing this we gain some freedom in that we can pick and choose manually what to include in vendor, as it's not based on just dependency analysis of our own code. This is also a risk as we might pick up dependencies we are unaware of, as the build may work locally with those packages present in GOPATH. On the other hand the build server will detect this as it has no packages in its GOPATH beyond what is included in the repo. Recommended tool to manage dependencies is github.com/FiloSottile/gvt.
117 lines
2.9 KiB
Go
117 lines
2.9 KiB
Go
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
|
|
// All rights reserved.
|
|
//
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
package filter
|
|
|
|
import (
|
|
"github.com/syndtr/goleveldb/leveldb/util"
|
|
)
|
|
|
|
func bloomHash(key []byte) uint32 {
|
|
return util.Hash(key, 0xbc9f1d34)
|
|
}
|
|
|
|
// bloomFilter is the bloom filter implementation returned by
// NewBloomFilter. Its underlying int value is the bitsPerKey setting, which
// NewGenerator uses to derive the number of probes per key and the size of
// generated filters.
type bloomFilter int
|
|
|
|
// The bloom filter serializes its parameters and is backward compatible
|
|
// with respect to them. Therefor, its parameters are not added to its
|
|
// name.
|
|
func (bloomFilter) Name() string {
|
|
return "leveldb.BuiltinBloomFilter"
|
|
}
|
|
|
|
func (f bloomFilter) Contains(filter, key []byte) bool {
|
|
nBytes := len(filter) - 1
|
|
if nBytes < 1 {
|
|
return false
|
|
}
|
|
nBits := uint32(nBytes * 8)
|
|
|
|
// Use the encoded k so that we can read filters generated by
|
|
// bloom filters created using different parameters.
|
|
k := filter[nBytes]
|
|
if k > 30 {
|
|
// Reserved for potentially new encodings for short bloom filters.
|
|
// Consider it a match.
|
|
return true
|
|
}
|
|
|
|
kh := bloomHash(key)
|
|
delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
|
|
for j := uint8(0); j < k; j++ {
|
|
bitpos := kh % nBits
|
|
if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
|
|
return false
|
|
}
|
|
kh += delta
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (f bloomFilter) NewGenerator() FilterGenerator {
|
|
// Round down to reduce probing cost a little bit.
|
|
k := uint8(f * 69 / 100) // 0.69 =~ ln(2)
|
|
if k < 1 {
|
|
k = 1
|
|
} else if k > 30 {
|
|
k = 30
|
|
}
|
|
return &bloomFilterGenerator{
|
|
n: int(f),
|
|
k: k,
|
|
}
|
|
}
|
|
|
|
// bloomFilterGenerator accumulates key hashes and turns them into a
// serialized bloom filter when Generate is called.
type bloomFilterGenerator struct {
	// n is the number of filter bits budgeted per added key.
	n int
	// k is the number of bits set per key; it is also written as the last
	// byte of every generated filter.
	k uint8

	// keyHashes holds the hashes of keys added since the last Generate.
	keyHashes []uint32
}
|
|
|
|
func (g *bloomFilterGenerator) Add(key []byte) {
|
|
// Use double-hashing to generate a sequence of hash values.
|
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
|
g.keyHashes = append(g.keyHashes, bloomHash(key))
|
|
}
|
|
|
|
func (g *bloomFilterGenerator) Generate(b Buffer) {
|
|
// Compute bloom filter size (in both bits and bytes)
|
|
nBits := uint32(len(g.keyHashes) * g.n)
|
|
// For small n, we can see a very high false positive rate. Fix it
|
|
// by enforcing a minimum bloom filter length.
|
|
if nBits < 64 {
|
|
nBits = 64
|
|
}
|
|
nBytes := (nBits + 7) / 8
|
|
nBits = nBytes * 8
|
|
|
|
dest := b.Alloc(int(nBytes) + 1)
|
|
dest[nBytes] = g.k
|
|
for _, kh := range g.keyHashes {
|
|
delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits
|
|
for j := uint8(0); j < g.k; j++ {
|
|
bitpos := kh % nBits
|
|
dest[bitpos/8] |= (1 << (bitpos % 8))
|
|
kh += delta
|
|
}
|
|
}
|
|
|
|
g.keyHashes = g.keyHashes[:0]
|
|
}
|
|
|
|
// NewBloomFilter creates a new initialized bloom filter for given
|
|
// bitsPerKey.
|
|
//
|
|
// Since bitsPerKey is persisted individually for each bloom filter
|
|
// serialization, bloom filters are backwards compatible with respect to
|
|
// changing bitsPerKey. This means that no big performance penalty will
|
|
// be experienced when changing the parameter. See documentation for
|
|
// opt.Options.Filter for more information.
|
|
func NewBloomFilter(bitsPerKey int) Filter {
|
|
return bloomFilter(bitsPerKey)
|
|
}
|