lib/fs: Cache all real-case results (fixes #7270) (#7286)

This commit is contained in:
Simon Frei 2021-01-27 19:25:34 +01:00 committed by GitHub
parent ef0473c091
commit e19d6e993d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 141 additions and 67 deletions

1
go.mod
View File

@ -18,6 +18,7 @@ require (
github.com/gogo/protobuf v1.3.1 github.com/gogo/protobuf v1.3.1
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
github.com/greatroar/blobloom v0.5.0 github.com/greatroar/blobloom v0.5.0
github.com/hashicorp/golang-lru v0.5.1
github.com/jackpal/gateway v1.0.6 github.com/jackpal/gateway v1.0.6
github.com/jackpal/go-nat-pmp v1.0.2 github.com/jackpal/go-nat-pmp v1.0.2
github.com/julienschmidt/httprouter v1.3.0 github.com/julienschmidt/httprouter v1.3.0

1
go.sum
View File

@ -212,6 +212,7 @@ github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b
github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=

View File

@ -509,7 +509,7 @@ func TestFolderCheckPath(t *testing.T) {
} }
if err := cfg.CheckPath(); testcase.err != err { if err := cfg.CheckPath(); testcase.err != err {
t.Errorf("unexpected error in case %s: %s != %s", testcase.path, err, testcase.err) t.Errorf("unexpected error in case %s: %s != %v", testcase.path, err, testcase.err)
} }
} }
} }

View File

@ -14,11 +14,14 @@ import (
"strings" "strings"
"sync" "sync"
"time" "time"
lru "github.com/hashicorp/golang-lru"
) )
const (
	// caseCacheTimeout is the lifetime given to each cached directory
	// listing; newCaseNode sets a node's expiry to now plus this duration.
	caseCacheTimeout = time.Second
	// caseCacheItemLimit is the size handed to lru.New2Q when the
	// real-case cache is created.
	caseCacheItemLimit = 4 * 1024
)
type ErrCaseConflict struct { type ErrCaseConflict struct {
@ -352,95 +355,117 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
type defaultRealCaser struct { type defaultRealCaser struct {
fs Filesystem fs Filesystem
root *caseNode cache caseCache
mut sync.RWMutex
} }
func newDefaultRealCaser(fs Filesystem) *defaultRealCaser { func newDefaultRealCaser(fs Filesystem) *defaultRealCaser {
cache, err := lru.New2Q(caseCacheItemLimit)
// New2Q only errors if given invalid parameters, which we don't.
if err != nil {
panic(err)
}
caser := &defaultRealCaser{ caser := &defaultRealCaser{
fs: fs, fs: fs,
root: &caseNode{name: "."}, cache: caseCache{
TwoQueueCache: cache,
},
} }
return caser return caser
} }
func (r *defaultRealCaser) realCase(name string) (string, error) { func (r *defaultRealCaser) realCase(name string) (string, error) {
out := "." realName := "."
if name == out { if name == realName {
return out, nil return realName, nil
} }
r.mut.Lock()
defer r.mut.Unlock()
node := r.root
for _, comp := range strings.Split(name, string(PathSeparator)) { for _, comp := range strings.Split(name, string(PathSeparator)) {
if node.dirNames == nil || node.expires.Before(time.Now()) { node := r.cache.getExpireAdd(realName)
// Haven't called DirNames yet, or the node has expired
var err error node.once.Do(func() {
node.dirNames, err = r.fs.DirNames(out) dirNames, err := r.fs.DirNames(realName)
if err != nil { if err != nil {
return "", err r.cache.Remove(realName)
node.err = err
return
} }
node.dirNamesLower = make([]string, len(node.dirNames)) num := len(dirNames)
for i, n := range node.dirNames { node.children = make(map[string]struct{}, num)
node.dirNamesLower[i] = UnicodeLowercase(n) node.lowerToReal = make(map[string]string, num)
lastLower := ""
for _, n := range dirNames {
node.children[n] = struct{}{}
lower := UnicodeLowercase(n)
if lower != lastLower {
node.lowerToReal[lower] = n
lastLower = n
}
}
})
if node.err != nil {
return "", node.err
} }
node.expires = time.Now().Add(caseCacheTimeout) // Try to find a direct or case match
node.child = nil if _, ok := node.children[comp]; !ok {
comp, ok = node.lowerToReal[UnicodeLowercase(comp)]
if !ok {
return "", ErrNotExist
}
} }
// If we don't already have a correct cached child, try to find it. realName = filepath.Join(realName, comp)
if node.child == nil || node.child.name != comp {
// Actually loop dirNames to search for a match.
n, err := findCaseInsensitiveMatch(comp, node.dirNames, node.dirNamesLower)
if err != nil {
return "", err
}
node.child = &caseNode{name: n}
} }
node = node.child return realName, nil
out = filepath.Join(out, node.name)
}
return out, nil
} }
func (r *defaultRealCaser) dropCache() { func (r *defaultRealCaser) dropCache() {
r.mut.Lock() r.cache.Purge()
r.root = &caseNode{name: "."}
r.mut.Unlock()
} }
// Both name and the key to children are "Real", case resolved names of the path func newCaseNode() *caseNode {
return &caseNode{
expires: time.Now().Add(caseCacheTimeout),
}
}
// caseNode is the cached listing of a single directory.
//
// A node is created empty and populated using once. If listing the directory
// fails, the node is removed from the cache and the error is kept in err, so
// that anyone who already holds the node sees the error instead of touching
// the nil maps.
type caseNode struct {
	// expires is the point in time after which the node is replaced.
	expires time.Time
	// lowerToReal maps lowercase path components (as in UnicodeLowercase)
	// to their corresponding "real", case resolved names.
	lowerToReal map[string]string
	// children is keyed by the "real", case resolved names of the entries
	// in this directory (single path components, no path separator).
	children map[string]struct{}
	// once guards the one-time population of the maps.
	once sync.Once
	// err is the error from listing the directory, if any.
	err error
}
func findCaseInsensitiveMatch(name string, names, namesLower []string) (string, error) { type caseCache struct {
lower := UnicodeLowercase(name) *lru.TwoQueueCache
candidate := "" mut sync.Mutex
for i, n := range names {
if n == name {
return n, nil
} }
if candidate == "" && namesLower[i] == lower {
candidate = n // getExpireAdd gets an entry for the given key. If no entry exists, or it is
// expired a new one is created and added to the cache.
func (c *caseCache) getExpireAdd(key string) *caseNode {
c.mut.Lock()
defer c.mut.Unlock()
v, ok := c.Get(key)
if !ok {
node := newCaseNode()
c.Add(key, node)
return node
} }
node := v.(*caseNode)
if node.expires.Before(time.Now()) {
node = newCaseNode()
c.Add(key, node)
} }
if candidate == "" { return node
return "", ErrNotExist
}
return candidate, nil
} }

View File

@ -163,7 +163,7 @@ func BenchmarkWalkCaseFakeFS100k(b *testing.B) {
b.Run("rawfs", func(b *testing.B) { b.Run("rawfs", func(b *testing.B) {
fakefs := unwrapFilesystem(fsys).(*fakefs) fakefs := unwrapFilesystem(fsys).(*fakefs)
fakefs.resetCounters() fakefs.resetCounters()
benchmarkWalkFakeFS(b, fsys, paths) benchmarkWalkFakeFS(b, fsys, paths, 0, "")
fakefs.reportMetricsPerOp(b) fakefs.reportMetricsPerOp(b)
fakefs.reportMetricsPer(b, entries, "entry") fakefs.reportMetricsPer(b, entries, "entry")
b.ReportAllocs() b.ReportAllocs()
@ -176,14 +176,37 @@ func BenchmarkWalkCaseFakeFS100k(b *testing.B) {
} }
fakefs := unwrapFilesystem(fsys).(*fakefs) fakefs := unwrapFilesystem(fsys).(*fakefs)
fakefs.resetCounters() fakefs.resetCounters()
benchmarkWalkFakeFS(b, casefs, paths) benchmarkWalkFakeFS(b, casefs, paths, 0, "")
fakefs.reportMetricsPerOp(b)
fakefs.reportMetricsPer(b, entries, "entry")
b.ReportAllocs()
})
var otherOpPath string
sep := string(PathSeparator)
longest := 0
for _, p := range paths {
if length := len(strings.Split(p, sep)); length > longest {
otherOpPath = p
longest = length
}
}
otherOpEvery := 1000
b.Run(fmt.Sprintf("casefs-otherOpEvery%v", otherOpEvery), func(b *testing.B) {
// Construct the casefs manually or it will get cached and the benchmark is invalid.
casefs := &caseFilesystem{
Filesystem: fsys,
realCaser: newDefaultRealCaser(fsys),
}
fakefs := unwrapFilesystem(fsys).(*fakefs)
fakefs.resetCounters()
benchmarkWalkFakeFS(b, casefs, paths, otherOpEvery, otherOpPath)
fakefs.reportMetricsPerOp(b) fakefs.reportMetricsPerOp(b)
fakefs.reportMetricsPer(b, entries, "entry") fakefs.reportMetricsPer(b, entries, "entry")
b.ReportAllocs() b.ReportAllocs()
}) })
} }
func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string) { func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string, otherOpEvery int, otherOpPath string) {
// Simulate a scanner pass over the filesystem. First walk it to // Simulate a scanner pass over the filesystem. First walk it to
// discover all names, then stat each name individually to check if it's // discover all names, then stat each name individually to check if it's
// been deleted or not (pretending that they all existed in the // been deleted or not (pretending that they all existed in the
@ -194,7 +217,7 @@ func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string) {
t0 := time.Now() t0 := time.Now()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
if err := doubleWalkFS(fsys, paths); err != nil { if err := doubleWalkFSWithOtherOps(fsys, paths, otherOpEvery, otherOpPath); err != nil {
b.Fatal(err) b.Fatal(err)
} }
} }
@ -250,16 +273,40 @@ func TestStressCaseFS(t *testing.T) {
} }
// doubleWalkFS runs doubleWalkFSWithOtherOps with otherOpEvery set to 0 and
// an empty otherOpPath, i.e. without any interleaved extra operations.
func doubleWalkFS(fsys Filesystem, paths []string) error {
return doubleWalkFSWithOtherOps(fsys, paths, 0, "")
}
func doubleWalkFSWithOtherOps(fsys Filesystem, paths []string, otherOpEvery int, otherOpPath string) error {
i := 0
if err := fsys.Walk("/", func(path string, info FileInfo, err error) error { if err := fsys.Walk("/", func(path string, info FileInfo, err error) error {
i++
if otherOpEvery != 0 && i%otherOpEvery == 0 {
// l.Infoln("AAA", otherOpPath)
if _, err := fsys.Lstat(otherOpPath); err != nil {
return err
}
}
// l.Infoln("CCC", path)
return err return err
}); err != nil { }); err != nil {
return err return err
} }
for _, p := range paths { for _, p := range paths {
for p != "." {
i++
if otherOpEvery != 0 && i%otherOpEvery == 0 {
if _, err := fsys.Lstat(otherOpPath); err != nil {
// l.Infoln("AAA", otherOpPath)
return err
}
}
// l.Infoln("CCC", p)
if _, err := fsys.Lstat(p); err != nil { if _, err := fsys.Lstat(p); err != nil {
return err return err
} }
p = filepath.Dir(p)
}
} }
return nil return nil
} }