Merge pull request #842 from AudriusButkevicius/ignorecache

Cache ignore file matches
Jakob Borg 2014-10-14 12:43:21 +02:00
commit 964d17d05a
9 changed files with 302 additions and 36 deletions
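
The change in brief: ignore handling moves from the exported Patterns slice to an *ignore.Matcher, Load gains a cache flag, and a new cacheIgnoredFiles option (default true) drives it so that per-file match outcomes are reused between rescans as long as the pattern set is unchanged. A minimal caller-side sketch of the new API; only Load's signature and Matcher.Match come from this diff, the surrounding program, path and config lookup are illustrative:

package main

import (
	"fmt"
	"path/filepath"

	"github.com/syncthing/syncthing/internal/ignore"
)

func main() {
	folderPath := "/data/folder" // assumed example path
	cacheIgnoredFiles := true    // would normally come from cfg.Options().CacheIgnoredFiles

	// Load parses .stignore (and any #include files). With the cache flag
	// set, per-file match results are kept between loads for as long as
	// the pattern set stays the same.
	matcher, err := ignore.Load(filepath.Join(folderPath, ".stignore"), cacheIgnoredFiles)
	if err != nil {
		fmt.Println("no ignore patterns:", err)
		return
	}

	// Match reports whether a relative path is ignored; with caching
	// enabled, repeated queries for the same path hit the cached outcome.
	fmt.Println(matcher.Match("some/relative/path"))
}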

View File

@ -173,6 +173,7 @@ type OptionsConfiguration struct {
RestartOnWakeup bool `xml:"restartOnWakeup" default:"true"`
AutoUpgradeIntervalH int `xml:"autoUpgradeIntervalH" default:"12"` // 0 for off
KeepTemporariesH int `xml:"keepTemporariesH" default:"24"` // 0 for off
CacheIgnoredFiles bool `xml:"cacheIgnoredFiles" default:"true"`
Deprecated_RescanIntervalS int `xml:"rescanIntervalS,omitempty" json:"-"`
Deprecated_UREnabled bool `xml:"urEnabled,omitempty" json:"-"`

View File

@ -51,6 +51,7 @@ func TestDefaultValues(t *testing.T) {
RestartOnWakeup: true,
AutoUpgradeIntervalH: 12,
KeepTemporariesH: 24,
CacheIgnoredFiles: true,
}
cfg := New(device1)
@ -148,6 +149,7 @@ func TestOverriddenValues(t *testing.T) {
RestartOnWakeup: false,
AutoUpgradeIntervalH: 24,
KeepTemporariesH: 48,
CacheIgnoredFiles: false,
}
cfg, err := Load("testdata/overridenvalues.xml", device1)

View File

@ -18,5 +18,6 @@
<restartOnWakeup>false</restartOnWakeup>
<autoUpgradeIntervalH>24</autoUpgradeIntervalH>
<keepTemporariesH>48</keepTemporariesH>
<cacheIgnoredFiles>false</cacheIgnoredFiles>
</options>
</configuration>

View File

@ -23,31 +23,94 @@ import (
"path/filepath"
"regexp"
"strings"
"sync"
"github.com/syncthing/syncthing/internal/fnmatch"
)
var caches = make(map[string]MatcherCache)
type Pattern struct {
match *regexp.Regexp
include bool
}
-type Patterns []Pattern

-func Load(file string) (Patterns, error) {
-seen := make(map[string]bool)
-return loadIgnoreFile(file, seen)
-}

-func Parse(r io.Reader, file string) (Patterns, error) {

+type Matcher struct {
+patterns []Pattern
+oldMatches map[string]bool
+newMatches map[string]bool
+mut sync.Mutex
+}

+type MatcherCache struct {
+patterns []Pattern
+matches *map[string]bool
+}
func Load(file string, cache bool) (*Matcher, error) {
seen := make(map[string]bool)
matcher, err := loadIgnoreFile(file, seen)
if !cache || err != nil {
return matcher, err
}
// Get the current cache object for the given file
cached, ok := caches[file]
if !ok || !patternsEqual(cached.patterns, matcher.patterns) {
// Nothing in the cache, or a cache mismatch: create a new cache which
// will store matches for the given set of patterns. Initialize
// oldMatches to indicate that we are interested in caching.
matcher.oldMatches = make(map[string]bool)
matcher.newMatches = make(map[string]bool)
caches[file] = MatcherCache{
patterns: matcher.patterns,
matches: &matcher.newMatches,
}
return matcher, nil
}
// Patterns haven't changed, so we can reuse the old matches, create a new
// matches map and update the pointer. (This prevents the matches map from
// growing indefinitely, as we only cache whatever we've matched in the last
// iteration, rather than through the whole runtime history.)
matcher.oldMatches = *cached.matches
matcher.newMatches = make(map[string]bool)
cached.matches = &matcher.newMatches
caches[file] = cached
return matcher, nil
}
func Parse(r io.Reader, file string) (*Matcher, error) {
seen := map[string]bool{
file: true,
}
return parseIgnoreFile(r, file, seen)
}
-func (l Patterns) Match(file string) bool {
-for _, pattern := range l {
+func (m *Matcher) Match(file string) (result bool) {
if len(m.patterns) == 0 {
return false
}
// The old matches map being set means we should do caching
if m.oldMatches != nil {
// Capture the result in the new matches map regardless of which
// return path produces it
defer func() {
m.mut.Lock()
m.newMatches[file] = result
m.mut.Unlock()
}()
// Check whether we've seen this file before and already know what
// the outcome is going to be.
result, ok := m.oldMatches[file]
if ok {
return result
}
}
for _, pattern := range m.patterns {
if pattern.match.MatchString(file) {
return pattern.include
}
@ -55,7 +118,7 @@ func (l Patterns) Match(file string) bool {
return false
}
-func loadIgnoreFile(file string, seen map[string]bool) (Patterns, error) {
+func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
if seen[file] {
return nil, fmt.Errorf("Multiple include of ignore file %q", file)
}
@ -70,8 +133,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (Patterns, error) {
return parseIgnoreFile(fd, file, seen)
}
-func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Patterns, error) {
-var exps Patterns
+func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) {
+var exps Matcher
addPattern := func(line string) error {
include := true
@ -86,27 +149,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
-exps = append(exps, Pattern{exp, include})
+exps.patterns = append(exps.patterns, Pattern{exp, include})
} else if strings.HasPrefix(line, "**/") {
// Add the pattern as is, and without **/ so it matches in current dir
exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME)
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
-exps = append(exps, Pattern{exp, include})
+exps.patterns = append(exps.patterns, Pattern{exp, include})
exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME)
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
-exps = append(exps, Pattern{exp, include})
+exps.patterns = append(exps.patterns, Pattern{exp, include})
} else if strings.HasPrefix(line, "#include ") {
includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):])
includes, err := loadIgnoreFile(includeFile, seen)
if err != nil {
return err
} else {
-exps = append(exps, includes...)
+exps.patterns = append(exps.patterns, includes.patterns...)
}
} else {
// Path name or pattern, add it so it matches files both in
@ -115,13 +178,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
-exps = append(exps, Pattern{exp, include})
+exps.patterns = append(exps.patterns, Pattern{exp, include})
exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME)
if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line)
}
-exps = append(exps, Pattern{exp, include})
+exps.patterns = append(exps.patterns, Pattern{exp, include})
}
return nil
}
@ -155,5 +218,17 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa
}
}
-return exps, nil
+return &exps, nil
}
func patternsEqual(a, b []Pattern) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() {
return false
}
}
return true
}
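
The scheme above keeps two generations of match results per ignore file: lookups are answered from the map filled during the previous load (oldMatches), new outcomes go into a fresh map (newMatches), and on the next reload with an unchanged pattern set the new map becomes the lookup set while another fresh map starts collecting. If patternsEqual reports a change, the cache starts over empty. A stripped-down sketch of that rotation, with hypothetical names rather than the types above:

package main

import "sync"

// genCache mirrors the oldMatches/newMatches pair: prev is read-only
// during a scan, cur collects this scan's outcomes.
type genCache struct {
	mut  sync.Mutex
	prev map[string]bool
	cur  map[string]bool
}

// lookup serves only previous-generation results, like oldMatches.
func (c *genCache) lookup(key string) (bool, bool) {
	c.mut.Lock()
	defer c.mut.Unlock()
	v, ok := c.prev[key]
	return v, ok
}

// record stores an outcome in the current generation, like newMatches.
func (c *genCache) record(key string, v bool) {
	c.mut.Lock()
	defer c.mut.Unlock()
	c.cur[key] = v
}

// rotate is what a reload with unchanged patterns amounts to: the last
// scan's results become the lookup set and a fresh map starts collecting,
// so the cache never outgrows what the previous scan actually touched.
func (c *genCache) rotate() {
	c.mut.Lock()
	defer c.mut.Unlock()
	c.prev = c.cur
	c.cur = make(map[string]bool)
}

func main() {
	c := &genCache{prev: map[string]bool{}, cur: map[string]bool{}}
	c.record("some/file", true)
	if _, ok := c.lookup("some/file"); !ok {
		// Not served yet: the outcome was only recorded this scan.
	}
	c.rotate()
	if hit, ok := c.lookup("some/file"); ok && hit {
		// After the rotation the previous scan's outcome is served.
	}
}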

View File

@ -13,19 +13,19 @@
// You should have received a copy of the GNU General Public License along
// with this program. If not, see <http://www.gnu.org/licenses/>.
-package ignore_test
+package ignore
import (
"bytes"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"testing"
"github.com/syncthing/syncthing/internal/ignore"
)
func TestIgnore(t *testing.T) {
-pats, err := ignore.Load("testdata/.stignore")
+pats, err := Load("testdata/.stignore", true)
if err != nil {
t.Fatal(err)
}
@ -72,7 +72,7 @@ func TestExcludes(t *testing.T) {
i*2
!ign2
`
-pats, err := ignore.Parse(bytes.NewBufferString(stignore), ".stignore")
+pats, err := Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Fatal(err)
}
@ -112,7 +112,7 @@ func TestBadPatterns(t *testing.T) {
}
for _, pat := range badPatterns {
-parsed, err := ignore.Parse(bytes.NewBufferString(pat), ".stignore")
+parsed, err := Parse(bytes.NewBufferString(pat), ".stignore")
if err == nil {
t.Errorf("No error for pattern %q: %v", pat, parsed)
}
@ -120,7 +120,7 @@ func TestBadPatterns(t *testing.T) {
}
func TestCaseSensitivity(t *testing.T) {
-ign, _ := ignore.Parse(bytes.NewBufferString("test"), ".stignore")
+ign, _ := Parse(bytes.NewBufferString("test"), ".stignore")
match := []string{"test"}
dontMatch := []string{"foo"}
@ -145,6 +145,144 @@ func TestCaseSensitivity(t *testing.T) {
}
}
func TestCaching(t *testing.T) {
fd1, err := ioutil.TempFile("", "")
if err != nil {
t.Fatal(err)
}
fd2, err := ioutil.TempFile("", "")
if err != nil {
t.Fatal(err)
}
defer fd1.Close()
defer fd2.Close()
defer os.Remove(fd1.Name())
defer os.Remove(fd2.Name())
_, err = fd1.WriteString("/x/\n#include " + filepath.Base(fd2.Name()) + "\n")
if err != nil {
t.Fatal(err)
}
fd2.WriteString("/y/\n")
pats, err := Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if pats.oldMatches == nil || len(pats.oldMatches) != 0 {
t.Fatal("Expected empty map")
}
if pats.newMatches == nil || len(pats.newMatches) != 0 {
t.Fatal("Expected empty map")
}
if len(pats.patterns) != 4 {
t.Fatal("Incorrect number of patterns loaded", len(pats.patterns), "!=", 4)
}
// Cache some outcomes
for _, letter := range []string{"a", "b", "x", "y"} {
pats.Match(letter)
}
if len(pats.newMatches) != 4 {
t.Fatal("Expected 4 cached results")
}
// Reload file, expect old outcomes to be provided
pats, err = Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if len(pats.oldMatches) != 4 {
t.Fatal("Expected 4 cached results")
}
// Match less this time
for _, letter := range []string{"b", "x", "y"} {
pats.Match(letter)
}
if len(pats.newMatches) != 3 {
t.Fatal("Expected 3 cached results")
}
// Reload file, expect the new outcomes to be provided
pats, err = Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if len(pats.oldMatches) != 3 {
t.Fatal("Expected 3 cached results", len(pats.oldMatches))
}
// Modify the include file, expect empty cache
fd2.WriteString("/z/\n")
pats, err = Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if len(pats.oldMatches) != 0 {
t.Fatal("Expected 0 cached results")
}
// Cache some outcomes again
for _, letter := range []string{"b", "x", "y"} {
pats.Match(letter)
}
// Verify that outcomes are provided on the next load
pats, err = Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if len(pats.oldMatches) != 3 {
t.Fatal("Expected 3 cached results")
}
// Modify the root file, expect cache to be invalidated
fd1.WriteString("/a/\n")
pats, err = Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if len(pats.oldMatches) != 0 {
t.Fatal("Expected cache invalidation")
}
// Cache some outcomes again
for _, letter := range []string{"b", "x", "y"} {
pats.Match(letter)
}
// Verify that outcomes are provided on the next load
pats, err = Load(fd1.Name(), true)
if err != nil {
t.Fatal(err)
}
if len(pats.oldMatches) != 3 {
t.Fatal("Expected 3 cached results")
}
}
func TestCommentsAndBlankLines(t *testing.T) {
stignore := `
// foo
@ -157,8 +295,8 @@ func TestCommentsAndBlankLines(t *testing.T) {
`
-pats, _ := ignore.Parse(bytes.NewBufferString(stignore), ".stignore")
-if len(pats) > 0 {
+pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
+if len(pats.patterns) > 0 {
t.Errorf("Expected no patterns")
}
}
@ -181,10 +319,59 @@ flamingo
*.crow
*.crow
`
-pats, _ := ignore.Parse(bytes.NewBufferString(stignore), ".stignore")
+pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result = pats.Match("filename")
}
}
func BenchmarkMatchCached(b *testing.B) {
stignore := `
.frog
.frog*
.frogfox
.whale
.whale/*
.dolphin
.dolphin/*
~ferret~.*
.ferret.*
flamingo.*
flamingo
*.crow
*.crow
`
// The cache is keyed per file, hence write the patterns to a file.
fd, err := ioutil.TempFile("", "")
if err != nil {
b.Fatal(err)
}
_, err = fd.WriteString(stignore)
defer fd.Close()
defer os.Remove(fd.Name())
if err != nil {
b.Fatal(err)
}
// Load the patterns
pats, err := Load(fd.Name(), true)
if err != nil {
b.Fatal(err)
}
// Cache the outcome for "filename"
pats.Match("filename")
// This load should now load the cached outcomes as the set of patterns
// has not changed.
pats, err = Load(fd.Name(), true)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
result = pats.Match("filename")
}
}

View File

@ -93,7 +93,7 @@ type Model struct {
folderDevices map[string][]protocol.DeviceID // folder -> deviceIDs
deviceFolders map[protocol.DeviceID][]string // deviceID -> folders
deviceStatRefs map[protocol.DeviceID]*stats.DeviceStatisticsReference // deviceID -> statsRef
-folderIgnores map[string]ignore.Patterns // folder -> list of ignore patterns
+folderIgnores map[string]*ignore.Matcher // folder -> matcher object
folderRunners map[string]service // folder -> puller or scanner
fmut sync.RWMutex // protects the above
@ -130,7 +130,7 @@ func NewModel(cfg *config.ConfigWrapper, deviceName, clientName, clientVersion s
folderDevices: make(map[string][]protocol.DeviceID),
deviceFolders: make(map[protocol.DeviceID][]string),
deviceStatRefs: make(map[protocol.DeviceID]*stats.DeviceStatisticsReference),
-folderIgnores: make(map[string]ignore.Patterns),
+folderIgnores: make(map[string]*ignore.Matcher),
folderRunners: make(map[string]service),
folderState: make(map[string]folderState),
folderStateChanged: make(map[string]time.Time),
@ -834,7 +834,7 @@ func (m *Model) deviceWasSeen(deviceID protocol.DeviceID) {
m.deviceStatRef(deviceID).WasSeen()
}
-func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores ignore.Patterns) {
+func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores *ignore.Matcher) {
deviceID := conn.ID()
name := conn.Name()
var err error
@ -859,7 +859,7 @@ func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores
}
}
-func sendIndexTo(initial bool, minLocalVer uint64, conn protocol.Connection, folder string, fs *files.Set, ignores ignore.Patterns) (uint64, error) {
+func sendIndexTo(initial bool, minLocalVer uint64, conn protocol.Connection, folder string, fs *files.Set, ignores *ignore.Matcher) (uint64, error) {
deviceID := conn.ID()
name := conn.Name()
batch := make([]protocol.FileInfo, 0, indexBatchSize)
@ -1011,7 +1011,7 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
fs, ok := m.folderFiles[folder]
dir := m.folderCfgs[folder].Path
-ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"))
+ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles)
m.folderIgnores[folder] = ignores
w := &scanner.Walker{

View File

@ -390,7 +390,7 @@ func TestIgnores(t *testing.T) {
}
db, _ := leveldb.Open(storage.NewMemStorage(), nil)
-m := NewModel(nil, "device", "syncthing", "dev", db)
+m := NewModel(config.Wrap("", config.Configuration{}), "device", "syncthing", "dev", db)
m.AddFolder(config.FolderConfiguration{ID: "default", Path: "testdata"})
expected := []string{

View File

@ -36,7 +36,7 @@ type Walker struct {
// BlockSize controls the size of the block used when hashing.
BlockSize int
// List of patterns to ignore
-Ignores ignore.Patterns
+Ignores *ignore.Matcher
// If TempNamer is not nil, it is used to ignore temporary files when walking.
TempNamer TempNamer
// If CurrentFiler is not nil, it is queried for the current file before rescanning.

View File

@ -58,7 +58,7 @@ func init() {
}
func TestWalkSub(t *testing.T) {
-ignores, err := ignore.Load("testdata/.stignore")
+ignores, err := ignore.Load("testdata/.stignore", false)
if err != nil {
t.Fatal(err)
}
@ -93,7 +93,7 @@ func TestWalkSub(t *testing.T) {
}
func TestWalk(t *testing.T) {
-ignores, err := ignore.Load("testdata/.stignore")
+ignores, err := ignore.Load("testdata/.stignore", false)
if err != nil {
t.Fatal(err)
}