diff --git a/internal/config/config.go b/internal/config/config.go index d791c0623..9819d155d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -145,6 +145,7 @@ type OptionsConfiguration struct { URAccepted int `xml:"urAccepted"` // Accepted usage reporting version; 0 for off (undecided), -1 for off (permanently) RestartOnWakeup bool `xml:"restartOnWakeup" default:"true"` AutoUpgradeIntervalH int `xml:"autoUpgradeIntervalH" default:"12"` // 0 for off + KeepTemporariesH int `xml:"keepTemporariesH" default:"24"` // 0 for off Deprecated_RescanIntervalS int `xml:"rescanIntervalS,omitempty" json:"-"` Deprecated_UREnabled bool `xml:"urEnabled,omitempty" json:"-"` diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 68c4213c2..eb488040f 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -49,6 +49,7 @@ func TestDefaultValues(t *testing.T) { UPnPRenewal: 30, RestartOnWakeup: true, AutoUpgradeIntervalH: 12, + KeepTemporariesH: 24, } cfg := New("test", device1) @@ -141,6 +142,7 @@ func TestOverriddenValues(t *testing.T) { UPnPRenewal: 15, RestartOnWakeup: false, AutoUpgradeIntervalH: 24, + KeepTemporariesH: 48, } cfg, err := Load("testdata/overridenvalues.xml", device1) diff --git a/internal/config/testdata/overridenvalues.xml b/internal/config/testdata/overridenvalues.xml index 84d2999ff..9d5340fbd 100755 --- a/internal/config/testdata/overridenvalues.xml +++ b/internal/config/testdata/overridenvalues.xml @@ -17,5 +17,6 @@ 15 false 24 + 48 diff --git a/internal/model/model.go b/internal/model/model.go index 8d38c6f6b..410034422 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -1001,7 +1001,7 @@ func (m *Model) ScanFolderSub(folder, sub string) error { Dir: dir, Sub: sub, Ignores: ignores, - BlockSize: scanner.StandardBlockSize, + BlockSize: protocol.BlockSize, TempNamer: defTempNamer, CurrentFiler: cFiler{m, folder}, IgnorePerms: m.folderCfgs[folder].IgnorePerms, diff --git a/internal/model/puller.go b/internal/model/puller.go index d2f3c854d..b2b3dfd06 100644 --- a/internal/model/puller.go +++ b/internal/model/puller.go @@ -412,12 +412,62 @@ func (p *Puller) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksSt tempName := filepath.Join(p.dir, defTempNamer.TempName(file.Name)) realName := filepath.Join(p.dir, file.Name) + var reuse bool + + // Check for an old temporary file which might have some blocks we could + // reuse. + tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize) + if err == nil { + // Check for any reusable blocks in the temp file + tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) + + // block.String() returns a string unique to the block + existingBlocks := make(map[string]bool, len(tempCopyBlocks)) + for _, block := range tempCopyBlocks { + existingBlocks[block.String()] = true + } + + // Since the blocks are already there, we don't need to copy them + // nor we need to pull them, hence discard blocks which are already + // there, if they are exactly the same... + var newCopyBlocks []protocol.BlockInfo + for _, block := range copyBlocks { + _, ok := existingBlocks[block.String()] + if !ok { + newCopyBlocks = append(newCopyBlocks, block) + } + } + + var newPullBlocks []protocol.BlockInfo + for _, block := range pullBlocks { + _, ok := existingBlocks[block.String()] + if !ok { + newPullBlocks = append(newPullBlocks, block) + } + } + + // If any blocks could be reused, let the sharedpullerstate know + // which flags it is expected to set on the file. + // Also update the list of work for the routines. + if len(copyBlocks) != len(newCopyBlocks) || len(pullBlocks) != len(newPullBlocks) { + reuse = true + copyBlocks = newCopyBlocks + pullBlocks = newPullBlocks + } else { + // Otherwise, discard the file ourselves in order for the + // sharedpuller not to panic when it fails to exlusively create a + // file which already exists + os.Remove(tempName) + } + } + s := sharedPullerState{ file: file, folder: p.folder, tempName: tempName, realName: realName, pullNeeded: len(pullBlocks), + reuse: reuse, } if len(copyBlocks) > 0 { s.copyNeeded = 1 @@ -469,7 +519,7 @@ func (p *Puller) shortcutFile(file protocol.FileInfo) { // copierRoutine reads pullerStates until the in channel closes and performs // the relevant copy. func (p *Puller) copierRoutine(in <-chan copyBlocksState, out chan<- *sharedPullerState) { - buf := make([]byte, scanner.StandardBlockSize) + buf := make([]byte, protocol.BlockSize) nextFile: for state := range in { @@ -575,7 +625,7 @@ func (p *Puller) finisherRoutine(in <-chan *sharedPullerState) { l.Warnln("puller: final:", err) continue } - err = scanner.Verify(fd, scanner.StandardBlockSize, state.file.Blocks) + err = scanner.Verify(fd, protocol.BlockSize, state.file.Blocks) fd.Close() if err != nil { os.Remove(state.tempName) @@ -628,12 +678,14 @@ func (p *Puller) finisherRoutine(in <-chan *sharedPullerState) { // clean deletes orphaned temporary files func (p *Puller) clean() { + keep := time.Duration(p.model.cfg.Options.KeepTemporariesH) * time.Hour + now := time.Now() filepath.Walk(p.dir, func(path string, info os.FileInfo, err error) error { if err != nil { return err } - if info.Mode().IsRegular() && defTempNamer.IsTemporary(path) { + if info.Mode().IsRegular() && defTempNamer.IsTemporary(path) && info.ModTime().Add(keep).Before(now) { os.Remove(path) } diff --git a/internal/model/puller_test.go b/internal/model/puller_test.go new file mode 100644 index 000000000..fe08f324c --- /dev/null +++ b/internal/model/puller_test.go @@ -0,0 +1,173 @@ +// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file). +// +// This program is free software: you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along +// with this program. If not, see . + +package model + +import ( + "testing" + + "github.com/syncthing/syncthing/internal/config" + "github.com/syncthing/syncthing/internal/protocol" + + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +var blocks = []protocol.BlockInfo{ + {Hash: []uint8{0xfa, 0x43, 0x23, 0x9b, 0xce, 0xe7, 0xb9, 0x7c, 0xa6, 0x2f, 0x0, 0x7c, 0xc6, 0x84, 0x87, 0x56, 0xa, 0x39, 0xe1, 0x9f, 0x74, 0xf3, 0xdd, 0xe7, 0x48, 0x6d, 0xb3, 0xf9, 0x8d, 0xf8, 0xe4, 0x71}}, // Zero'ed out block + {Offset: 0, Size: 0x20000, Hash: []uint8{0x7e, 0xad, 0xbc, 0x36, 0xae, 0xbb, 0xcf, 0x74, 0x43, 0xe2, 0x7a, 0x5a, 0x4b, 0xb8, 0x5b, 0xce, 0xe6, 0x9e, 0x1e, 0x10, 0xf9, 0x8a, 0xbc, 0x77, 0x95, 0x2, 0x29, 0x60, 0x9e, 0x96, 0xae, 0x6c}}, + {Offset: 131072, Size: 0x20000, Hash: []uint8{0x3c, 0xc4, 0x20, 0xf4, 0xb, 0x2e, 0xcb, 0xb9, 0x5d, 0xce, 0x34, 0xa8, 0xc3, 0x92, 0xea, 0xf3, 0xda, 0x88, 0x33, 0xee, 0x7a, 0xb6, 0xe, 0xf1, 0x82, 0x5e, 0xb0, 0xa9, 0x26, 0xa9, 0xc0, 0xef}}, + {Offset: 262144, Size: 0x20000, Hash: []uint8{0x76, 0xa8, 0xc, 0x69, 0xd7, 0x5c, 0x52, 0xfd, 0xdf, 0x55, 0xef, 0x44, 0xc1, 0xd6, 0x25, 0x48, 0x4d, 0x98, 0x48, 0x4d, 0xaa, 0x50, 0xf6, 0x6b, 0x32, 0x47, 0x55, 0x81, 0x6b, 0xed, 0xee, 0xfb}}, + {Offset: 393216, Size: 0x20000, Hash: []uint8{0x44, 0x1e, 0xa4, 0xf2, 0x8d, 0x1f, 0xc3, 0x1b, 0x9d, 0xa5, 0x18, 0x5e, 0x59, 0x1b, 0xd8, 0x5c, 0xba, 0x7d, 0xb9, 0x8d, 0x70, 0x11, 0x5c, 0xea, 0xa1, 0x57, 0x4d, 0xcb, 0x3c, 0x5b, 0xf8, 0x6c}}, + {Offset: 524288, Size: 0x20000, Hash: []uint8{0x8, 0x40, 0xd0, 0x5e, 0x80, 0x0, 0x0, 0x7c, 0x8b, 0xb3, 0x8b, 0xf7, 0x7b, 0x23, 0x26, 0x28, 0xab, 0xda, 0xcf, 0x86, 0x8f, 0xc2, 0x8a, 0x39, 0xc6, 0xe6, 0x69, 0x59, 0x97, 0xb6, 0x1a, 0x43}}, + {Offset: 655360, Size: 0x20000, Hash: []uint8{0x38, 0x8e, 0x44, 0xcb, 0x30, 0xd8, 0x90, 0xf, 0xce, 0x7, 0x4b, 0x58, 0x86, 0xde, 0xce, 0x59, 0xa2, 0x46, 0xd2, 0xf9, 0xba, 0xaf, 0x35, 0x87, 0x38, 0xdf, 0xd2, 0xd, 0xf9, 0x45, 0xed, 0x91}}, + {Offset: 786432, Size: 0x20000, Hash: []uint8{0x32, 0x28, 0xcd, 0xf, 0x37, 0x21, 0xe5, 0xd4, 0x1e, 0x58, 0x87, 0x73, 0x8e, 0x36, 0xdf, 0xb2, 0x70, 0x78, 0x56, 0xc3, 0x42, 0xff, 0xf7, 0x8f, 0x37, 0x95, 0x0, 0x26, 0xa, 0xac, 0x54, 0x72}}, + {Offset: 917504, Size: 0x20000, Hash: []uint8{0x96, 0x6b, 0x15, 0x6b, 0xc4, 0xf, 0x19, 0x18, 0xca, 0xbb, 0x5f, 0xd6, 0xbb, 0xa2, 0xc6, 0x2a, 0xac, 0xbb, 0x8a, 0xb9, 0xce, 0xec, 0x4c, 0xdb, 0x78, 0xec, 0x57, 0x5d, 0x33, 0xf9, 0x8e, 0xaf}}, +} + +// Layout of the files: (indexes from the above array) +// 12345678 - Required file +// 02005008 - Existing file (currently in the index) +// 02340070 - Temp file on the disk + +func TestHandleFile(t *testing.T) { + // After the diff between required and existing we should: + // Copy: 2, 5, 8 + // Pull: 1, 3, 4, 6, 7 + + // Create existing file, and update local index + existingFile := protocol.FileInfo{ + Name: "filex", + Flags: 0, + Modified: 0, + Blocks: []protocol.BlockInfo{ + blocks[0], blocks[2], blocks[0], blocks[0], + blocks[5], blocks[0], blocks[0], blocks[8], + }, + } + + // Create target file + requiredFile := existingFile + requiredFile.Blocks = blocks[1:] + + db, _ := leveldb.Open(storage.NewMemStorage(), nil) + m := NewModel(&config.Configuration{}, "device", "syncthing", "dev", db) + m.AddFolder(config.FolderConfiguration{ID: "default", Path: "testdata"}) + m.updateLocal("default", existingFile) + + p := Puller{ + folder: "default", + dir: "testdata", + model: m, + } + + copyChan := make(chan copyBlocksState, 1) // Copy chan gets all blocks needed to copy in a wrapper struct + pullChan := make(chan pullBlockState, 5) // Pull chan gets blocks one by one + + p.handleFile(requiredFile, copyChan, pullChan) + + // Receive the results + toCopy := <-copyChan + toPull := []pullBlockState{<-pullChan, <-pullChan, <-pullChan, <-pullChan, <-pullChan} + + select { + case <-pullChan: + t.Error("Channel not empty!") + default: + } + + if len(toCopy.blocks) != 3 { + t.Errorf("Unexpected count of copy blocks: %d != 3", len(toCopy.blocks)) + } + + for i, eq := range []int{2, 5, 8} { + if string(toCopy.blocks[i].Hash) != string(blocks[eq].Hash) { + t.Errorf("Block mismatch: %s != %s", toCopy.blocks[i].String(), blocks[eq].String()) + } + } + + for i, eq := range []int{1, 3, 4, 6, 7} { + if string(toPull[i].block.Hash) != string(blocks[eq].Hash) { + t.Errorf("Block mismatch: %s != %s", toPull[i].block.String(), blocks[eq].String()) + } + } +} + +func TestHandleFileWithTemp(t *testing.T) { + // After diff between required and existing we should: + // Copy: 2, 5, 8 + // Pull: 1, 3, 4, 6, 7 + + // After dropping out blocks already on the temp file we should: + // Copy: 5, 8 + // Pull: 1, 6 + + // Create existing file, and update local index + existingFile := protocol.FileInfo{ + Name: "file", + Flags: 0, + Modified: 0, + Blocks: []protocol.BlockInfo{ + blocks[0], blocks[2], blocks[0], blocks[0], + blocks[5], blocks[0], blocks[0], blocks[8], + }, + } + + // Create target file + requiredFile := existingFile + requiredFile.Blocks = blocks[1:] + + db, _ := leveldb.Open(storage.NewMemStorage(), nil) + m := NewModel(&config.Configuration{}, "device", "syncthing", "dev", db) + m.AddFolder(config.FolderConfiguration{ID: "default", Path: "testdata"}) + m.updateLocal("default", existingFile) + + p := Puller{ + folder: "default", + dir: "testdata", + model: m, + } + + copyChan := make(chan copyBlocksState, 1) // Copy chan gets all blocks needed to copy in a wrapper struct + pullChan := make(chan pullBlockState, 2) // Pull chan gets blocks one by one + + p.handleFile(requiredFile, copyChan, pullChan) + + // Receive the results + toCopy := <-copyChan + toPull := []pullBlockState{<-pullChan, <-pullChan} + + select { + case <-pullChan: + t.Error("Channel not empty!") + default: + } + + if len(toCopy.blocks) != 2 { + t.Errorf("Unexpected count of copy blocks: %d != 2", len(toCopy.blocks)) + } + + for i, eq := range []int{5, 8} { + if string(toCopy.blocks[i].Hash) != string(blocks[eq].Hash) { + t.Errorf("Block mismatch: %s != %s", toCopy.blocks[i].String(), blocks[eq].String()) + } + } + + for i, eq := range []int{1, 6} { + if string(toPull[i].block.Hash) != string(blocks[eq].Hash) { + t.Errorf("Block mismatch: %s != %s", toPull[i].block.String(), blocks[eq].String()) + } + } +} diff --git a/internal/model/sharedpullerstate.go b/internal/model/sharedpullerstate.go index 61585006b..1347e6274 100644 --- a/internal/model/sharedpullerstate.go +++ b/internal/model/sharedpullerstate.go @@ -31,6 +31,7 @@ type sharedPullerState struct { folder string tempName string realName string + reuse bool // Mutable, must be locked for access err error // The first error we hit @@ -77,7 +78,11 @@ func (s *sharedPullerState) tempFile() (*os.File, error) { } // Attempt to create the temp file - fd, err := os.OpenFile(s.tempName, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0644) + flags := os.O_WRONLY + if !s.reuse { + flags |= os.O_CREATE | os.O_EXCL + } + fd, err := os.OpenFile(s.tempName, flags, 0644) if err != nil { s.earlyCloseLocked("dst create", err) return nil, err diff --git a/internal/model/testdata/.syncthing.file b/internal/model/testdata/.syncthing.file new file mode 100644 index 000000000..c434e26aa Binary files /dev/null and b/internal/model/testdata/.syncthing.file differ diff --git a/internal/model/testdata/~syncthing~.file.tmp b/internal/model/testdata/~syncthing~.file.tmp new file mode 100644 index 000000000..c434e26aa Binary files /dev/null and b/internal/model/testdata/~syncthing~.file.tmp differ diff --git a/internal/scanner/blockqueue.go b/internal/scanner/blockqueue.go index fd4497a61..4ce67b86a 100644 --- a/internal/scanner/blockqueue.go +++ b/internal/scanner/blockqueue.go @@ -34,7 +34,7 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr for i := 0; i < workers; i++ { go func() { - hashFile(dir, blockSize, outbox, inbox) + hashFiles(dir, blockSize, outbox, inbox) wg.Done() }() } @@ -45,32 +45,35 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr }() } -func hashFile(dir string, blockSize int, outbox, inbox chan protocol.FileInfo) { +func HashFile(path string, blockSize int) ([]protocol.BlockInfo, error) { + fd, err := os.Open(path) + if err != nil { + if debug { + l.Debugln("open:", err) + } + return []protocol.BlockInfo{}, err + } + + fi, err := fd.Stat() + if err != nil { + fd.Close() + if debug { + l.Debugln("stat:", err) + } + return []protocol.BlockInfo{}, err + } + defer fd.Close() + return Blocks(fd, blockSize, fi.Size()) +} + +func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo) { for f := range inbox { if protocol.IsDirectory(f.Flags) || protocol.IsDeleted(f.Flags) { outbox <- f continue } - fd, err := os.Open(filepath.Join(dir, f.Name)) - if err != nil { - if debug { - l.Debugln("open:", err) - } - continue - } - - fi, err := fd.Stat() - if err != nil { - fd.Close() - if debug { - l.Debugln("stat:", err) - } - continue - } - blocks, err := Blocks(fd, blockSize, fi.Size()) - fd.Close() - + blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize) if err != nil { if debug { l.Debugln("hash error:", f.Name, err) diff --git a/internal/scanner/blocks.go b/internal/scanner/blocks.go index de38b2236..c247fcaaa 100644 --- a/internal/scanner/blocks.go +++ b/internal/scanner/blocks.go @@ -24,8 +24,6 @@ import ( "github.com/syncthing/syncthing/internal/protocol" ) -const StandardBlockSize = 128 * 1024 - var sha256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55} // Blocks returns the blockwise hash of the reader.