From 7aaa1dd8a3a2b8e6b9c69b5e147858cefbb4e708 Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Tue, 26 Jul 2016 08:51:39 +0000 Subject: [PATCH] lib/scanner: Recheck file size and modification time after hashing (ref #3440) To catch the case where the file changed. Also make sure we never let a size-vs-blocklist mismatch slip through. GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3443 --- lib/model/rwfolder.go | 2 +- lib/model/rwfolder_test.go | 2 +- lib/scanner/blockqueue.go | 54 ++++++++++++++++++++++++++++++-------- lib/scanner/blocks.go | 2 +- lib/scanner/blocks_test.go | 6 ++--- lib/scanner/walk_test.go | 4 +-- 6 files changed, 51 insertions(+), 19 deletions(-) diff --git a/lib/model/rwfolder.go b/lib/model/rwfolder.go index 85d42ba2c..f921232c5 100644 --- a/lib/model/rwfolder.go +++ b/lib/model/rwfolder.go @@ -941,7 +941,7 @@ func (f *rwFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocks // Check for an old temporary file which might have some blocks we could // reuse. - tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, 0, nil) + tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) diff --git a/lib/model/rwfolder_test.go b/lib/model/rwfolder_test.go index 81df425f4..697d2ccef 100644 --- a/lib/model/rwfolder_test.go +++ b/lib/model/rwfolder_test.go @@ -223,7 +223,7 @@ func TestCopierFinder(t *testing.T) { } // Verify that the fetched blocks have actually been written to the temp file - blks, err := scanner.HashFile(tempFile, protocol.BlockSize, 0, nil) + blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil) if err != nil { t.Log(err) } diff --git a/lib/scanner/blockqueue.go b/lib/scanner/blockqueue.go index 840fc6d2b..596724228 100644 --- a/lib/scanner/blockqueue.go +++ b/lib/scanner/blockqueue.go @@ -7,6 +7,7 @@ package scanner import ( + "errors" "os" "path/filepath" @@ -39,24 +40,45 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr }() } -func HashFile(path string, blockSize int, sizeHint int64, counter Counter) ([]protocol.BlockInfo, error) { +func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo, error) { fd, err := os.Open(path) if err != nil { l.Debugln("open:", err) - return []protocol.BlockInfo{}, err + return nil, err } defer fd.Close() - if sizeHint == 0 { - fi, err := fd.Stat() - if err != nil { - l.Debugln("stat:", err) - return []protocol.BlockInfo{}, err - } - sizeHint = fi.Size() + // Get the size and modtime of the file before we start hashing it. + + fi, err := fd.Stat() + if err != nil { + l.Debugln("stat before:", err) + return nil, err + } + size := fi.Size() + modTime := fi.ModTime() + + // Hash the file. This may take a while for large files. + + blocks, err := Blocks(fd, blockSize, size, counter) + if err != nil { + l.Debugln("blocks:", err) + return nil, err } - return Blocks(fd, blockSize, sizeHint, counter) + // Recheck the size and modtime again. If they differ, the file changed + // while we were reading it and our hash results are invalid. + + fi, err = fd.Stat() + if err != nil { + l.Debugln("stat after:", err) + return nil, err + } + if size != fi.Size() || !modTime.Equal(fi.ModTime()) { + return nil, errors.New("file changed during hashing") + } + + return blocks, nil } func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}) { @@ -71,13 +93,23 @@ func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, panic("Bug. Asked to hash a directory or a deleted file.") } - blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, f.Size, counter) + blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter) if err != nil { l.Debugln("hash error:", f.Name, err) continue } f.Blocks = blocks + + // The size we saw when initially deciding to hash the file + // might not have been the size it actually had when we hashed + // it. Update the size from the block list. + + f.Size = 0 + for _, b := range blocks { + f.Size += int64(b.Size) + } + select { case outbox <- f: case <-cancel: diff --git a/lib/scanner/blocks.go b/lib/scanner/blocks.go index ff670f572..fc847f143 100644 --- a/lib/scanner/blocks.go +++ b/lib/scanner/blocks.go @@ -29,7 +29,7 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]prot var blocks []protocol.BlockInfo var hashes, thisHash []byte - if sizehint > 0 { + if sizehint >= 0 { // Allocate contiguous blocks for the BlockInfo structures and their // hashes once and for all, and stick to the specified size. r = io.LimitReader(r, sizehint) diff --git a/lib/scanner/blocks_test.go b/lib/scanner/blocks_test.go index 349ea017d..0bc208a1c 100644 --- a/lib/scanner/blocks_test.go +++ b/lib/scanner/blocks_test.go @@ -51,7 +51,7 @@ var blocksTestData = []struct { func TestBlocks(t *testing.T) { for _, test := range blocksTestData { buf := bytes.NewBuffer(test.data) - blocks, err := Blocks(buf, test.blocksize, 0, nil) + blocks, err := Blocks(buf, test.blocksize, -1, nil) if err != nil { t.Fatal(err) @@ -105,8 +105,8 @@ var diffTestData = []struct { func TestDiff(t *testing.T) { for i, test := range diffTestData { - a, _ := Blocks(bytes.NewBufferString(test.a), test.s, 0, nil) - b, _ := Blocks(bytes.NewBufferString(test.b), test.s, 0, nil) + a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil) + b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil) _, d := BlockDiff(a, b) if len(d) != len(test.d) { t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d)) diff --git a/lib/scanner/walk_test.go b/lib/scanner/walk_test.go index 0a09695e5..0fba5e121 100644 --- a/lib/scanner/walk_test.go +++ b/lib/scanner/walk_test.go @@ -148,7 +148,7 @@ func TestVerify(t *testing.T) { progress := newByteCounter() defer progress.Close() - blocks, err := Blocks(buf, blocksize, 0, progress) + blocks, err := Blocks(buf, blocksize, -1, progress) if err != nil { t.Fatal(err) } @@ -380,7 +380,7 @@ func BenchmarkHashFile(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - if _, err := HashFile(testdataName, protocol.BlockSize, testdataSize, nil); err != nil { + if _, err := HashFile(testdataName, protocol.BlockSize, nil); err != nil { b.Fatal(err) } }