syncthing/lib/scanner/blockqueue.go
Simon Frei 6d3f9d5154
all: Simultaneously walk fs and db on scan (fixes #2571, fixes #4573) (#4584)
When scanner.Walk detects a change, it now returns the new file info as well as the old file info. It also finds deleted and ignored files while scanning.
Also directory deletions are now always committed to db after their children to prevent temporary failure on remote due to non-empty directory.
2018-02-10 16:56:53 +01:00

147 lines
3.5 KiB
Go

// Copyright (C) 2014 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package scanner
import (
"context"
"errors"
"github.com/syncthing/syncthing/lib/fs"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/sync"
)
// HashFile opens path on the given filesystem, hashes its contents via Blocks
// and returns the resulting block list.
//
// The size and modification time of the file are sampled before and after
// hashing. If either one differs between the two samples the file was modified
// while it was being read, and an error is returned instead of the block list.
// Errors from opening the file, from either stat call and from Blocks are
// logged at debug level and returned unchanged.
func HashFile(ctx context.Context, fs fs.Filesystem, path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
	file, err := fs.Open(path)
	if err != nil {
		l.Debugln("open:", err)
		return nil, err
	}
	defer file.Close()

	// Snapshot size and modtime up front so they can be compared once the
	// (potentially slow) hashing pass has finished.
	before, err := file.Stat()
	if err != nil {
		l.Debugln("stat before:", err)
		return nil, err
	}
	origSize, origModTime := before.Size(), before.ModTime()

	// The expensive part: read through the file and hash it block by block.
	result, err := Blocks(ctx, file, blockSize, origSize, counter, useWeakHashes)
	if err != nil {
		l.Debugln("blocks:", err)
		return nil, err
	}

	// Take a second snapshot; the result is only valid if nothing moved
	// underneath us in the meantime.
	after, err := file.Stat()
	if err != nil {
		l.Debugln("stat after:", err)
		return nil, err
	}

	if origSize == after.Size() && origModTime.Equal(after.ModTime()) {
		return result, nil
	}
	return nil, errors.New("file changed during hashing")
}
// parallelHasher is a pool of hashing workers. Each worker receives ScanResult
// values from the inbox, hashes the file named by the New entry to fill in its
// Blocks and Size, and forwards the result to the outbox. Files that fail to
// hash are dropped. Once every worker has returned (because the inbox was
// closed or the context was cancelled) the done channel, if set, and then the
// outbox are closed.
type parallelHasher struct {
	// Hashing parameters, handed to HashFile for every file.
	fs            fs.Filesystem
	blockSize     int
	counter       Counter
	useWeakHashes bool

	// Number of worker goroutines requested at construction time.
	workers int

	// Work queue and result queue.
	inbox  <-chan ScanResult
	outbox chan<- ScanResult

	// Optional completion signal; closed by closeWhenDone when non-nil.
	done chan<- struct{}

	// Tracks the running workers so closeWhenDone can wait for them.
	wg sync.WaitGroup
}
// newParallelHasher builds a parallelHasher from the given arguments and
// starts it: one hashFiles goroutine per requested worker, plus a single
// closeWhenDone goroutine that waits for the workers and then closes the done
// channel (if non-nil) and the outbox. Nothing is returned; results are
// delivered on outbox.
func newParallelHasher(ctx context.Context, fs fs.Filesystem, blockSize, workers int, outbox chan<- ScanResult, inbox <-chan ScanResult, counter Counter, done chan<- struct{}, useWeakHashes bool) {
	hasher := &parallelHasher{
		fs:            fs,
		blockSize:     blockSize,
		workers:       workers,
		outbox:        outbox,
		inbox:         inbox,
		counter:       counter,
		done:          done,
		useWeakHashes: useWeakHashes,
		wg:            sync.NewWaitGroup(),
	}

	// Register each worker with the wait group before launching it so the
	// closer below cannot observe a zero count while workers are pending.
	for remaining := workers; remaining > 0; remaining-- {
		hasher.wg.Add(1)
		go hasher.hashFiles(ctx)
	}

	go hasher.closeWhenDone()
}
// hashFiles is the worker loop. It takes items from the inbox until the inbox
// is closed or ctx is cancelled, hashes each one with HashFile and sends the
// updated item to the outbox. An item whose hashing fails is logged at debug
// level and dropped. Receiving a directory or a deleted file is treated as a
// programming error and panics. The wait group is signalled on return.
func (ph *parallelHasher) hashFiles(ctx context.Context) {
	defer ph.wg.Done()

	for {
		var (
			item ScanResult
			open bool
		)
		select {
		case <-ctx.Done():
			return
		case item, open = <-ph.inbox:
		}
		if !open {
			// Inbox closed: no more work will arrive.
			return
		}

		if item.New.IsDirectory() || item.New.IsDeleted() {
			panic("Bug. Asked to hash a directory or a deleted file.")
		}

		hashed, err := HashFile(ctx, ph.fs, item.New.Name, ph.blockSize, ph.counter, ph.useWeakHashes)
		if err != nil {
			l.Debugln("hash error:", item.New.Name, err)
			continue
		}
		item.New.Blocks = hashed

		// The size recorded when the file was queued may be stale by the
		// time it was actually hashed, so recompute it from the blocks.
		var total int64
		for i := range hashed {
			total += int64(hashed[i].Size)
		}
		item.New.Size = total

		// Deliver the result, but give up if we are cancelled while the
		// consumer is not ready to receive.
		select {
		case <-ctx.Done():
			return
		case ph.outbox <- item:
		}
	}
}
// closeWhenDone blocks until every worker has returned, then closes the done
// channel if one was supplied, followed by the outbox.
func (ph *parallelHasher) closeWhenDone() {
	ph.wg.Wait()

	// The done channel is optional; only signal it when the caller set one.
	if done := ph.done; done != nil {
		close(done)
	}
	close(ph.outbox)
}