syncthing/lib/scanner/blocks.go
Jakob Borg bd1c29ee32 lib/scanner, vendor: Fix previous commit
Can't do what I did, as the rolling function is not the same as the
non-rolling one. Instead this uses an improved version of the rolling
adler32 to accomplish the same thing. (PR filed on upstream, so should
be able to use that directly in the future.)
2017-01-18 11:57:01 +01:00

191 lines
4.5 KiB
Go

// Copyright (C) 2014 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at http://mozilla.org/MPL/2.0/.
package scanner
import (
"bytes"
"fmt"
"io"
"github.com/chmduquesne/rollinghash/adler32"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/sha256"
)
// SHA256OfNothing is the SHA-256 digest of zero bytes of input. It is used
// as the hash of the single zero-sized block that represents an empty file.
var SHA256OfNothing = []byte{
	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
}
// Counter receives progress notifications while data is being hashed.
// Blocks calls Update once per hashed block with the number of bytes that
// were read for that block.
type Counter interface {
	Update(n int64)
}
// Blocks returns the blockwise hash of the reader.
//
// The data from r is consumed in chunks of at most blocksize bytes. For each
// chunk a BlockInfo is produced holding its size, its offset from the start
// of the data, its SHA-256 hash and its adler32 weak hash. blocksize must be
// positive.
//
// If sizehint is non-negative, at most sizehint bytes are read from r and
// storage for the block list and the hashes is allocated up front. A
// negative sizehint means the size is unknown; the reader is then consumed
// until EOF and storage grows as needed.
//
// If counter is non-nil, its Update method is called after each block with
// the number of bytes hashed for that block.
//
// When no data could be read, the result is a single zero-sized block whose
// hash is SHA256OfNothing. On a read error the returned block list is nil.
func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) {
	hf := sha256.New()
	hashLength := hf.Size()
	whf := adler32.New()

	// Every chunk is fed to both the strong and the weak hash in one pass.
	mhf := io.MultiWriter(hf, whf)

	var blocks []protocol.BlockInfo
	var hashes, thisHash []byte

	if sizehint >= 0 {
		// Allocate contiguous blocks for the BlockInfo structures and their
		// hashes once and for all, and stick to the specified size.
		r = io.LimitReader(r, sizehint)

		// Round up: a trailing partial block needs a slot as well,
		// otherwise the last append would reallocate both slices.
		numBlocks := int(sizehint / int64(blocksize))
		if sizehint%int64(blocksize) != 0 {
			numBlocks++
		}
		blocks = make([]protocol.BlockInfo, 0, numBlocks)
		hashes = make([]byte, 0, hashLength*numBlocks)
	}

	// A 32k buffer is used for copying into the hash function.
	buf := make([]byte, 32<<10)

	var offset int64
	for {
		lr := io.LimitReader(r, int64(blocksize))
		n, err := io.CopyBuffer(mhf, lr, buf)
		if err != nil {
			return nil, err
		}

		if n == 0 {
			// Nothing more to read.
			break
		}

		if counter != nil {
			counter.Update(n)
		}

		// Carve out a hash-sized chunk of "hashes" to store the hash for this
		// block. The capacity of the carved-out slice is limited to the hash
		// length so that appending to it can never spill into the storage
		// of the following block's hash.
		hashes = hf.Sum(hashes)
		thisHash, hashes = hashes[:hashLength:hashLength], hashes[hashLength:]

		b := protocol.BlockInfo{
			Size:     int32(n),
			Offset:   offset,
			Hash:     thisHash,
			WeakHash: whf.Sum32(),
		}

		blocks = append(blocks, b)
		offset += n

		hf.Reset()
		whf.Reset()
	}

	if len(blocks) == 0 {
		// Empty file
		blocks = append(blocks, protocol.BlockInfo{
			Offset: 0,
			Size:   0,
			Hash:   SHA256OfNothing,
		})
	}

	return blocks, nil
}
// PopulateOffsets rewrites the Offset field of every block in place so that
// each block starts where the previous one ended, with the first block at
// offset zero. The block sizes are taken from the Size fields.
func PopulateOffsets(blocks []protocol.BlockInfo) {
	next := int64(0)
	for idx := range blocks {
		blk := &blocks[idx]
		blk.Offset = next
		next += int64(blk.Size)
	}
}
// BlockDiff compares the block lists src and tgt position by position and
// reports which blocks of tgt are already present in src (have) and which
// must be fetched to turn src into tgt (need). Both block lists must have
// been created with the same block size.
//
// An empty tgt with a non-empty src yields two nil lists. An empty src with
// a non-empty tgt yields tgt itself as the need list.
func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
	switch {
	case len(tgt) == 0 && len(src) != 0:
		// Nothing is wanted, so nothing is shared or missing.
		return nil, nil
	case len(src) == 0 && len(tgt) != 0:
		// Copy the entire file
		return nil, tgt
	}

	for idx, want := range tgt {
		if idx < len(src) && bytes.Equal(want.Hash, src[idx].Hash) {
			// Same hash at the same position: block is already there.
			have = append(have, want)
			continue
		}
		// Block differs or lies beyond the end of src.
		need = append(need, want)
	}

	return have, need
}
// Verify returns nil or an error describing the mismatch between the block
// list and actual reader contents.
//
// For every entry in blocks, up to blocksize bytes are read from r and their
// SHA-256 hash is compared with the entry's Hash. After the last block the
// reader must be exhausted; any further data is reported as an error. Read
// errors from r are returned as they are.
func Verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
	hf := sha256.New()
	// A 32k buffer is used for copying into the hash function.
	buf := make([]byte, 32<<10)

	for i, block := range blocks {
		lr := &io.LimitedReader{R: r, N: int64(blocksize)}
		if _, err := io.CopyBuffer(hf, lr, buf); err != nil {
			return err
		}

		hash := hf.Sum(nil)
		hf.Reset()

		if !bytes.Equal(hash, block.Hash) {
			return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i)
		}
	}

	// We should have reached the end now. io.ReadFull keeps reading until it
	// gets the one byte or an error, so a reader returning (0, nil) is not
	// mistaken for trailing data, and a genuine read error is reported as
	// such instead of being hidden behind the "continues" message.
	bs := make([]byte, 1)
	switch _, err := io.ReadFull(r, bs); err {
	case io.EOF:
		return nil
	case nil:
		return fmt.Errorf("file continues past end of blocks")
	default:
		return err
	}
}
// VerifyBuffer checks that buf is exactly the data described by block: its
// length must equal block.Size and its SHA-256 hash must equal block.Hash.
//
// The computed hash is returned both on success and on a hash mismatch. It
// is nil when the length differs or when hashing fails.
func VerifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) {
	if size := len(buf); size != int(block.Size) {
		return nil, fmt.Errorf("length mismatch %d != %d", size, block.Size)
	}

	hasher := sha256.New()
	if _, err := hasher.Write(buf); err != nil {
		return nil, err
	}

	sum := hasher.Sum(nil)
	if bytes.Equal(sum, block.Hash) {
		return sum, nil
	}
	return sum, fmt.Errorf("hash mismatch %x != %x", sum, block.Hash)
}
// BlocksEqual reports whether src and tgt have the same number of blocks
// and, at every index, blocks with identical hashes. Only the Hash fields
// are compared.
func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
	if len(src) != len(tgt) {
		return false
	}

	for i := range src {
		if !bytes.Equal(src[i].Hash, tgt[i].Hash) {
			return false
		}
	}
	return true
}