syncthing/lib/scanner/blocks_test.go
Jakob Borg 9b1c592fb7 lib/scanner: Speed up weak hash
The rolling version of adler32 is just a wrapper around the standard
hash/adler32 when used in a non-rolling fashion, but it's inefficient as
it allocates a new hash instance for every Write(). This uses the
default version instead in the block hasher, and adds a test to verify
the result is the same as they were before. It reduces allocations by
88% and increases speed about 5%.

	benchmark               old ns/op     new ns/op     delta
	BenchmarkHashFile-8     64434698      61303647      -4.86%

	benchmark               old MB/s     new MB/s     speedup
	BenchmarkHashFile-8     276.65       290.78       1.05x

	benchmark               old allocs     new allocs     delta
	BenchmarkHashFile-8     1238           150            -87.88%

	benchmark               old bytes     new bytes     delta
	BenchmarkHashFile-8     17877363      49292         -99.72%
2017-01-18 10:33:17 +01:00

163 lines
5.3 KiB
Go

// Copyright (C) 2014 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at http://mozilla.org/MPL/2.0/.
package scanner
import (
"bytes"
"fmt"
"testing"
"github.com/syncthing/syncthing/lib/protocol"
)
var blocksTestData = []struct {
data []byte
blocksize int
hash []string
weakhash []uint32
}{
{[]byte(""), 1024, []string{
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
[]uint32{0},
},
{[]byte("contents"), 1024, []string{
"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"},
[]uint32{0x0f3a036f},
},
{[]byte("contents"), 9, []string{
"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"},
[]uint32{0x0f3a036f},
},
{[]byte("contents"), 8, []string{
"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"},
[]uint32{0x0f3a036f},
},
{[]byte("contents"), 7, []string{
"ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73",
"043a718774c572bd8a25adbeb1bfcd5c0256ae11cecf9f9c3f925d0e52beaf89"},
[]uint32{0x0bcb02fc, 0x00740074},
},
{[]byte("contents"), 3, []string{
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
"44ad63f60af0f6db6fdde6d5186ef78176367df261fa06be3079b6c80c8adba4"},
[]uint32{0x02780141, 0x02970148, 0x015d00e8},
},
{[]byte("conconts"), 3, []string{
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"44ad63f60af0f6db6fdde6d5186ef78176367df261fa06be3079b6c80c8adba4"},
[]uint32{0x02780141, 0x02780141, 0x015d00e8},
},
{[]byte("contenten"), 3, []string{
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3"},
[]uint32{0x02780141, 0x02970148, 0x02970148},
},
}
func TestBlocks(t *testing.T) {
for testNo, test := range blocksTestData {
buf := bytes.NewBuffer(test.data)
blocks, err := Blocks(buf, test.blocksize, -1, nil)
if err != nil {
t.Fatal(err)
}
if l := len(blocks); l != len(test.hash) {
t.Fatalf("%d: Incorrect number of blocks %d != %d", testNo, l, len(test.hash))
} else {
i := 0
for off := int64(0); off < int64(len(test.data)); off += int64(test.blocksize) {
if blocks[i].Offset != off {
t.Errorf("%d/%d: Incorrect offset %d != %d", testNo, i, blocks[i].Offset, off)
}
bs := test.blocksize
if rem := len(test.data) - int(off); bs > rem {
bs = rem
}
if int(blocks[i].Size) != bs {
t.Errorf("%d/%d: Incorrect length %d != %d", testNo, i, blocks[i].Size, bs)
}
if h := fmt.Sprintf("%x", blocks[i].Hash); h != test.hash[i] {
t.Errorf("%d/%d: Incorrect block hash %q != %q", testNo, i, h, test.hash[i])
}
if h := blocks[i].WeakHash; h != test.weakhash[i] {
t.Errorf("%d/%d: Incorrect block weakhash 0x%08x != 0x%08x", testNo, i, h, test.weakhash[i])
}
i++
}
}
}
}
var diffTestData = []struct {
a string
b string
s int
d []protocol.BlockInfo
}{
{"contents", "contents", 1024, []protocol.BlockInfo{}},
{"", "", 1024, []protocol.BlockInfo{}},
{"contents", "contents", 3, []protocol.BlockInfo{}},
{"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}},
{"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}},
{"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}},
{"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}},
{"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
{"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
{"contents", "con", 3, nil},
{"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}},
{"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
}
func TestDiff(t *testing.T) {
for i, test := range diffTestData {
a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil)
b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil)
_, d := BlockDiff(a, b)
if len(d) != len(test.d) {
t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
} else {
for j := range test.d {
if d[j].Offset != test.d[j].Offset {
t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, d[j].Offset, test.d[j].Offset)
}
if d[j].Size != test.d[j].Size {
t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, d[j].Size, test.d[j].Size)
}
}
}
}
}
func TestDiffEmpty(t *testing.T) {
emptyCases := []struct {
a []protocol.BlockInfo
b []protocol.BlockInfo
need int
have int
}{
{nil, nil, 0, 0},
{[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0},
{nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0},
}
for _, emptyCase := range emptyCases {
h, n := BlockDiff(emptyCase.a, emptyCase.b)
if len(h) != emptyCase.have {
t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
}
if len(n) != emptyCase.need {
t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
}
}
}