syncthing/cmd/stindex/idxck.go
Jakob Borg 4f7a77597e
lib/db: Slightly improve indirection (ref #6372) (#6373)
I was working on indirecting version vectors, and that resulted in some
refactoring and improving the existing block indirection stuff. We may
or may not end up doing the version vector indirection, but I think
these changes are reasonable anyhow and will simplify the diff
significantly if we do go there. The main points are:

- A bunch of renaming to make the indirection and GC not about "blocks"
  but about "indirection".

- Adding a cutoff so that we don't actually indirect for small block
  lists. This gets us better performance when handling small files as it
  cuts out the indirection for quite small loss in space efficiency.

- Being paranoid and always recalculating the hash on put. This costs
  some CPU, but the consequences if a buggy or malicious implementation
  silently substituted the block list by lying about the hash would be bad.
2020-02-27 11:19:21 +01:00

297 lines
8.3 KiB
Go

// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package main
import (
"bytes"
"encoding/binary"
"fmt"
"log"
"sort"
"github.com/syncthing/syncthing/lib/db"
"github.com/syncthing/syncthing/lib/db/backend"
"github.com/syncthing/syncthing/lib/protocol"
)
type fileInfoKey struct {
folder uint32
device uint32
name string
}
type globalKey struct {
folder uint32
name string
}
type sequenceKey struct {
folder uint32
sequence uint64
}
func idxck(ldb backend.Backend) (success bool) {
folders := make(map[uint32]string)
devices := make(map[uint32]string)
deviceToIDs := make(map[string]uint32)
fileInfos := make(map[fileInfoKey]protocol.FileInfo)
globals := make(map[globalKey]db.VersionList)
sequences := make(map[sequenceKey]string)
needs := make(map[globalKey]struct{})
blocklists := make(map[string]struct{})
usedBlocklists := make(map[string]struct{})
var localDeviceKey uint32
success = true
it, err := ldb.NewPrefixIterator(nil)
if err != nil {
log.Fatal(err)
}
for it.Next() {
key := it.Key()
switch key[0] {
case db.KeyTypeDevice:
folder := binary.BigEndian.Uint32(key[1:])
device := binary.BigEndian.Uint32(key[1+4:])
name := nulString(key[1+4+4:])
var f protocol.FileInfo
err := f.Unmarshal(it.Value())
if err != nil {
fmt.Println("Unable to unmarshal FileInfo:", err)
success = false
continue
}
fileInfos[fileInfoKey{folder, device, name}] = f
case db.KeyTypeGlobal:
folder := binary.BigEndian.Uint32(key[1:])
name := nulString(key[1+4:])
var flv db.VersionList
if err := flv.Unmarshal(it.Value()); err != nil {
fmt.Println("Unable to unmarshal VersionList:", err)
success = false
continue
}
globals[globalKey{folder, name}] = flv
case db.KeyTypeFolderIdx:
key := binary.BigEndian.Uint32(it.Key()[1:])
folders[key] = string(it.Value())
case db.KeyTypeDeviceIdx:
key := binary.BigEndian.Uint32(it.Key()[1:])
devices[key] = string(it.Value())
deviceToIDs[string(it.Value())] = key
if bytes.Equal(it.Value(), protocol.LocalDeviceID[:]) {
localDeviceKey = key
}
case db.KeyTypeSequence:
folder := binary.BigEndian.Uint32(key[1:])
seq := binary.BigEndian.Uint64(key[5:])
val := it.Value()
sequences[sequenceKey{folder, seq}] = string(val[9:])
case db.KeyTypeNeed:
folder := binary.BigEndian.Uint32(key[1:])
name := nulString(key[1+4:])
needs[globalKey{folder, name}] = struct{}{}
case db.KeyTypeBlockList:
hash := string(key[1:])
blocklists[hash] = struct{}{}
}
}
if localDeviceKey == 0 {
fmt.Println("Missing key for local device in device index (bailing out)")
success = false
return
}
var missingSeq []sequenceKey
for fk, fi := range fileInfos {
if fk.name != fi.Name {
fmt.Printf("Mismatching FileInfo name, %q (key) != %q (actual)\n", fk.name, fi.Name)
success = false
}
folder := folders[fk.folder]
if folder == "" {
fmt.Printf("Unknown folder ID %d for FileInfo %q\n", fk.folder, fk.name)
success = false
continue
}
if devices[fk.device] == "" {
fmt.Printf("Unknown device ID %d for FileInfo %q, folder %q\n", fk.folder, fk.name, folder)
success = false
}
if fk.device == localDeviceKey {
sk := sequenceKey{fk.folder, uint64(fi.Sequence)}
name, ok := sequences[sk]
if !ok {
fmt.Printf("Sequence entry missing for FileInfo %q, folder %q, seq %d\n", fi.Name, folder, fi.Sequence)
missingSeq = append(missingSeq, sk)
success = false
continue
}
if name != fi.Name {
fmt.Printf("Sequence entry refers to wrong name, %q (seq) != %q (FileInfo), folder %q, seq %d\n", name, fi.Name, folder, fi.Sequence)
success = false
}
}
if len(fi.Blocks) == 0 && len(fi.BlocksHash) != 0 {
key := string(fi.BlocksHash)
if _, ok := blocklists[key]; !ok {
fmt.Printf("Missing block list for file %q, block list hash %x\n", fi.Name, fi.BlocksHash)
success = false
} else {
usedBlocklists[key] = struct{}{}
}
}
}
// Aggregate the ranges of missing sequence entries, print them
sort.Slice(missingSeq, func(a, b int) bool {
if missingSeq[a].folder != missingSeq[b].folder {
return missingSeq[a].folder < missingSeq[b].folder
}
return missingSeq[a].sequence < missingSeq[b].sequence
})
var folder uint32
var startSeq, prevSeq uint64
for _, sk := range missingSeq {
if folder != sk.folder || sk.sequence != prevSeq+1 {
if folder != 0 {
fmt.Printf("Folder %d missing %d sequence entries: #%d - #%d\n", folder, prevSeq-startSeq+1, startSeq, prevSeq)
}
startSeq = sk.sequence
folder = sk.folder
}
prevSeq = sk.sequence
}
if folder != 0 {
fmt.Printf("Folder %d missing %d sequence entries: #%d - #%d\n", folder, prevSeq-startSeq+1, startSeq, prevSeq)
}
for gk, vl := range globals {
folder := folders[gk.folder]
if folder == "" {
fmt.Printf("Unknown folder ID %d for VersionList %q\n", gk.folder, gk.name)
success = false
}
for i, fv := range vl.Versions {
dev, ok := deviceToIDs[string(fv.Device)]
if !ok {
fmt.Printf("VersionList %q, folder %q refers to unknown device %q\n", gk.name, folder, fv.Device)
success = false
}
fi, ok := fileInfos[fileInfoKey{gk.folder, dev, gk.name}]
if !ok {
fmt.Printf("VersionList %q, folder %q, entry %d refers to unknown FileInfo\n", gk.name, folder, i)
success = false
}
if !fi.Version.Equal(fv.Version) {
fmt.Printf("VersionList %q, folder %q, entry %d, FileInfo version mismatch, %v (VersionList) != %v (FileInfo)\n", gk.name, folder, i, fv.Version, fi.Version)
success = false
}
if fi.IsInvalid() != fv.Invalid {
fmt.Printf("VersionList %q, folder %q, entry %d, FileInfo invalid mismatch, %v (VersionList) != %v (FileInfo)\n", gk.name, folder, i, fv.Invalid, fi.IsInvalid())
success = false
}
}
// If we need this file we should have a need entry for it. False
// positives from needsLocally for deleted files, where we might
// legitimately lack an entry if we never had it, and ignored files.
if needsLocally(vl) {
_, ok := needs[gk]
if !ok {
dev := deviceToIDs[string(vl.Versions[0].Device)]
fi := fileInfos[fileInfoKey{gk.folder, dev, gk.name}]
if !fi.IsDeleted() && !fi.IsIgnored() {
fmt.Printf("Missing need entry for needed file %q, folder %q\n", gk.name, folder)
}
}
}
}
seenSeq := make(map[fileInfoKey]uint64)
for sk, name := range sequences {
folder := folders[sk.folder]
if folder == "" {
fmt.Printf("Unknown folder ID %d for sequence entry %d, %q\n", sk.folder, sk.sequence, name)
success = false
continue
}
if prev, ok := seenSeq[fileInfoKey{folder: sk.folder, name: name}]; ok {
fmt.Printf("Duplicate sequence entry for %q, folder %q, seq %d (prev %d)\n", name, folder, sk.sequence, prev)
success = false
}
seenSeq[fileInfoKey{folder: sk.folder, name: name}] = sk.sequence
fi, ok := fileInfos[fileInfoKey{sk.folder, localDeviceKey, name}]
if !ok {
fmt.Printf("Missing FileInfo for sequence entry %d, folder %q, %q\n", sk.sequence, folder, name)
success = false
continue
}
if fi.Sequence != int64(sk.sequence) {
fmt.Printf("Sequence mismatch for %q, folder %q, %d (key) != %d (FileInfo)\n", name, folder, sk.sequence, fi.Sequence)
success = false
}
}
for nk := range needs {
folder := folders[nk.folder]
if folder == "" {
fmt.Printf("Unknown folder ID %d for need entry %q\n", nk.folder, nk.name)
success = false
continue
}
vl, ok := globals[nk]
if !ok {
fmt.Printf("Missing global for need entry %q, folder %q\n", nk.name, folder)
success = false
continue
}
if !needsLocally(vl) {
fmt.Printf("Need entry for file we don't need, %q, folder %q\n", nk.name, folder)
success = false
}
}
if d := len(blocklists) - len(usedBlocklists); d > 0 {
fmt.Printf("%d block list entries out of %d needs GC\n", d, len(blocklists))
}
return
}
func needsLocally(vl db.VersionList) bool {
var lv *protocol.Vector
for _, fv := range vl.Versions {
if bytes.Equal(fv.Device, protocol.LocalDeviceID[:]) {
lv = &fv.Version
break
}
}
if lv == nil {
return true // proviosinally, it looks like we need the file
}
return !lv.GreaterEqual(vl.Versions[0].Version)
}