From 696e95a1df6a4030b9d5bd4eb0a94fa70e918a66 Mon Sep 17 00:00:00 2001 From: Robert Buchholz Date: Mon, 10 Sep 2012 17:25:27 +1000 Subject: [PATCH] raid6check: Repair mode used geo_map incorrectly In repair mode, the data block indices to be repaired were calculated using geo_map() which returns the disk slot for a data block index and not the reverse. Now we simply store the reverse of that calculation when we do it anyway. Signed-off-by: NeilBrown --- raid6check.c | 24 +++++++++++++----------- tests/19repair-does-not-destroy | 29 +++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 tests/19repair-does-not-destroy diff --git a/raid6check.c b/raid6check.c index dffadbe..51e7cca 100644 --- a/raid6check.c +++ b/raid6check.c @@ -116,6 +116,7 @@ int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, char *stripe_buf = xmalloc(raid_disks * chunk_size); char **stripes = xmalloc(raid_disks * sizeof(char*)); char **blocks = xmalloc(raid_disks * sizeof(char*)); + int *block_index_for_slot = xmalloc(raid_disks * sizeof(int)); uint8_t *p = xmalloc(chunk_size); uint8_t *q = xmalloc(chunk_size); int *results = xmalloc(chunk_size * sizeof(int)); @@ -172,6 +173,7 @@ int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, for (i = 0 ; i < data_disks ; i++) { int disk = geo_map(i, start, raid_disks, level, layout); blocks[i] = stripes[disk]; + block_index_for_slot[disk] = i; printf("%d->%d\n", i, disk); } @@ -179,7 +181,9 @@ int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, diskP = geo_map(-1, start, raid_disks, level, layout); diskQ = geo_map(-2, start, raid_disks, level, layout); blocks[data_disks] = stripes[diskP]; + block_index_for_slot[diskP] = data_disks; blocks[data_disks+1] = stripes[diskQ]; + block_index_for_slot[diskQ] = data_disks+1; if (memcmp(p, stripes[diskP], chunk_size) != 0) { printf("P(%d) wrong at %llu\n", diskP, start); @@ -208,23 
+212,21 @@ int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, if (failed_disk1 == diskQ || failed_disk2 == diskQ) { char *all_but_failed_blocks[data_disks]; - int failed_data; + int failed_data_or_p; int failed_block_index; if (failed_disk1 == diskQ) - failed_data = failed_disk2; + failed_data_or_p = failed_disk2; else - failed_data = failed_disk1; - printf("Repairing D/P(%d) and Q\n", failed_data); - failed_block_index = geo_map( - failed_data, start, raid_disks, - level, layout); + failed_data_or_p = failed_disk1; + printf("Repairing D/P(%d) and Q\n", failed_data_or_p); + failed_block_index = block_index_for_slot[failed_data_or_p]; for (i=0; i < data_disks; i++) if (failed_block_index == i) all_but_failed_blocks[i] = stripes[diskP]; else all_but_failed_blocks[i] = blocks[i]; - xor_blocks(stripes[failed_data], + xor_blocks(stripes[failed_data_or_p], all_but_failed_blocks, data_disks, chunk_size); qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size); } else { @@ -235,13 +237,13 @@ int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, failed_data = failed_disk2; else failed_data = failed_disk1; - failed_block_index = geo_map(failed_data, start, raid_disks, level, layout); + failed_block_index = block_index_for_slot[failed_data]; printf("Repairing D(%d) and P\n", failed_data); raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks); } else { printf("Repairing D and D\n"); - int failed_block_index1 = geo_map(failed_disk1, start, raid_disks, level, layout); - int failed_block_index2 = geo_map(failed_disk2, start, raid_disks, level, layout); + int failed_block_index1 = block_index_for_slot[failed_disk1]; + int failed_block_index2 = block_index_for_slot[failed_disk2]; if (failed_block_index1 > failed_block_index2) { int t = failed_block_index1; failed_block_index1 = failed_block_index2; diff --git a/tests/19repair-does-not-destroy 
b/tests/19repair-does-not-destroy new file mode 100644 index 0000000..1d3b9b4 --- /dev/null +++ b/tests/19repair-does-not-destroy @@ -0,0 +1,29 @@ +number_of_disks=7 +chunksize_in_kib=512 +array_data_size_in_kib=$[chunksize_in_kib*(number_of_disks-2)*number_of_disks] +array_data_size_in_b=$[array_data_size_in_kib*1024] +devs="$dev0 $dev1 $dev2 $dev3 $dev4 $dev5 $dev6" + +dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$array_data_size_in_kib +mdadm -CR $md0 -l6 -n$number_of_disks -c $chunksize_in_kib $devs +dd if=/tmp/RandFile of=$md0 bs=1024 count=$array_data_size_in_kib +blockdev --flushbufs $md0; sync +check wait +blockdev --flushbufs $devs; sync +echo 3 > /proc/sys/vm/drop_caches +$dir/raid6check $md0 repair 1 2 3 > /dev/null # D D +$dir/raid6check $md0 repair 8 2 5 > /dev/null # D P +$dir/raid6check $md0 repair 15 4 6 > /dev/null # D Q +$dir/raid6check $md0 repair 22 5 6 > /dev/null # P Q +$dir/raid6check $md0 repair 3 4 0 > /dev/null # Q D +$dir/raid6check $md0 repair 3 3 1 > /dev/null # P D +$dir/raid6check $md0 repair 6 4 5 > /dev/null # D<D +$dir/raid6check $md0 repair 13 5 4 > /dev/null # D>D +blockdev --flushbufs $devs; sync +echo 3 > /proc/sys/vm/drop_caches +$dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" && { echo errors detected; exit 2; } +cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo should not mess up correct stripe ; exit 2; } + +mdadm -S $md0 +udevadm settle +blockdev --flushbufs $md0 $devs; sync