From 47ee5a45661d30b1265b30731525a3a7f4cc7348 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 28 Sep 2008 12:12:07 -0700 Subject: [PATCH] imsm: manage a list of missing disks If a drive is removed while mdmon is not running, we need a way to identify what is missing and mark that disk as failed in the metadata. At ->load_super() time create a list of missing disks, where a missing disk is one that is marked in-sync yet does not appear in super->disks. Signed-off-by: Dan Williams --- super-intel.c | 107 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 9 deletions(-) diff --git a/super-intel.c b/super-intel.c index 19c964f..7f8dd34 100644 --- a/super-intel.c +++ b/super-intel.c @@ -180,6 +180,7 @@ struct intel_super { int fd; } *disks; struct dl *add; /* list of disks to add while mdmon active */ + struct dl *missing; /* disks removed while we weren't looking */ struct bbm_log *bbm_log; }; @@ -1321,12 +1322,19 @@ static void __free_imsm_disk(struct dl *d) } static void free_imsm_disks(struct intel_super *super) { - while (super->disks) { - struct dl *d = super->disks; + struct dl *d; + while (super->disks) { + d = super->disks; super->disks = d->next; __free_imsm_disk(d); } + while (super->missing) { + d = super->missing; + super->missing = d->next; + __free_imsm_disk(d); + } + } /* free all the pieces hanging off of a super pointer */ @@ -1378,6 +1386,49 @@ static struct intel_super *alloc_super(int creating_imsm) } #ifndef MDASSEMBLE +/* find_missing - helper routine for load_super_imsm_all that identifies + * disks that have disappeared from the system. This routine relies on + * the mpb being uptodate, which it is at load time.
+ */ +static int find_missing(struct intel_super *super) +{ + int i; + struct imsm_super *mpb = super->anchor; + struct dl *dl; + struct imsm_disk *disk; + __u32 status; + + for (i = 0; i < mpb->num_disks; i++) { + disk = __get_imsm_disk(mpb, i); + for (dl = super->disks; dl; dl = dl->next) + if (serialcmp(dl->disk.serial, disk->serial) == 0) + break; + if (dl) + continue; + /* ok we have a 'disk' without a live entry in + * super->disks + */ + status = __le32_to_cpu(disk->status); + if (status & FAILED_DISK || !(status & USABLE_DISK)) + continue; /* never mind, already marked */ + + dl = malloc(sizeof(*dl)); + if (!dl) + return 1; + dl->major = 0; + dl->minor = 0; + dl->fd = -1; + dl->devname = strdup("missing"); + dl->index = i; + serialcpy(dl->serial, disk->serial); + dl->disk = *disk; + dl->next = super->missing; + super->missing = dl; + } + + return 0; +} + static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, char *devname, int keep_fd) { @@ -1462,6 +1513,12 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, close(dfd); } + + if (find_missing(super) != 0) { + free_imsm(super); + return 2; + } + if (st->subarray[0]) { if (atoi(st->subarray) <= super->anchor->num_raid_devs) super->current_vol = atoi(st->subarray); @@ -1861,6 +1918,8 @@ static int write_super_imsm(struct intel_super *super, int doclose) else mpb->disk[d->index] = d->disk; } + for (d = super->missing; d; d = d->next) + mpb->disk[d->index] = d->disk; for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = __get_imsm_dev(mpb, i); @@ -2471,6 +2530,18 @@ static int is_rebuilding(struct imsm_dev *dev) return 0; } +static void mark_failure(struct imsm_disk *disk) +{ + __u32 status = __le32_to_cpu(disk->status); + + if (status & FAILED_DISK) + return; + status |= FAILED_DISK; + disk->status = __cpu_to_le32(status); + disk->scsi_id = __cpu_to_le32(~(__u32)0); + memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); +} + /* Handle dirty 
-> clean transititions and resync. Degraded and rebuild * states are handled in imsm_set_disk() with one exception, when a * resync is stopped due to a new failure this routine will set the @@ -2485,6 +2556,17 @@ static int imsm_set_array_state(struct active_array *a, int consistent) int failed = imsm_count_failed(super, dev); __u8 map_state = imsm_check_degraded(super, dev, failed); + /* before we activate this array handle any missing disks */ + if (consistent == 2 && super->missing) { + struct dl *dl; + + dprintf("imsm: mark missing\n"); + end_migration(dev, map_state); + for (dl = super->missing; dl; dl = dl->next) + mark_failure(&dl->disk); + super->updates_pending++; + } + if (consistent == 2 && (!is_resync_complete(a) || map_state != IMSM_T_STATE_NORMAL || @@ -2557,12 +2639,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state) /* check for new failures */ status = __le32_to_cpu(disk->status); if ((state & DS_FAULTY) && !(status & FAILED_DISK)) { - status |= FAILED_DISK; - disk->status = __cpu_to_le32(status); - disk->scsi_id = __cpu_to_le32(~(__u32)0); - memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); + mark_failure(disk); super->updates_pending++; } + /* check if in_sync */ if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) { struct imsm_map *migr_map = get_imsm_map(dev, 1); @@ -2986,12 +3066,18 @@ static void imsm_process_update(struct supertype *st, if (!found) { struct dl **dlp; + /* We know that 'manager' isn't touching anything, + * so it is safe to delete + */ for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) if ((*dlp)->index == victim) break; - /* We know that 'manager' isn't touching anything, - * so it is safe to: - */ + + /* victim may be on the missing list */ + if (!*dlp) + for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; imsm_delete(super, dlp, victim); } break; @@ -3174,6 +3260,9 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, int 
index) for (iter = super->disks; iter; iter = iter->next) if (iter->index > index) iter->index--; + for (iter = super->missing; iter; iter = iter->next) + if (iter->index > index) + iter->index--; for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i);