imsm: manage a list of missing disks

If a drive is removed while mdmon is not running we need a way to
identify what is missing and mark that disk as failed in the metadata.
At ->load_super() time create a list of missing disks defined as a disk
that is marked in-sync yet does not appear in super->disks.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
Dan Williams 2008-09-28 12:12:07 -07:00
parent 1ee1e9fc62
commit 47ee5a4566
1 changed files with 98 additions and 9 deletions

View File

@ -180,6 +180,7 @@ struct intel_super {
int fd;
} *disks;
struct dl *add; /* list of disks to add while mdmon active */
struct dl *missing; /* disks removed while we weren't looking */
struct bbm_log *bbm_log;
};
@ -1321,12 +1322,19 @@ static void __free_imsm_disk(struct dl *d)
}
static void free_imsm_disks(struct intel_super *super)
{
while (super->disks) {
struct dl *d = super->disks;
struct dl *d;
while (super->disks) {
d = super->disks;
super->disks = d->next;
__free_imsm_disk(d);
}
while (super->missing) {
d = super->missing;
super->missing = d->next;
__free_imsm_disk(d);
}
}
/* free all the pieces hanging off of a super pointer */
@ -1378,6 +1386,49 @@ static struct intel_super *alloc_super(int creating_imsm)
}
#ifndef MDASSEMBLE
/* find_missing - helper routine for load_super_imsm_all that identifies
* disks that have disappeared from the system. This routine relies on
* the mpb being uptodate, which it is at load time.
*/
static int find_missing(struct intel_super *super)
{
int i;
struct imsm_super *mpb = super->anchor;
struct dl *dl;
struct imsm_disk *disk;
__u32 status;
for (i = 0; i < mpb->num_disks; i++) {
disk = __get_imsm_disk(mpb, i);
for (dl = super->disks; dl; dl = dl->next)
if (serialcmp(dl->disk.serial, disk->serial) == 0)
break;
if (dl)
continue;
/* ok we have a 'disk' without a live entry in
* super->disks
*/
status = __le32_to_cpu(disk->status);
if (status & FAILED_DISK || !(status & USABLE_DISK))
continue; /* never mind, already marked */
dl = malloc(sizeof(*dl));
if (!dl)
return 1;
dl->major = 0;
dl->minor = 0;
dl->fd = -1;
dl->devname = strdup("missing");
dl->index = i;
serialcpy(dl->serial, disk->serial);
dl->disk = *disk;
dl->next = super->missing;
super->missing = dl;
}
return 0;
}
static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
char *devname, int keep_fd)
{
@ -1462,6 +1513,12 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
close(dfd);
}
if (find_missing(super) != 0) {
free_imsm(super);
return 2;
}
if (st->subarray[0]) {
if (atoi(st->subarray) <= super->anchor->num_raid_devs)
super->current_vol = atoi(st->subarray);
@ -1861,6 +1918,8 @@ static int write_super_imsm(struct intel_super *super, int doclose)
else
mpb->disk[d->index] = d->disk;
}
for (d = super->missing; d; d = d->next)
mpb->disk[d->index] = d->disk;
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
@ -2471,6 +2530,18 @@ static int is_rebuilding(struct imsm_dev *dev)
return 0;
}
static void mark_failure(struct imsm_disk *disk)
{
__u32 status = __le32_to_cpu(disk->status);
if (status & FAILED_DISK)
return;
status |= FAILED_DISK;
disk->status = __cpu_to_le32(status);
disk->scsi_id = __cpu_to_le32(~(__u32)0);
memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
}
/* Handle dirty -> clean transititions and resync. Degraded and rebuild
* states are handled in imsm_set_disk() with one exception, when a
* resync is stopped due to a new failure this routine will set the
@ -2485,6 +2556,17 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
int failed = imsm_count_failed(super, dev);
__u8 map_state = imsm_check_degraded(super, dev, failed);
/* before we activate this array handle any missing disks */
if (consistent == 2 && super->missing) {
struct dl *dl;
dprintf("imsm: mark missing\n");
end_migration(dev, map_state);
for (dl = super->missing; dl; dl = dl->next)
mark_failure(&dl->disk);
super->updates_pending++;
}
if (consistent == 2 &&
(!is_resync_complete(a) ||
map_state != IMSM_T_STATE_NORMAL ||
@ -2557,12 +2639,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
/* check for new failures */
status = __le32_to_cpu(disk->status);
if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
status |= FAILED_DISK;
disk->status = __cpu_to_le32(status);
disk->scsi_id = __cpu_to_le32(~(__u32)0);
memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
mark_failure(disk);
super->updates_pending++;
}
/* check if in_sync */
if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) {
struct imsm_map *migr_map = get_imsm_map(dev, 1);
@ -2986,12 +3066,18 @@ static void imsm_process_update(struct supertype *st,
if (!found) {
struct dl **dlp;
/* We know that 'manager' isn't touching anything,
* so it is safe to delete
*/
for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
if ((*dlp)->index == victim)
break;
/* We know that 'manager' isn't touching anything,
* so it is safe to:
*/
/* victim may be on the missing list */
if (!*dlp)
for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
if ((*dlp)->index == victim)
break;
imsm_delete(super, dlp, victim);
}
break;
@ -3174,6 +3260,9 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, int index)
for (iter = super->disks; iter; iter = iter->next)
if (iter->index > index)
iter->index--;
for (iter = super->missing; iter; iter = iter->next)
if (iter->index > index)
iter->index--;
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);