2001-08-23 04:33:20 +02:00
|
|
|
/*
|
2002-03-08 01:03:52 +01:00
|
|
|
* mdadm - manage Linux "md" devices aka RAID arrays.
|
2001-08-23 04:33:20 +02:00
|
|
|
*
|
2009-06-02 06:35:45 +02:00
|
|
|
* Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
|
2001-08-23 04:33:20 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*
|
|
|
|
* Author: Neil Brown
|
2009-06-02 06:35:45 +02:00
|
|
|
* Email: <neilb@suse.de>
|
2001-08-23 04:33:20 +02:00
|
|
|
*/
|
|
|
|
|
2002-03-08 01:03:52 +01:00
|
|
|
#include "mdadm.h"
|
2001-08-23 04:33:20 +02:00
|
|
|
#include "md_p.h"
|
|
|
|
#include "md_u.h"
|
2002-04-04 03:58:32 +02:00
|
|
|
#include <sys/wait.h>
|
2007-09-26 18:12:45 +02:00
|
|
|
#include <signal.h>
|
2007-09-26 18:12:46 +02:00
|
|
|
#include <limits.h>
|
2005-12-05 06:55:56 +01:00
|
|
|
#include <syslog.h>
|
2001-08-23 04:33:20 +02:00
|
|
|
|
2006-03-29 01:02:45 +02:00
|
|
|
/* The largest number of disks current arrays can manage is 384.
 * This really should be dynamic, but that will have to wait.
 * At least it isn't MD_SB_DISKS.
 */
|
|
|
|
#define MaxDisks 384
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state {
|
|
|
|
char *devname;
|
|
|
|
int devnum; /* to sync with mdstat info */
|
|
|
|
long utime;
|
|
|
|
int err;
|
|
|
|
char *spare_group;
|
|
|
|
int active, working, failed, spare, raid;
|
|
|
|
int expected_spares;
|
|
|
|
int devstate[MaxDisks];
|
2010-11-26 11:49:33 +01:00
|
|
|
dev_t devid[MaxDisks];
|
2010-11-22 10:58:07 +01:00
|
|
|
int percent;
|
|
|
|
int parent_dev; /* For subarray, devnum of parent.
|
|
|
|
* For others, NoMdDev
|
|
|
|
*/
|
|
|
|
struct supertype *metadata;
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *subarray;/* for a container it is a link to first subarray
|
|
|
|
* for a subarray it is a link to next subarray
|
|
|
|
* in the same container */
|
|
|
|
struct state *parent; /* for a subarray it is a link to its container
|
|
|
|
*/
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *next;
|
|
|
|
};
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
/* Where, and how, alert() should report an event. */
struct alert_info {
	char *mailaddr;		/* recipient of mailed alerts, or NULL for no mail */
	char *mailfrom;		/* "From:" header text; NULL selects the built-in one */
	char *alert_cmd;	/* program exec'd for every event, or NULL */
	int dosyslog;		/* non-zero: also report through syslog() */
};
|
2010-11-22 10:58:07 +01:00
|
|
|
/* Forward declarations of the file-local helpers. */
static int make_daemon(char *pidfile);
static int check_one_sharer(int scan);
static void alert(char *event, char *dev, char *disc,
		  struct alert_info *info);
static int check_array(struct state *st, struct mdstat_ent *mdstat, int test,
		       struct alert_info *info, int increments);
static int add_new_arrays(struct mdstat_ent *mdstat,
			  struct state **statelist, int test,
			  struct alert_info *info);
static void try_spare_migration(struct state *statelist,
				struct alert_info *info);
static void link_containers_with_subarrays(struct state *list);
|
2010-11-22 10:58:07 +01:00
|
|
|
|
2010-11-22 10:58:05 +01:00
|
|
|
int Monitor(struct mddev_dev *devlist,
|
2001-08-23 04:33:20 +02:00
|
|
|
char *mailaddr, char *alert_cmd,
|
2003-07-29 01:59:00 +02:00
|
|
|
int period, int daemonise, int scan, int oneshot,
|
2010-11-22 10:58:06 +01:00
|
|
|
int dosyslog, int test, char *pidfile, int increments,
|
|
|
|
int share)
|
2001-08-23 04:33:20 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Every few seconds, scan every md device looking for changes
|
|
|
|
* When a change is found, log it, possibly run the alert command,
|
|
|
|
* and possibly send Email
|
|
|
|
*
|
|
|
|
* For each array, we record:
|
|
|
|
* Update time
|
|
|
|
* active/working/failed/spare drives
|
|
|
|
* State of each device.
|
2002-04-04 03:58:32 +02:00
|
|
|
* %rebuilt if rebuilding
|
2001-08-23 04:33:20 +02:00
|
|
|
*
|
|
|
|
* If the update time changes, check out all the data again
|
|
|
|
* It is possible that we cannot get the state of each device
|
|
|
|
* due to bugs in the md kernel module.
|
2002-04-04 03:58:32 +02:00
|
|
|
* We also read /proc/mdstat to get rebuild percent,
|
|
|
|
* and to get state on all active devices incase of kernel bug.
|
2001-08-23 04:33:20 +02:00
|
|
|
*
|
2002-04-04 03:58:32 +02:00
|
|
|
* Events are:
|
|
|
|
* Fail
|
|
|
|
* An active device had Faulty set or Active/Sync removed
|
|
|
|
* FailSpare
|
|
|
|
* A spare device had Faulty set
|
|
|
|
* SpareActive
|
|
|
|
* An active device had a reverse transition
|
|
|
|
* RebuildStarted
|
|
|
|
* percent went from -1 to +ve
|
2009-10-19 04:13:58 +02:00
|
|
|
* RebuildNN
|
|
|
|
* percent went from below to not-below NN%
|
2002-04-04 03:58:32 +02:00
|
|
|
* DeviceDisappeared
|
|
|
|
* Couldn't access a device which was previously visible
|
2001-08-23 04:33:20 +02:00
|
|
|
*
|
|
|
|
* if we detect an array with active<raid and spare==0
|
|
|
|
* we look at other arrays that have same spare-group
|
|
|
|
* If we find one with active==raid and spare>0,
|
|
|
|
* and if we can get_disk_info and find a name
|
|
|
|
* Then we hot-remove and hot-add to the other array
|
|
|
|
*
|
2002-04-04 03:58:32 +02:00
|
|
|
* If devlist is NULL, then we can monitor everything because --scan
|
|
|
|
* was given. We get an initial list from config file and add anything
|
|
|
|
* that appears in /proc/mdstat
|
2001-08-23 04:33:20 +02:00
|
|
|
*/
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *statelist = NULL;
|
2001-08-23 04:33:20 +02:00
|
|
|
int finished = 0;
|
2002-04-04 03:58:32 +02:00
|
|
|
struct mdstat_ent *mdstat = NULL;
|
2006-05-15 03:27:04 +02:00
|
|
|
char *mailfrom = NULL;
|
2010-11-22 10:58:07 +01:00
|
|
|
struct alert_info info;
|
2002-04-04 03:58:32 +02:00
|
|
|
|
2003-03-03 00:11:38 +01:00
|
|
|
if (!mailaddr) {
|
2006-06-26 07:11:01 +02:00
|
|
|
mailaddr = conf_get_mailaddr();
|
2003-03-03 00:11:38 +01:00
|
|
|
if (mailaddr && ! scan)
|
2003-03-12 23:24:39 +01:00
|
|
|
fprintf(stderr, Name ": Monitor using email address \"%s\" from config file\n",
|
2003-03-03 00:11:38 +01:00
|
|
|
mailaddr);
|
|
|
|
}
|
2006-06-26 07:11:01 +02:00
|
|
|
mailfrom = conf_get_mailfrom();
|
2006-05-15 03:27:04 +02:00
|
|
|
|
2003-03-03 00:11:38 +01:00
|
|
|
if (!alert_cmd) {
|
2006-06-26 07:11:01 +02:00
|
|
|
alert_cmd = conf_get_program();
|
2003-03-03 00:11:38 +01:00
|
|
|
if (alert_cmd && ! scan)
|
2003-03-12 23:24:39 +01:00
|
|
|
fprintf(stderr, Name ": Monitor using program \"%s\" from config file\n",
|
2003-03-03 00:11:38 +01:00
|
|
|
alert_cmd);
|
|
|
|
}
|
2003-03-12 23:24:39 +01:00
|
|
|
if (scan && !mailaddr && !alert_cmd) {
|
|
|
|
fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
|
2003-03-03 00:11:38 +01:00
|
|
|
return 1;
|
2003-03-12 23:24:39 +01:00
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
info.alert_cmd = alert_cmd;
|
|
|
|
info.mailaddr = mailaddr;
|
|
|
|
info.mailfrom = mailfrom;
|
|
|
|
info.dosyslog = dosyslog;
|
2003-03-03 00:11:38 +01:00
|
|
|
|
2010-12-15 05:51:53 +01:00
|
|
|
if (daemonise) {
|
|
|
|
int rv = make_daemon(pidfile);
|
|
|
|
if (rv >= 0)
|
|
|
|
return rv;
|
|
|
|
}
|
2002-04-04 03:58:32 +02:00
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
if (share)
|
|
|
|
if (check_one_sharer(scan))
|
|
|
|
return 1;
|
2010-11-22 10:58:06 +01:00
|
|
|
|
2002-04-04 03:58:32 +02:00
|
|
|
if (devlist == NULL) {
|
2010-11-22 10:58:05 +01:00
|
|
|
struct mddev_ident *mdlist = conf_get_ident(NULL);
|
2002-04-04 03:58:32 +02:00
|
|
|
for (; mdlist; mdlist=mdlist->next) {
|
2008-11-04 10:50:38 +01:00
|
|
|
struct state *st;
|
|
|
|
if (mdlist->devname == NULL)
|
|
|
|
continue;
|
2009-05-11 07:17:05 +02:00
|
|
|
if (strcasecmp(mdlist->devname, "<ignore>") == 0)
|
|
|
|
continue;
|
2010-11-22 10:58:07 +01:00
|
|
|
st = calloc(1, sizeof *st);
|
2002-04-04 03:58:32 +02:00
|
|
|
if (st == NULL)
|
|
|
|
continue;
|
2009-05-11 07:18:35 +02:00
|
|
|
if (mdlist->devname[0] == '/')
|
|
|
|
st->devname = strdup(mdlist->devname);
|
|
|
|
else {
|
|
|
|
st->devname = malloc(8+strlen(mdlist->devname)+1);
|
|
|
|
strcpy(strcpy(st->devname, "/dev/md/"),
|
|
|
|
mdlist->devname);
|
|
|
|
}
|
2002-04-04 03:58:32 +02:00
|
|
|
st->next = statelist;
|
2007-09-26 18:12:46 +02:00
|
|
|
st->devnum = INT_MAX;
|
2002-04-04 03:58:32 +02:00
|
|
|
st->percent = -2;
|
2003-10-29 00:20:01 +01:00
|
|
|
st->expected_spares = mdlist->spare_disks;
|
2002-04-04 03:58:32 +02:00
|
|
|
if (mdlist->spare_group)
|
|
|
|
st->spare_group = strdup(mdlist->spare_group);
|
|
|
|
statelist = st;
|
|
|
|
}
|
|
|
|
} else {
|
2010-11-22 10:58:05 +01:00
|
|
|
struct mddev_dev *dv;
|
2002-04-04 03:58:32 +02:00
|
|
|
for (dv=devlist ; dv; dv=dv->next) {
|
2010-11-22 10:58:05 +01:00
|
|
|
struct mddev_ident *mdlist = conf_get_ident(dv->devname);
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *st = calloc(1, sizeof *st);
|
2002-04-04 03:58:32 +02:00
|
|
|
if (st == NULL)
|
|
|
|
continue;
|
|
|
|
st->devname = strdup(dv->devname);
|
|
|
|
st->next = statelist;
|
2007-09-26 18:12:46 +02:00
|
|
|
st->devnum = INT_MAX;
|
2002-04-04 03:58:32 +02:00
|
|
|
st->percent = -2;
|
2003-10-29 00:20:01 +01:00
|
|
|
st->expected_spares = -1;
|
2004-08-11 04:16:01 +02:00
|
|
|
if (mdlist) {
|
|
|
|
st->expected_spares = mdlist->spare_disks;
|
|
|
|
if (mdlist->spare_group)
|
|
|
|
st->spare_group = strdup(mdlist->spare_group);
|
|
|
|
}
|
2002-04-04 03:58:32 +02:00
|
|
|
statelist = st;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
while (! finished) {
|
2003-07-29 01:59:00 +02:00
|
|
|
int new_found = 0;
|
2002-04-04 03:58:32 +02:00
|
|
|
struct state *st;
|
2010-11-22 10:58:07 +01:00
|
|
|
int anydegraded = 0;
|
2002-04-04 03:58:32 +02:00
|
|
|
|
|
|
|
if (mdstat)
|
|
|
|
free_mdstat(mdstat);
|
2006-01-31 01:39:50 +01:00
|
|
|
mdstat = mdstat_read(oneshot?0:1, 0);
|
2002-04-04 03:58:32 +02:00
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
for (st=statelist; st; st=st->next)
|
2010-11-22 10:58:07 +01:00
|
|
|
if (check_array(st, mdstat, test, &info, increments))
|
2010-11-22 10:58:07 +01:00
|
|
|
anydegraded = 1;
|
2010-11-22 10:58:07 +01:00
|
|
|
|
2002-04-04 03:58:32 +02:00
|
|
|
/* now check if there are any new devices found in mdstat */
|
2010-11-22 10:58:07 +01:00
|
|
|
if (scan)
|
2010-11-30 13:44:45 +01:00
|
|
|
new_found = add_new_arrays(mdstat, &statelist, test,
|
2010-11-22 10:58:07 +01:00
|
|
|
&info);
|
2010-11-22 10:58:07 +01:00
|
|
|
|
2002-04-04 03:58:32 +02:00
|
|
|
/* If an array has active < raid && spare == 0 && spare_group != NULL
|
|
|
|
* Look for another array with spare > 0 and active == raid and same spare_group
|
|
|
|
* if found, choose a device and hotremove/hotadd
|
|
|
|
*/
|
2010-11-22 10:58:07 +01:00
|
|
|
if (share && anydegraded)
|
2010-11-22 10:58:07 +01:00
|
|
|
try_spare_migration(statelist, &info);
|
2003-07-29 01:59:00 +02:00
|
|
|
if (!new_found) {
|
|
|
|
if (oneshot)
|
|
|
|
break;
|
|
|
|
else
|
2004-06-04 14:03:19 +02:00
|
|
|
mdstat_wait(period);
|
2003-07-29 01:59:00 +02:00
|
|
|
}
|
2004-01-22 03:10:29 +01:00
|
|
|
test = 0;
|
2001-08-23 04:33:20 +02:00
|
|
|
}
|
2004-11-01 05:49:34 +01:00
|
|
|
if (pidfile)
|
|
|
|
unlink(pidfile);
|
2001-08-23 04:33:20 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
/*
 * Fork so that monitoring continues in a background child.
 *
 * Return:
 *   -1 in the forked daemon
 *    0 in the parent
 *    1 on error
 * so a non-negative value becomes the exit code.
 */
static int make_daemon(char *pidfile)
{
	int child = fork();

	if (child < 0) {
		perror("daemonise");
		return 1;
	}

	if (child > 0) {
		/* Parent: publish the daemon's pid, on stdout unless a
		 * pid file was named.  Failing to write the pid file is
		 * reported but is not treated as an error.
		 */
		FILE *out;

		if (!pidfile) {
			printf("%d\n", child);
			return 0;
		}
		out = fopen(pidfile, "w");
		if (out) {
			fprintf(out,"%d\n", child);
			fclose(out);
		} else
			perror("cannot create pid file");
		return 0;
	}

	/* Child: swap stdin for /dev/null (the open() presumably lands on
	 * the just-closed descriptor 0), copy it over stdout and stderr,
	 * then start a new session.
	 */
	close(0);
	open("/dev/null", O_RDWR);
	dup2(0,1);
	dup2(0,2);
	setsid();
	return -1;
}
|
|
|
|
|
|
|
|
static int check_one_sharer(int scan)
|
|
|
|
{
|
|
|
|
int pid, rv;
|
|
|
|
FILE *fp;
|
|
|
|
char dir[20];
|
|
|
|
struct stat buf;
|
|
|
|
fp = fopen("/var/run/mdadm/autorebuild.pid", "r");
|
|
|
|
if (fp) {
|
|
|
|
fscanf(fp, "%d", &pid);
|
|
|
|
sprintf(dir, "/proc/%d", pid);
|
|
|
|
rv = stat(dir, &buf);
|
|
|
|
if (rv != -1) {
|
|
|
|
if (scan) {
|
|
|
|
fprintf(stderr, Name ": Only one "
|
|
|
|
"autorebuild process allowed"
|
|
|
|
" in scan mode, aborting\n");
|
|
|
|
fclose(fp);
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, Name ": Warning: One"
|
|
|
|
" autorebuild process already"
|
2010-11-26 14:29:53 +01:00
|
|
|
" running.\n");
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
fclose(fp);
|
|
|
|
}
|
|
|
|
if (scan) {
|
2010-11-26 14:29:53 +01:00
|
|
|
if (mkdir("/var/run/mdadm", S_IRWXU) < 0 &&
|
|
|
|
errno != EEXIST) {
|
|
|
|
fprintf(stderr, Name ": Can't create "
|
|
|
|
"autorebuild.pid file\n");
|
|
|
|
} else {
|
|
|
|
fp = fopen("/var/run/mdadm/autorebuild.pid", "w");
|
|
|
|
if (!fp)
|
|
|
|
fprintf(stderr, Name ": Cannot create"
|
|
|
|
" autorebuild.pid"
|
|
|
|
"file\n");
|
|
|
|
else {
|
|
|
|
pid = getpid();
|
|
|
|
fprintf(fp, "%d\n", pid);
|
|
|
|
fclose(fp);
|
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2001-08-23 04:33:20 +02:00
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
static void alert(char *event, char *dev, char *disc, struct alert_info *info)
|
2001-08-23 04:33:20 +02:00
|
|
|
{
|
2005-12-05 06:55:56 +01:00
|
|
|
int priority;
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
if (!info->alert_cmd && !info->mailaddr) {
|
2002-03-07 00:17:40 +01:00
|
|
|
time_t now = time(0);
|
2007-12-14 10:13:43 +01:00
|
|
|
|
2002-04-04 03:58:32 +02:00
|
|
|
printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
|
2002-03-07 00:17:40 +01:00
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
if (info->alert_cmd) {
|
2001-08-23 04:33:20 +02:00
|
|
|
int pid = fork();
|
|
|
|
switch(pid) {
|
|
|
|
default:
|
|
|
|
waitpid(pid, NULL, 0);
|
|
|
|
break;
|
|
|
|
case -1:
|
|
|
|
break;
|
|
|
|
case 0:
|
2010-11-22 10:58:07 +01:00
|
|
|
execl(info->alert_cmd, info->alert_cmd,
|
|
|
|
event, dev, disc, NULL);
|
2001-08-23 04:33:20 +02:00
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
if (info->mailaddr &&
|
2007-12-14 10:13:43 +01:00
|
|
|
(strncmp(event, "Fail", 4)==0 ||
|
2004-01-22 03:10:29 +01:00
|
|
|
strncmp(event, "Test", 4)==0 ||
|
2006-05-15 03:35:47 +02:00
|
|
|
strncmp(event, "Spares", 6)==0 ||
|
2003-07-29 01:59:00 +02:00
|
|
|
strncmp(event, "Degrade", 7)==0)) {
|
2001-08-23 04:33:20 +02:00
|
|
|
FILE *mp = popen(Sendmail, "w");
|
|
|
|
if (mp) {
|
2006-05-19 05:58:45 +02:00
|
|
|
FILE *mdstat;
|
2001-08-23 04:33:20 +02:00
|
|
|
char hname[256];
|
|
|
|
gethostname(hname, sizeof(hname));
|
|
|
|
signal(SIGPIPE, SIG_IGN);
|
2010-11-22 10:58:07 +01:00
|
|
|
if (info->mailfrom)
|
|
|
|
fprintf(mp, "From: %s\n", info->mailfrom);
|
2006-05-15 03:27:04 +02:00
|
|
|
else
|
|
|
|
fprintf(mp, "From: " Name " monitoring <root>\n");
|
2010-11-22 10:58:07 +01:00
|
|
|
fprintf(mp, "To: %s\n", info->mailaddr);
|
|
|
|
fprintf(mp, "Subject: %s event on %s:%s\n\n",
|
|
|
|
event, dev, hname);
|
2001-08-23 04:33:20 +02:00
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
fprintf(mp,
|
|
|
|
"This is an automatically generated"
|
|
|
|
" mail message from " Name "\n");
|
2001-08-23 04:33:20 +02:00
|
|
|
fprintf(mp, "running on %s\n\n", hname);
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
fprintf(mp,
|
|
|
|
"A %s event had been detected on"
|
|
|
|
" md device %s.\n\n", event, dev);
|
2001-08-23 04:33:20 +02:00
|
|
|
|
2006-12-14 07:31:25 +01:00
|
|
|
if (disc && disc[0] != ' ')
|
2010-11-22 10:58:07 +01:00
|
|
|
fprintf(mp,
|
|
|
|
"It could be related to"
|
|
|
|
" component device %s.\n\n", disc);
|
2006-12-14 07:31:25 +01:00
|
|
|
if (disc && disc[0] == ' ')
|
|
|
|
fprintf(mp, "Extra information:%s.\n\n", disc);
|
2001-08-23 04:33:20 +02:00
|
|
|
|
|
|
|
fprintf(mp, "Faithfully yours, etc.\n");
|
2006-05-19 05:58:45 +02:00
|
|
|
|
|
|
|
mdstat = fopen("/proc/mdstat", "r");
|
|
|
|
if (mdstat) {
|
|
|
|
char buf[8192];
|
|
|
|
int n;
|
2010-11-22 10:58:07 +01:00
|
|
|
fprintf(mp,
|
|
|
|
"\nP.S. The /proc/mdstat file"
|
|
|
|
" currently contains the following:\n\n");
|
2006-05-19 05:58:45 +02:00
|
|
|
while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
|
2010-11-22 10:58:07 +01:00
|
|
|
n=fwrite(buf, 1, n, mp);
|
2006-05-19 05:58:45 +02:00
|
|
|
fclose(mdstat);
|
|
|
|
}
|
2009-07-10 06:39:20 +02:00
|
|
|
pclose(mp);
|
2001-08-23 04:33:20 +02:00
|
|
|
}
|
|
|
|
}
|
2005-12-05 06:55:56 +01:00
|
|
|
|
|
|
|
/* log the event to syslog maybe */
|
2010-11-22 10:58:07 +01:00
|
|
|
if (info->dosyslog) {
|
2005-12-05 06:55:56 +01:00
|
|
|
/* Log at a different severity depending on the event.
|
|
|
|
*
|
|
|
|
* These are the critical events: */
|
|
|
|
if (strncmp(event, "Fail", 4)==0 ||
|
|
|
|
strncmp(event, "Degrade", 7)==0 ||
|
|
|
|
strncmp(event, "DeviceDisappeared", 17)==0)
|
|
|
|
priority = LOG_CRIT;
|
|
|
|
/* Good to know about, but are not failures: */
|
|
|
|
else if (strncmp(event, "Rebuild", 7)==0 ||
|
2006-05-15 03:35:47 +02:00
|
|
|
strncmp(event, "MoveSpare", 9)==0 ||
|
|
|
|
strncmp(event, "Spares", 6) != 0)
|
2005-12-05 06:55:56 +01:00
|
|
|
priority = LOG_WARNING;
|
|
|
|
/* Everything else: */
|
|
|
|
else
|
|
|
|
priority = LOG_INFO;
|
|
|
|
|
|
|
|
if (disc)
|
2010-11-22 10:58:07 +01:00
|
|
|
syslog(priority,
|
|
|
|
"%s event detected on md device %s,"
|
|
|
|
" component device %s", event, dev, disc);
|
2005-12-05 06:55:56 +01:00
|
|
|
else
|
2010-11-22 10:58:07 +01:00
|
|
|
syslog(priority,
|
|
|
|
"%s event detected on md device %s",
|
|
|
|
event, dev);
|
2005-12-05 06:55:56 +01:00
|
|
|
}
|
2001-08-23 04:33:20 +02:00
|
|
|
}
|
2006-12-14 07:31:22 +01:00
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
static int check_array(struct state *st, struct mdstat_ent *mdstat,
|
2010-11-22 10:58:07 +01:00
|
|
|
int test, struct alert_info *ainfo,
|
2010-11-22 10:58:07 +01:00
|
|
|
int increments)
|
2010-11-22 10:58:07 +01:00
|
|
|
{
|
2010-11-28 23:51:27 +01:00
|
|
|
/* Update the state 'st' to reflect any changes shown in mdstat,
|
|
|
|
* or found by directly examining the array, and return
|
|
|
|
* '1' if the array is degraded, or '0' if it is optimal (or dead).
|
|
|
|
*/
|
2010-11-22 10:58:07 +01:00
|
|
|
struct { int state, major, minor; } info[MaxDisks];
|
|
|
|
mdu_array_info_t array;
|
|
|
|
struct mdstat_ent *mse = NULL, *mse2;
|
|
|
|
char *dev = st->devname;
|
|
|
|
int fd;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (test)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("TestMessage", dev, NULL, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
fd = open(dev, O_RDONLY);
|
|
|
|
if (fd < 0) {
|
|
|
|
if (!st->err)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("DeviceDisappeared", dev, NULL, ainfo);
|
|
|
|
st->err=1;
|
2010-11-22 10:58:07 +01:00
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
fcntl(fd, F_SETFD, FD_CLOEXEC);
|
|
|
|
if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
|
|
|
|
if (!st->err)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("DeviceDisappeared", dev, NULL, ainfo);
|
|
|
|
st->err=1;
|
2010-11-22 10:58:07 +01:00
|
|
|
close(fd);
|
2010-11-22 10:58:07 +01:00
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
/* It's much easier to list what array levels can't
|
|
|
|
* have a device disappear than all of them that can
|
|
|
|
*/
|
|
|
|
if (array.level == 0 || array.level == -1) {
|
|
|
|
if (!st->err)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("DeviceDisappeared", dev, "Wrong-Level", ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
st->err = 1;
|
|
|
|
close(fd);
|
2010-11-22 10:58:07 +01:00
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
if (st->devnum == INT_MAX) {
|
|
|
|
struct stat stb;
|
|
|
|
if (fstat(fd, &stb) == 0 &&
|
|
|
|
(S_IFMT&stb.st_mode)==S_IFBLK) {
|
|
|
|
if (major(stb.st_rdev) == MD_MAJOR)
|
|
|
|
st->devnum = minor(stb.st_rdev);
|
|
|
|
else
|
|
|
|
st->devnum = -1- (minor(stb.st_rdev)>>6);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
|
|
|
|
if (mse2->devnum == st->devnum) {
|
|
|
|
mse2->devnum = INT_MAX; /* flag it as "used" */
|
|
|
|
mse = mse2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!mse) {
|
|
|
|
/* duplicated array in statelist
|
|
|
|
* or re-created after reading mdstat*/
|
|
|
|
st->err = 1;
|
|
|
|
close(fd);
|
2010-11-22 10:58:07 +01:00
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
/* this array is in /proc/mdstat */
|
|
|
|
if (array.utime == 0)
|
|
|
|
/* external arrays don't update utime, so
|
|
|
|
* just make sure it is always different. */
|
|
|
|
array.utime = st->utime + 1;;
|
|
|
|
|
|
|
|
if (st->utime == array.utime &&
|
|
|
|
st->failed == array.failed_disks &&
|
|
|
|
st->working == array.working_disks &&
|
|
|
|
st->spare == array.spare_disks &&
|
|
|
|
(mse == NULL || (
|
|
|
|
mse->percent == st->percent
|
|
|
|
))) {
|
|
|
|
close(fd);
|
|
|
|
st->err = 0;
|
2010-11-28 23:51:27 +01:00
|
|
|
if ((st->active < st->raid) && st->spare == 0)
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
if (st->utime == 0 && /* new array */
|
|
|
|
mse->pattern && strchr(mse->pattern, '_') /* degraded */
|
|
|
|
)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("DegradedArray", dev, NULL, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
|
|
|
|
if (st->utime == 0 && /* new array */
|
|
|
|
st->expected_spares > 0 &&
|
|
|
|
array.spare_disks < st->expected_spares)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("SparesMissing", dev, NULL, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
if (st->percent == -1 &&
|
|
|
|
mse->percent >= 0)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("RebuildStarted", dev, NULL, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
if (st->percent >= 0 &&
|
|
|
|
mse->percent >= 0 &&
|
|
|
|
(mse->percent / increments) > (st->percent / increments)) {
|
|
|
|
char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
|
|
|
|
|
|
|
|
if((mse->percent / increments) == 0)
|
|
|
|
snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
|
|
|
|
else
|
|
|
|
snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
alert(percentalert, dev, NULL, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (mse->percent == -1 &&
|
|
|
|
st->percent >= 0) {
|
|
|
|
/* Rebuild/sync/whatever just finished.
|
|
|
|
* If there is a number in /mismatch_cnt,
|
|
|
|
* we should report that.
|
|
|
|
*/
|
|
|
|
struct mdinfo *sra =
|
|
|
|
sysfs_read(-1, st->devnum, GET_MISMATCH);
|
|
|
|
if (sra && sra->mismatch_cnt > 0) {
|
|
|
|
char cnt[40];
|
|
|
|
sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("RebuildFinished", dev, cnt, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
} else
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("RebuildFinished", dev, NULL, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
if (sra)
|
|
|
|
free(sra);
|
|
|
|
}
|
|
|
|
st->percent = mse->percent;
|
|
|
|
|
|
|
|
for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
|
|
|
|
i++) {
|
|
|
|
mdu_disk_info_t disc;
|
|
|
|
disc.number = i;
|
|
|
|
if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
|
|
|
|
info[i].state = disc.state;
|
|
|
|
info[i].major = disc.major;
|
|
|
|
info[i].minor = disc.minor;
|
|
|
|
} else
|
|
|
|
info[i].major = info[i].minor = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strncmp(mse->metadata_version, "external:", 9) == 0 &&
|
|
|
|
is_subarray(mse->metadata_version+9))
|
|
|
|
st->parent_dev =
|
|
|
|
devname2devnum(mse->metadata_version+10);
|
|
|
|
else
|
|
|
|
st->parent_dev = NoMdDev;
|
|
|
|
if (st->metadata == NULL &&
|
|
|
|
st->parent_dev == NoMdDev)
|
|
|
|
st->metadata = super_by_fd(fd, NULL);
|
|
|
|
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
for (i=0; i<MaxDisks; i++) {
|
|
|
|
mdu_disk_info_t disc = {0,0,0,0,0};
|
|
|
|
int newstate=0;
|
|
|
|
int change;
|
|
|
|
char *dv = NULL;
|
|
|
|
disc.number = i;
|
|
|
|
if (i > array.raid_disks + array.nr_disks) {
|
|
|
|
newstate = 0;
|
|
|
|
disc.major = disc.minor = 0;
|
|
|
|
} else if (info[i].major || info[i].minor) {
|
|
|
|
newstate = info[i].state;
|
|
|
|
dv = map_dev(info[i].major, info[i].minor, 1);
|
|
|
|
disc.state = newstate;
|
|
|
|
disc.major = info[i].major;
|
|
|
|
disc.minor = info[i].minor;
|
|
|
|
} else if (mse && mse->pattern && i < (int)strlen(mse->pattern)) {
|
|
|
|
switch(mse->pattern[i]) {
|
|
|
|
case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
|
|
|
|
case '_': newstate = 0; break;
|
|
|
|
}
|
|
|
|
disc.major = disc.minor = 0;
|
|
|
|
}
|
|
|
|
if (dv == NULL && st->devid[i])
|
|
|
|
dv = map_dev(major(st->devid[i]),
|
|
|
|
minor(st->devid[i]), 1);
|
|
|
|
change = newstate ^ st->devstate[i];
|
|
|
|
if (st->utime && change && !st->err) {
|
|
|
|
if (i < array.raid_disks &&
|
|
|
|
(((newstate&change)&(1<<MD_DISK_FAULTY)) ||
|
|
|
|
((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
|
|
|
|
((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
|
|
|
|
)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("Fail", dev, dv, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
else if (i >= array.raid_disks &&
|
|
|
|
(disc.major || disc.minor) &&
|
|
|
|
st->devid[i] == makedev(disc.major, disc.minor) &&
|
|
|
|
((newstate&change)&(1<<MD_DISK_FAULTY))
|
|
|
|
)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("FailSpare", dev, dv, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
else if (i < array.raid_disks &&
|
|
|
|
! (newstate & (1<<MD_DISK_REMOVED)) &&
|
|
|
|
(((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
|
|
|
|
((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
|
|
|
|
((newstate&change)&(1<<MD_DISK_SYNC)))
|
|
|
|
)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("SpareActive", dev, dv, ainfo);
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
st->devstate[i] = newstate;
|
|
|
|
st->devid[i] = makedev(disc.major, disc.minor);
|
|
|
|
}
|
|
|
|
st->active = array.active_disks;
|
|
|
|
st->working = array.working_disks;
|
|
|
|
st->spare = array.spare_disks;
|
|
|
|
st->failed = array.failed_disks;
|
|
|
|
st->utime = array.utime;
|
|
|
|
st->raid = array.raid_disks;
|
|
|
|
st->err = 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
if ((st->active < st->raid) && st->spare == 0)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
|
2010-11-30 13:44:45 +01:00
|
|
|
static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
|
2010-11-22 10:58:07 +01:00
|
|
|
int test, struct alert_info *info)
|
2010-11-22 10:58:07 +01:00
|
|
|
{
|
|
|
|
struct mdstat_ent *mse;
|
|
|
|
int new_found = 0;
|
|
|
|
|
|
|
|
for (mse=mdstat; mse; mse=mse->next)
|
|
|
|
if (mse->devnum != INT_MAX &&
|
|
|
|
(!mse->level || /* retrieve containers */
|
|
|
|
(strcmp(mse->level, "raid0") != 0 &&
|
|
|
|
strcmp(mse->level, "linear") != 0))
|
|
|
|
) {
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *st = calloc(1, sizeof *st);
|
2010-11-22 10:58:07 +01:00
|
|
|
mdu_array_info_t array;
|
|
|
|
int fd;
|
|
|
|
if (st == NULL)
|
|
|
|
continue;
|
|
|
|
st->devname = strdup(get_md_name(mse->devnum));
|
|
|
|
if ((fd = open(st->devname, O_RDONLY)) < 0 ||
|
|
|
|
ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
|
|
|
|
/* no such array */
|
|
|
|
if (fd >=0) close(fd);
|
|
|
|
put_md_name(st->devname);
|
|
|
|
free(st->devname);
|
|
|
|
if (st->metadata) {
|
|
|
|
st->metadata->ss->free_super(st->metadata);
|
|
|
|
free(st->metadata);
|
|
|
|
}
|
|
|
|
free(st);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
close(fd);
|
2010-11-30 13:44:45 +01:00
|
|
|
st->next = *statelist;
|
2010-11-22 10:58:07 +01:00
|
|
|
st->err = 1;
|
|
|
|
st->devnum = mse->devnum;
|
|
|
|
st->percent = -2;
|
|
|
|
st->expected_spares = -1;
|
|
|
|
if (strncmp(mse->metadata_version, "external:", 9) == 0 &&
|
|
|
|
is_subarray(mse->metadata_version+9))
|
|
|
|
st->parent_dev =
|
|
|
|
devname2devnum(mse->metadata_version+10);
|
|
|
|
else
|
|
|
|
st->parent_dev = NoMdDev;
|
2010-11-30 13:44:45 +01:00
|
|
|
*statelist = st;
|
2010-11-22 10:58:07 +01:00
|
|
|
if (test)
|
2010-11-22 10:58:07 +01:00
|
|
|
alert("TestMessage", st->devname, NULL, info);
|
|
|
|
alert("NewArray", st->devname, NULL, info);
|
2010-11-22 10:58:07 +01:00
|
|
|
new_found = 1;
|
|
|
|
}
|
|
|
|
return new_found;
|
|
|
|
}
|
|
|
|
|
2011-01-11 11:36:37 +01:00
|
|
|
static int get_min_spare_size_required(struct state *st, unsigned long long *sizep)
|
2010-11-22 10:58:07 +01:00
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
if (!st->metadata ||
|
2011-01-17 02:46:14 +01:00
|
|
|
!st->metadata->ss->min_acceptable_spare_size) {
|
|
|
|
*sizep = 0;
|
2011-01-11 11:36:37 +01:00
|
|
|
return 0;
|
2011-01-17 02:46:14 +01:00
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
|
|
|
|
fd = open(st->devname, O_RDONLY);
|
|
|
|
if (fd < 0)
|
2011-01-11 11:36:37 +01:00
|
|
|
return 1;
|
|
|
|
if (st->metadata->ss->external)
|
|
|
|
st->metadata->ss->load_container(st->metadata, fd, st->devname);
|
|
|
|
else
|
|
|
|
st->metadata->ss->load_super(st->metadata, fd, st->devname);
|
2010-11-22 10:58:07 +01:00
|
|
|
close(fd);
|
2011-01-11 11:36:37 +01:00
|
|
|
if (!st->metadata->sb)
|
|
|
|
return 1;
|
|
|
|
*sizep = st->metadata->ss->min_acceptable_spare_size(st->metadata);
|
2010-11-22 10:58:07 +01:00
|
|
|
st->metadata->ss->free_super(st->metadata);
|
|
|
|
|
2011-01-11 11:36:37 +01:00
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
|
2010-12-16 15:16:12 +01:00
|
|
|
static int check_donor(struct state *from, struct state *to)
|
2010-11-22 10:58:07 +01:00
|
|
|
{
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *sub;
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
if (from == to)
|
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
if (from->parent)
|
|
|
|
/* Cannot move from a member */
|
2010-11-22 10:58:07 +01:00
|
|
|
return 0;
|
2010-11-28 23:51:27 +01:00
|
|
|
if (from->err)
|
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
for (sub = from->subarray; sub; sub = sub->subarray)
|
|
|
|
/* If source array has degraded subarrays, don't
|
|
|
|
* remove anything
|
|
|
|
*/
|
|
|
|
if (sub->active < sub->raid)
|
|
|
|
return 0;
|
|
|
|
if (from->metadata->ss->external == 0)
|
|
|
|
if (from->active < from->raid)
|
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
if (from->spare <= 0)
|
|
|
|
return 0;
|
2010-11-22 10:58:07 +01:00
|
|
|
return 1;
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
|
2010-11-26 11:49:33 +01:00
|
|
|
static dev_t choose_spare(struct state *from, struct state *to,
|
2010-12-03 04:05:11 +01:00
|
|
|
struct domainlist *domlist, unsigned long long min_size)
|
2010-11-25 08:37:23 +01:00
|
|
|
{
|
|
|
|
int d;
|
2010-11-26 11:49:33 +01:00
|
|
|
dev_t dev = 0;
|
2010-11-25 08:37:23 +01:00
|
|
|
|
|
|
|
for (d = from->raid; !dev && d < MaxDisks; d++) {
|
|
|
|
if (from->devid[d] > 0 &&
|
|
|
|
from->devstate[d] == 0) {
|
|
|
|
struct dev_policy *pol;
|
|
|
|
unsigned long long dev_size;
|
|
|
|
|
|
|
|
if (min_size &&
|
|
|
|
dev_size_from_id(from->devid[d], &dev_size) &&
|
|
|
|
dev_size < min_size)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pol = devnum_policy(from->devid[d]);
|
|
|
|
if (from->spare_group)
|
|
|
|
pol_add(&pol, pol_domain,
|
|
|
|
from->spare_group, NULL);
|
|
|
|
if (domain_test(domlist, pol, to->metadata->ss->name))
|
|
|
|
dev = from->devid[d];
|
|
|
|
dev_policy_free(pol);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return dev;
|
|
|
|
}
|
|
|
|
|
2010-11-26 11:49:33 +01:00
|
|
|
static dev_t container_choose_spare(struct state *from, struct state *to,
|
2010-12-03 04:05:11 +01:00
|
|
|
struct domainlist *domlist,
|
|
|
|
unsigned long long min_size)
|
2010-11-25 08:58:27 +01:00
|
|
|
{
|
|
|
|
/* This is similar to choose_spare, but we cannot trust devstate,
|
|
|
|
* so we need to read the metadata instead
|
|
|
|
*/
|
2011-01-05 04:34:14 +01:00
|
|
|
struct mdinfo *list;
|
2010-11-25 08:58:27 +01:00
|
|
|
struct supertype *st = from->metadata;
|
2010-11-28 23:51:27 +01:00
|
|
|
int fd = open(from->devname, O_RDONLY);
|
2010-11-25 08:58:27 +01:00
|
|
|
int err;
|
2010-11-26 11:49:33 +01:00
|
|
|
dev_t dev = 0;
|
2010-11-25 08:58:27 +01:00
|
|
|
|
|
|
|
if (fd < 0)
|
|
|
|
return 0;
|
2011-01-05 04:34:14 +01:00
|
|
|
if (!st->ss->getinfo_super_disks) {
|
|
|
|
close(fd);
|
2010-11-25 08:58:27 +01:00
|
|
|
return 0;
|
2011-01-05 04:34:14 +01:00
|
|
|
}
|
2010-11-25 08:58:27 +01:00
|
|
|
|
|
|
|
err = st->ss->load_container(st, fd, NULL);
|
|
|
|
close(fd);
|
|
|
|
if (err)
|
|
|
|
return 0;
|
|
|
|
|
2011-01-05 04:34:14 +01:00
|
|
|
/* We only need one spare so full list not needed */
|
|
|
|
list = container_choose_spares(st, min_size, domlist, from->spare_group,
|
|
|
|
to->metadata->ss->name, 1);
|
|
|
|
if (list) {
|
|
|
|
struct mdinfo *disks = list->devs;
|
|
|
|
if (disks)
|
|
|
|
dev = makedev(disks->disk.major, disks->disk.minor);
|
|
|
|
sysfs_free(list);
|
2010-11-25 08:58:27 +01:00
|
|
|
}
|
2011-01-05 04:34:14 +01:00
|
|
|
st->ss->free_super(st);
|
2010-11-25 08:58:27 +01:00
|
|
|
return dev;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
static void try_spare_migration(struct state *statelist, struct alert_info *info)
|
2010-11-22 10:58:07 +01:00
|
|
|
{
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *from;
|
|
|
|
struct state *st;
|
2010-11-22 10:58:07 +01:00
|
|
|
|
|
|
|
link_containers_with_subarrays(statelist);
|
2010-11-22 10:58:07 +01:00
|
|
|
for (st = statelist; st; st = st->next)
|
|
|
|
if (st->active < st->raid &&
|
2010-11-26 14:31:15 +01:00
|
|
|
st->spare == 0 && !st->err) {
|
2010-11-22 10:58:07 +01:00
|
|
|
struct domainlist *domlist = NULL;
|
|
|
|
int d;
|
2010-11-22 10:58:07 +01:00
|
|
|
struct state *to = st;
|
2010-12-03 04:05:11 +01:00
|
|
|
unsigned long long min_size;
|
2010-11-22 10:58:07 +01:00
|
|
|
|
2011-01-13 14:22:16 +01:00
|
|
|
if (to->parent_dev != NoMdDev && !to->parent)
|
|
|
|
/* subarray monitored without parent container
|
|
|
|
* we can't move spares here */
|
|
|
|
continue;
|
|
|
|
|
2010-11-22 10:58:07 +01:00
|
|
|
if (to->parent)
|
|
|
|
/* member of a container */
|
|
|
|
to = to->parent;
|
2010-11-22 10:58:07 +01:00
|
|
|
|
2011-01-11 11:36:37 +01:00
|
|
|
if (get_min_spare_size_required(to, &min_size))
|
|
|
|
continue;
|
2010-12-03 04:11:29 +01:00
|
|
|
if (to->metadata->ss->external) {
|
|
|
|
/* We must make sure there is
|
|
|
|
* no suitable spare in container already.
|
|
|
|
* If there is we don't add more */
|
|
|
|
dev_t devid = container_choose_spare(
|
|
|
|
to, to, NULL, min_size);
|
|
|
|
if (devid > 0)
|
|
|
|
continue;
|
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
for (d = 0; d < MaxDisks; d++)
|
|
|
|
if (to->devid[d])
|
|
|
|
domainlist_add_dev(&domlist,
|
|
|
|
to->devid[d],
|
|
|
|
to->metadata->ss->name);
|
|
|
|
if (to->spare_group)
|
|
|
|
domain_add(&domlist, to->spare_group);
|
2010-12-16 15:16:12 +01:00
|
|
|
/*
|
|
|
|
* No spare migration if the destination
|
|
|
|
* has no domain. Skip this array.
|
|
|
|
*/
|
|
|
|
if (!domlist)
|
|
|
|
continue;
|
2010-11-25 08:37:23 +01:00
|
|
|
for (from=statelist ; from ; from=from->next) {
|
2010-11-26 11:49:33 +01:00
|
|
|
dev_t devid;
|
2010-12-16 15:16:12 +01:00
|
|
|
if (!check_donor(from, to))
|
2010-11-25 08:37:23 +01:00
|
|
|
continue;
|
2010-11-25 08:58:27 +01:00
|
|
|
if (from->metadata->ss->external)
|
|
|
|
devid = container_choose_spare(
|
2010-12-03 04:05:11 +01:00
|
|
|
from, to, domlist, min_size);
|
2010-11-25 08:58:27 +01:00
|
|
|
else
|
2010-12-03 04:05:11 +01:00
|
|
|
devid = choose_spare(from, to, domlist,
|
|
|
|
min_size);
|
2010-11-25 08:37:23 +01:00
|
|
|
if (devid > 0
|
2011-01-05 04:34:32 +01:00
|
|
|
&& move_spare(from->devname, to->devname, devid)) {
|
|
|
|
alert("MoveSpare", to->devname, from->devname, info);
|
|
|
|
break;
|
|
|
|
}
|
2010-11-25 08:37:23 +01:00
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
domain_free(domlist);
|
2010-11-22 10:58:07 +01:00
|
|
|
}
|
|
|
|
}
|
2010-11-22 10:58:07 +01:00
|
|
|
|
|
|
|
/* search the statelist to connect external
|
|
|
|
* metadata subarrays with their containers
|
|
|
|
* We always completely rebuild the tree from scratch as
|
|
|
|
* that is safest considering the possibility of entries
|
|
|
|
* disappearing or changing.
|
|
|
|
*/
|
|
|
|
static void link_containers_with_subarrays(struct state *list)
|
|
|
|
{
|
|
|
|
struct state *st;
|
|
|
|
struct state *cont;
|
|
|
|
for (st = list; st; st = st->next) {
|
|
|
|
st->parent = NULL;
|
|
|
|
st->subarray = NULL;
|
|
|
|
}
|
|
|
|
for (st = list; st; st = st->next)
|
|
|
|
if (st->parent_dev != NoMdDev)
|
|
|
|
for (cont = list; cont; cont = cont->next)
|
|
|
|
if (!cont->err &&
|
|
|
|
cont->parent_dev == NoMdDev &&
|
|
|
|
cont->devnum == st->parent_dev) {
|
|
|
|
st->parent = cont;
|
|
|
|
st->subarray = cont->subarray;
|
|
|
|
cont->subarray = st;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-12-14 07:31:22 +01:00
|
|
|
/* Not really Monitor but ... */
|
|
|
|
int Wait(char *dev)
|
|
|
|
{
|
|
|
|
struct stat stb;
|
|
|
|
int devnum;
|
|
|
|
int rv = 1;
|
|
|
|
|
|
|
|
if (stat(dev, &stb) != 0) {
|
|
|
|
fprintf(stderr, Name ": Cannot find %s: %s\n", dev,
|
|
|
|
strerror(errno));
|
|
|
|
return 2;
|
|
|
|
}
|
2008-09-16 05:58:42 +02:00
|
|
|
devnum = stat2devnum(&stb);
|
2006-12-14 07:31:22 +01:00
|
|
|
|
|
|
|
while(1) {
|
|
|
|
struct mdstat_ent *ms = mdstat_read(1, 0);
|
|
|
|
struct mdstat_ent *e;
|
|
|
|
|
|
|
|
for (e=ms ; e; e=e->next)
|
|
|
|
if (e->devnum == devnum)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (!e || e->percent < 0) {
|
2008-12-03 06:45:43 +01:00
|
|
|
if (e && e->metadata_version &&
|
2008-09-16 05:58:42 +02:00
|
|
|
strncmp(e->metadata_version, "external:", 9) == 0) {
|
|
|
|
if (is_subarray(&e->metadata_version[9]))
|
|
|
|
ping_monitor(&e->metadata_version[9]);
|
|
|
|
else
|
|
|
|
ping_monitor(devnum2devname(devnum));
|
|
|
|
}
|
2006-12-14 07:31:22 +01:00
|
|
|
free_mdstat(ms);
|
|
|
|
return rv;
|
|
|
|
}
|
2009-01-07 23:25:31 +01:00
|
|
|
free_mdstat(ms);
|
2006-12-14 07:31:22 +01:00
|
|
|
rv = 0;
|
|
|
|
mdstat_wait(5);
|
|
|
|
}
|
|
|
|
}
|