mdadm.8: Man page updates
General review and update of mdadm.8
This commit is contained in:
parent
222a7bfd2e
commit
e0fe762a63
|
@ -188,6 +188,8 @@ int Assemble(struct supertype *st, char *mddev,
|
|||
if (!devlist &&
|
||||
ident->uuid_set == 0 &&
|
||||
ident->super_minor < 0 &&
|
||||
ident->name[0] == 0 &&
|
||||
(ident->container == NULL || ident->member == NULL) &&
|
||||
ident->devices == NULL) {
|
||||
fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n",
|
||||
mddev ? mddev : "further assembly");
|
||||
|
|
10
Makefile
10
Makefile
|
@ -105,7 +105,7 @@ ASSEMBLE_SRCS += $(ASSEMBLE_AUTO_SRCS)
|
|||
ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
|
||||
endif
|
||||
|
||||
all : mdadm mdmon mdadm.man md.man mdadm.conf.man
|
||||
all : mdadm mdmon mdadm.man md.man mdadm.conf.man mdmon.man
|
||||
|
||||
everything: all mdadm.static swap_super test_stripe \
|
||||
mdassemble mdassemble.auto mdassemble.static mdassemble.man \
|
||||
|
@ -167,6 +167,9 @@ mdassemble.klibc : $(ASSEMBLE_SRCS) mdadm.h
|
|||
mdadm.man : mdadm.8
|
||||
nroff -man mdadm.8 > mdadm.man
|
||||
|
||||
mdmon.man : mdmon.8
|
||||
nroff -man mdmon.8 > mdmon.man
|
||||
|
||||
md.man : md.4
|
||||
nroff -man md.4 > md.man
|
||||
|
||||
|
@ -198,8 +201,9 @@ install-uclibc : mdadm.uclibc install-man
|
|||
install-klibc : mdadm.klibc install-man
|
||||
$(INSTALL) -D $(STRIP) -m 755 mdadm.klibc $(DESTDIR)$(BINDIR)/mdadm
|
||||
|
||||
install-man: mdadm.8 md.4 mdadm.conf.5
|
||||
install-man: mdadm.8 md.4 mdadm.conf.5 mdmon.8
|
||||
$(INSTALL) -D -m 644 mdadm.8 $(DESTDIR)$(MAN8DIR)/mdadm.8
|
||||
$(INSTALL) -D -m 644 mdmon.8 $(DESTDIR)$(MAN8DIR)/mdmon.8
|
||||
$(INSTALL) -D -m 644 md.4 $(DESTDIR)$(MAN4DIR)/md.4
|
||||
$(INSTALL) -D -m 644 mdadm.conf.5 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5
|
||||
|
||||
|
@ -207,7 +211,7 @@ install-udev: udev-md-raid.rules
|
|||
$(INSTALL) -D -m 644 udev-md-raid.rules $(DESTDIR)/lib/udev/rules.d/64-md-raid.rules
|
||||
|
||||
uninstall:
|
||||
rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 md.4 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm
|
||||
rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 $(DESTDIR)$(MAN8DIR)/mdmon.8 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm
|
||||
|
||||
test: mdadm mdmon test_stripe swap_super
|
||||
@echo "Please run 'sh ./test' as root"
|
||||
|
|
2
ReadMe.c
2
ReadMe.c
|
@ -139,7 +139,9 @@ struct option long_options[] = {
|
|||
{"write-mostly",0, 0, 'W'},
|
||||
{"re-add", 0, 0, ReAdd},
|
||||
{"homehost", 1, 0, HomeHost},
|
||||
#if 0
|
||||
{"auto-update-homehost", 0, 0, AutoHomeHost},
|
||||
#endif
|
||||
{"symlinks", 1, 0, Symlinks},
|
||||
|
||||
/* For assemble */
|
||||
|
|
80
md.4
80
md.4
|
@ -11,6 +11,8 @@ md \- Multiple Device driver aka Linux Software RAID
|
|||
.BI /dev/md n
|
||||
.br
|
||||
.BI /dev/md/ n
|
||||
.br
|
||||
.BR /dev/md/ name
|
||||
.SH DESCRIPTION
|
||||
The
|
||||
.B md
|
||||
|
@ -37,15 +39,17 @@ including RAID0 (striped array), LINEAR (catenated array),
|
|||
MULTIPATH (a set of different interfaces to the same device),
|
||||
and FAULTY (a layer over a single device into which errors can be injected).
|
||||
|
||||
.SS MD SUPER BLOCK
|
||||
Each device in an array may have a
|
||||
.I superblock
|
||||
which records information about the structure and state of the array.
|
||||
.SS MD METADATA
|
||||
Each device in an array may have some
|
||||
.I metadata
|
||||
stored in the device. This metadata is sometimes called a
|
||||
.BR superblock .
|
||||
The metadata records information about the structure and state of the array.
|
||||
This allows the array to be reliably re-assembled after a shutdown.
|
||||
|
||||
From Linux kernel version 2.6.10,
|
||||
.B md
|
||||
provides support for two different formats of this superblock, and
|
||||
provides support for two different formats of metadata, and
|
||||
other formats can be added. Prior to this release, only one format is
|
||||
supported.
|
||||
|
||||
|
@ -66,11 +70,11 @@ normally 1K long, but can be longer. It is normally stored between 8K
|
|||
and 12K from the end of the device, on a 4K boundary, though
|
||||
variations can be stored at the start of the device (version 1.1) or 4K from
|
||||
the start of the device (version 1.2).
|
||||
This superblock format stores multibyte data in a
|
||||
This metadata format stores multibyte data in a
|
||||
processor-independent format and supports up to hundreds of
|
||||
component devices (version 0.90 only supports 28).
|
||||
|
||||
The superblock contains, among other things:
|
||||
The metadata contains, among other things:
|
||||
.TP
|
||||
LEVEL
|
||||
The manner in which the devices are arranged into the array
|
||||
|
@ -80,6 +84,7 @@ UUID
|
|||
a 128 bit Universally Unique Identifier that identifies the array that
|
||||
contains this device.
|
||||
|
||||
.PP
|
||||
When a version 0.90 array is being reshaped (e.g. adding extra devices
|
||||
to a RAID5), the version number is temporarily set to 0.91. This
|
||||
ensures that if the reshape process is stopped in the middle (e.g. by
|
||||
|
@ -88,7 +93,7 @@ not support reshaping, then the array will not be assembled (which
|
|||
would cause data corruption) but will be left untouched until a kernel
|
||||
that can complete the reshape process is used.
|
||||
|
||||
.SS ARRAYS WITHOUT SUPERBLOCKS
|
||||
.SS ARRAYS WITHOUT METADATA
|
||||
While it is usually best to create arrays with superblocks so that
|
||||
they can be assembled reliably, there are some circumstances when an
|
||||
array without superblocks is preferred. These include:
|
||||
|
@ -118,6 +123,40 @@ configuration that does not use a superblock, and to maintain the state of
|
|||
the array elsewhere. While not encouraged for general use, it does
|
||||
have special-purpose uses and is supported.
|
||||
|
||||
.SS ARRAYS WITH EXTERNAL METADATA
|
||||
|
||||
From release 2.6.28, the
|
||||
.I md
|
||||
driver supports arrays with externally managed metadata. That is,
|
||||
the metadata is not managed by the kernel but rather by a user-space
|
||||
program which is external to the kernel. This allows support for a
|
||||
variety of metadata formats without cluttering the kernel with lots of
|
||||
details.
|
||||
.PP
|
||||
.I md
|
||||
is able to communicate with the user-space program through various
|
||||
sysfs attributes so that it can make appropriate changes to the
|
||||
metadata \- for example to mark a device as faulty. When necessary,
|
||||
.I md
|
||||
will wait for the program to acknowledge the event by writing to a
|
||||
sysfs attribute.
|
||||
The manual page for
|
||||
.IR mdmon (8)
|
||||
contains more detail about this interaction.
|
||||
|
||||
.SS CONTAINERS
|
||||
Many metadata formats use a single block of metadata to describe a
|
||||
number of different arrays which all use the same set of devices.
|
||||
In this case it is helpful for the kernel to know about the full set
|
||||
of devices as a whole. This set is known to md as a
|
||||
.IR container .
|
||||
A container is an
|
||||
.I md
|
||||
array with externally managed metadata and with device offset and size
|
||||
so that it just covers the metadata part of the devices. The
|
||||
remainder of each device is available to be incorporated into various
|
||||
arrays.
|
||||
|
||||
.SS LINEAR
|
||||
|
||||
A linear array simply catenates the available space on each
|
||||
|
@ -138,12 +177,12 @@ A RAID0 array (which has zero redundancy) is also known as a
|
|||
striped array.
|
||||
A RAID0 array is configured at creation with a
|
||||
.B "Chunk Size"
|
||||
which must be a power of two, and at least 4 kibibytes.
|
||||
which must be a power of two (prior to Linux 2.6.31), and at least 4
|
||||
kibibytes.
|
||||
|
||||
The RAID0 driver assigns the first chunk of the array to the first
|
||||
device, the second chunk to the second device, and so on until all
|
||||
drives have been assigned one chunk. This collection of chunks forms
|
||||
a
|
||||
drives have been assigned one chunk. This collection of chunks forms a
|
||||
.BR stripe .
|
||||
Further chunks are gathered into stripes in the same way, and are
|
||||
assigned to the remaining space in the drives.
|
||||
|
@ -175,6 +214,11 @@ multiple sequential streams or a random workload will use more than one
|
|||
spindle. In theory, having an N-disk RAID1 will allow N sequential
|
||||
threads to read from all disks.
|
||||
|
||||
Individual devices in a RAID1 can be marked as "write-mostly".
|
||||
These drives are excluded from the normal read balancing and will only
|
||||
be read from when there is no other option. This can be useful for
|
||||
devices connected over a slow link.
|
||||
|
||||
.SS RAID4
|
||||
|
||||
A RAID4 array is like a RAID0 array with an extra device for storing
|
||||
|
@ -274,7 +318,11 @@ A MULTIPATH array is composed of a number of logically different
|
|||
devices, often fibre channel interfaces, that all refer to the same
|
||||
real device. If one of these interfaces fails (e.g. due to cable
|
||||
problems), the multipath driver will attempt to redirect requests to
|
||||
another interface.
|
||||
another interface.
|
||||
|
||||
The MULTIPATH driver is not receiving any ongoing development and
|
||||
should be considered a legacy driver. The device-mapper based
|
||||
multipath drivers should be preferred for new installations.
|
||||
|
||||
.SS FAULTY
|
||||
The FAULTY md module is provided for testing purposes. A faulty array
|
||||
|
@ -569,6 +617,8 @@ in
|
|||
|
||||
.TP
|
||||
.B md_mod.start_ro=1
|
||||
.TP
|
||||
.B /sys/module/md_mod/parameters/start_ro
|
||||
This tells md to start all arrays in read-only mode. This is a soft
|
||||
read-only that will automatically switch to read-write on the first
|
||||
write request. However until that write request, nothing is written
|
||||
|
@ -577,6 +627,8 @@ operation is started.
|
|||
|
||||
.TP
|
||||
.B md_mod.start_dirty_degraded=1
|
||||
.TP
|
||||
.B /sys/module/md_mod/parameters/start_dirty_degraded
|
||||
As mentioned above, md will not normally start a RAID4, RAID5, or
|
||||
RAID6 that is both dirty and degraded as this situation can imply
|
||||
hidden data loss. This can be awkward if the root filesystem is
|
||||
|
@ -626,13 +678,13 @@ A readable and writable file that reflects the current "goal" rebuild
|
|||
speed for times when non-rebuild activity is current on an array.
|
||||
The speed is in Kibibytes per second, and is a per-device rate, not a
|
||||
per-array rate (which means that an array with more disks will shuffle
|
||||
more data for a given speed). The default is 100.
|
||||
more data for a given speed). The default is 1000.
|
||||
|
||||
.TP
|
||||
.B /proc/sys/dev/raid/speed_limit_max
|
||||
A readable and writable file that reflects the current "goal" rebuild
|
||||
speed for times when no non-rebuild activity is current on an array.
|
||||
The default is 100,000.
|
||||
The default is 200,000.
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR mdadm (8),
|
||||
|
|
9
mdadm.c
9
mdadm.c
|
@ -343,9 +343,11 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
continue;
|
||||
|
||||
#if 0
|
||||
case O(ASSEMBLE,AutoHomeHost):
|
||||
auto_update_home = 1;
|
||||
continue;
|
||||
#endif
|
||||
case O(INCREMENTAL, 'e'):
|
||||
case O(CREATE,'e'):
|
||||
case O(ASSEMBLE,'e'):
|
||||
|
@ -411,7 +413,10 @@ int main(int argc, char *argv[])
|
|||
optarg);
|
||||
exit(2);
|
||||
}
|
||||
if (level != 0 && level != -1 && level != 1 && level != -4 && level != -5 && mode == BUILD) {
|
||||
if (level != 0 && level != LEVEL_LINEAR && level != 1 &&
|
||||
level != LEVEL_MULTIPATH && level != LEVEL_FAULTY &&
|
||||
level != 10 &&
|
||||
mode == BUILD) {
|
||||
fprintf(stderr, Name ": Raid level %s not permitted with --build.\n",
|
||||
optarg);
|
||||
exit(2);
|
||||
|
@ -1150,6 +1155,7 @@ int main(int argc, char *argv[])
|
|||
} while (rv2!=2);
|
||||
/* Incase there are stacked devices, we need to go around again */
|
||||
} while (acnt);
|
||||
#if 0
|
||||
if (cnt == 0 && auto_update_home && homehost) {
|
||||
/* Nothing found, maybe we need to bootstrap homehost info */
|
||||
do {
|
||||
|
@ -1169,6 +1175,7 @@ int main(int argc, char *argv[])
|
|||
/* Incase there are stacked devices, we need to go around again */
|
||||
} while (acnt);
|
||||
}
|
||||
#endif
|
||||
if (cnt == 0 && rv == 0) {
|
||||
fprintf(stderr, Name ": No arrays found in config file or automatically\n");
|
||||
rv = 1;
|
||||
|
|
29
mdadm.conf.5
29
mdadm.conf.5
|
@ -63,7 +63,7 @@ will cause
|
|||
.I mdadm
|
||||
to look for assembled CONTAINER arrays and include them as a source
|
||||
for assembling further arrays.
|
||||
.PP
|
||||
|
||||
The word
|
||||
.I partitions
|
||||
will cause
|
||||
|
@ -86,7 +86,7 @@ DEVICE /dev/hda* /dev/hdc*
|
|||
.br
|
||||
DEV /dev/sd*
|
||||
.br
|
||||
DEVICE /dev/discs/disc*/disc
|
||||
DEVICE /dev/disk/by-path/pci*
|
||||
.br
|
||||
DEVICE partitions
|
||||
|
||||
|
@ -109,13 +109,12 @@ which matches the rest of the line will never be automatically assembled.
|
|||
If no device name is given,
|
||||
.I mdadm
|
||||
will use various heuristics to determine an appropriate name.
|
||||
.PP
|
||||
|
||||
Subsequent words identify the array, or identify the array as a member
|
||||
of a group. If multiple identities are given,
|
||||
then a component device must match ALL identities to be considered a
|
||||
match. Each identity word has a tag, an equals sign, and some value.
|
||||
The tags are:
|
||||
|
||||
.RS 4
|
||||
.TP
|
||||
.B uuid=
|
||||
|
@ -160,6 +159,7 @@ this is mainly for compatibility with the output of
|
|||
.TP
|
||||
.B spares=
|
||||
The value is a number of spare devices to expect the array to have.
|
||||
The sole use of this keyword and value is as follows:
|
||||
.B mdadm \-\-monitor
|
||||
will report an array if it is found to have fewer than this number of
|
||||
spares when
|
||||
|
@ -225,12 +225,12 @@ Specify that this array is a member array of some container. The
|
|||
value given can be either a path name in /dev, or a UUID of the
|
||||
container array.
|
||||
|
||||
.IP
|
||||
.TP
|
||||
.B member=
|
||||
Specify that this array is a member array of some container. Each
|
||||
type of container has some way to enumerate member arrays, often a
|
||||
simple sequence number. The value identifies which member of a
|
||||
container the array is. It will usually accompany a 'container=' word.
|
||||
container the array is. It will usually accompany a "container=" word.
|
||||
.RE
|
||||
|
||||
.TP
|
||||
|
@ -337,7 +337,7 @@ The
|
|||
.B homehost
|
||||
line gives a default value for the
|
||||
.B --homehost=
|
||||
option to mdadm. There should be exactly one other word on the line.
|
||||
option to mdadm. There should normally be only one other word on the line.
|
||||
It should either be a host name, or one of the special words
|
||||
.B <system>
|
||||
and
|
||||
|
@ -351,19 +351,26 @@ systemcall is used to get the host name.
|
|||
If
|
||||
.B <ignore>
|
||||
is given, then a flag is set so that when arrays are being
|
||||
auto-assemble the checking of the recorded
|
||||
auto-assembled the checking of the recorded
|
||||
.I homehost
|
||||
is disabled.
|
||||
If
|
||||
.B <ignore>
|
||||
is given it is also possible to give an explicit name which will be
|
||||
used when creating arrays. This is the only case when there can be
|
||||
more than one other word on the
|
||||
.B HOMEHOST
|
||||
line.
|
||||
|
||||
When arrays are created, this host name will be stored in the
|
||||
metadata. When arrays are assembled using auto-assembly, arrays which
|
||||
do not record the correct homehost name in their metadata will be
|
||||
assembled using a 'foreign' name. A 'foreign' name alway ends with a
|
||||
digit string (possibly preceded by an underscore) to differentiate it
|
||||
assembled using a "foreign" name. A "foreign" name always ends with a
|
||||
digit string preceded by an underscore to differentiate it
|
||||
from any possible local name. e.g.
|
||||
.B /dev/md/1_1
|
||||
or
|
||||
.BR /dev/md/home0 .
|
||||
.BR /dev/md/home_0 .
|
||||
.TP
|
||||
.B AUTO
|
||||
A list of names of metadata format can be given, each preceded by a
|
||||
|
|
68
mdmon.8
68
mdmon.8
|
@ -15,11 +15,12 @@ occurs, like disk failures and clean-to-dirty transitions. The kernel, in
|
|||
important cases, waits for user space to take action on these notifications.
|
||||
|
||||
.SH DESCRIPTION
|
||||
.P
|
||||
.B Metadata updates:
|
||||
.P
|
||||
To service metadata update requests a daemon, mdmon, is introduced.
|
||||
Mdmon is tasked with polling the sysfs namespace looking for changes in
|
||||
.SS Metadata updates:
|
||||
To service metadata update requests a daemon,
|
||||
.IR mdmon ,
|
||||
is introduced.
|
||||
.I Mdmon
|
||||
is tasked with polling the sysfs namespace looking for changes in
|
||||
.BR array_state ,
|
||||
.BR sync_action ,
|
||||
and per disk
|
||||
|
@ -48,7 +49,9 @@ The safe mode timer has expired so set array state to clean to block writes to t
|
|||
Clear the dirty bit for the volume
|
||||
.TP
|
||||
.B array_state \- read-only
|
||||
This is the initial state that all arrays start at. mdmon takes one of the three actions:
|
||||
This is the initial state that all arrays start at.
|
||||
.I mdmon
|
||||
takes one of the three actions:
|
||||
.RS
|
||||
.TP
|
||||
1/
|
||||
|
@ -72,8 +75,8 @@ checkpoint resync.
|
|||
.TP
|
||||
.B sync_action \- recover\-to\-idle
|
||||
A spare may have completed rebuilding so tell the metadata handler about the
|
||||
state of each disk. This is the metadata handler’s opportunity to clear any
|
||||
"out-of-sync" bits and clear the volume’s degraded status. If a recovery
|
||||
state of each disk. This is the metadata handler's opportunity to clear
|
||||
any "out-of-sync" bits and clear the volume's degraded status. If a recovery
|
||||
process is idled before it completes this event allows the metadata handler to
|
||||
checkpoint recovery.
|
||||
.TP
|
||||
|
@ -81,10 +84,10 @@ checkpoint recovery.
|
|||
A disk failure kicks off a series of events. First, notify the metadata
|
||||
handler that a disk has failed, and then notify the kernel that it can unblock
|
||||
writes that were dependent on this disk. After unblocking the kernel this disk
|
||||
is set to be removed* from the member array. Finally the disk is marked failed
|
||||
is set to be removed+ from the member array. Finally the disk is marked failed
|
||||
in all other member arrays in the container.
|
||||
.IP
|
||||
\* Note This behavior differs slightly from native MD arrays where
|
||||
+ Note This behavior differs slightly from native MD arrays where
|
||||
removal is reserved for a
|
||||
.B mdadm --remove
|
||||
event. In the external metadata case the container holds the final
|
||||
|
@ -93,8 +96,7 @@ reference on a block device and a
|
|||
call is still required.
|
||||
.RE
|
||||
|
||||
.P
|
||||
.B Containers:
|
||||
.SS Containers:
|
||||
.P
|
||||
External metadata formats, like DDF, differ from the native MD metadata
|
||||
formats in that they define a set of disks and a series of sub-arrays
|
||||
|
@ -106,7 +108,9 @@ each array can created be created with a subset of those partitions. The
|
|||
supported external formats perform this disk carving internally.
|
||||
.P
|
||||
Container devices simply hold references to all member disks and allow
|
||||
tools like mdmon to determine which active arrays belong to which
|
||||
tools like
|
||||
.I mdmon
|
||||
to determine which active arrays belong to which
|
||||
container. Some array management commands like disk removal and disk
|
||||
add are now only valid at the container level. Attempts to perform
|
||||
these actions on member arrays are blocked with error messages like:
|
||||
|
@ -125,14 +129,36 @@ CONTAINER
|
|||
The
|
||||
.B container
|
||||
device to monitor. It can be a full path like /dev/md/container, a simple md
|
||||
device name like md127, or /proc/mdstat which tells mdmon to scan for
|
||||
containers and launch an mdmon instance for each one found.
|
||||
device name like md127, or /proc/mdstat which tells
|
||||
.I mdmon
|
||||
to scan for containers and launch an
|
||||
.I mdmon
|
||||
instance for each one found.
|
||||
.TP
|
||||
[NEWROOT]
|
||||
In order to support an external metadata raid array as the rootfs mdmon needs
|
||||
to be started in the initramfs environment. Once the initramfs environment
|
||||
mounts the final rootfs mdmon needs to be restarted in the new namespace. When
|
||||
NEWROOT is specified mdmon will terminate any mdmon instances that are running
|
||||
in the current namespace, chroot(2) to NEWROOT, and continue monitoring the
|
||||
container.
|
||||
In order to support an external metadata raid array as the rootfs
|
||||
.I mdmon
|
||||
needs to be started in the initramfs environment. Once the initramfs
|
||||
environment mounts the final rootfs
|
||||
.I mdmon
|
||||
needs to be restarted in the new namespace. When NEWROOT is specified
|
||||
.I mdmon
|
||||
will terminate any
|
||||
.I mdmon
|
||||
instances that are running in the current namespace,
|
||||
.IR chroot (2)
|
||||
to NEWROOT, and continue monitoring the container.
|
||||
.PP
|
||||
Note that
|
||||
.I mdmon
|
||||
is automatically started by
|
||||
.I mdadm
|
||||
when needed and so does not need to be considered when working with
|
||||
RAID arrays. The only time it is run other than by
|
||||
.I mdadm
|
||||
is when the boot scripts need to restart it after mounting the new
|
||||
root filesystem.
|
||||
|
||||
.SH SEE ALSO
|
||||
.IR mdadm (8),
|
||||
.IR md (4).
|
||||
|
|
Loading…
Reference in New Issue