aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJonathan Brassow <jbrassow@redhat.com>2013-04-11 15:33:59 -0500
committerJonathan Brassow <jbrassow@redhat.com>2013-04-11 15:33:59 -0500
commitff64e3500f6acf93dce017388445c4828111d06f (patch)
tree7e47f6eac0fa1a6487764a58801c8e57ab720a23 /lib
parente7ccad2ef0e813f6ca42ab5090e76aa5bee05912 (diff)
downloadlvm2-ff64e3500f6acf93dce017388445c4828111d06f.tar.gz
RAID: Add scrubbing support for RAID LVs
New options to 'lvchange' allow users to scrub their RAID LVs. Synopsis: lvchange --syncaction {check|repair} vg/raid_lv RAID scrubbing is the process of reading all the data and parity blocks in an array and checking to see whether they are coherent. 'lvchange' can now initiate the two scrubbing operations: "check" and "repair". "check" will go over the array and record the number of discrepancies but not repair them. "repair" will correct the discrepancies as it finds them. 'lvchange --syncaction repair vg/raid_lv' is not to be confused with 'lvconvert --repair vg/raid_lv'. The former initiates a background synchronization operation on the array, while the latter is designed to repair/replace failed devices in a mirror or RAID logical volume. Additional reporting has been added for 'lvs' to support the new operations. Two new printable fields (which are not printed by default) have been added: "syncaction" and "mismatches". These can be accessed using the '-o' option to 'lvs', like: lvs -o +syncaction,mismatches vg/lv "syncaction" will print the current synchronization operation that the RAID volume is performing. It can be one of the following: - idle: All sync operations complete (doing nothing) - resync: Initializing an array or recovering after a machine failure - recover: Replacing a device in the array - check: Looking for array inconsistencies - repair: Looking for and repairing inconsistencies The "mismatches" field will print the number of discrepancies found during a check or repair operation. The 'Cpy%Sync' field already available to 'lvs' will print the progress of any of the above syncactions, including check and repair. Finally, the lv_attr field has changed to accommodate the scrubbing operations as well. The role of the 'p'artial character in the lv_attr report field has expanded.
"Partial" is really an indicator for the health of a logical volume and it makes sense to extend this to include other health indicators as well, specifically: 'm'ismatches: Indicates that there are discrepancies in a RAID LV. This character is shown after a scrubbing operation has detected that portions of the RAID are not coherent. 'r'efresh : Indicates that a device in a RAID array has suffered a failure and the kernel regards it as failed - even though LVM can read the device label and considers the device to be ok. The LV should be 'r'efreshed to notify the kernel that the device is now available, or the device should be 'r'eplaced if it is suspected of failing.
Diffstat (limited to 'lib')
-rw-r--r--lib/activate/activate.c133
-rw-r--r--lib/activate/activate.h3
-rw-r--r--lib/activate/dev_manager.c49
-rw-r--r--lib/activate/dev_manager.h3
-rw-r--r--lib/metadata/lv.c14
-rw-r--r--lib/report/columns.h2
-rw-r--r--lib/report/properties.c20
-rw-r--r--lib/report/report.c36
8 files changed, 255 insertions, 5 deletions
diff --git a/lib/activate/activate.c b/lib/activate/activate.c
index 572383f87..b3439adba 100644
--- a/lib/activate/activate.c
+++ b/lib/activate/activate.c
@@ -183,6 +183,18 @@ int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health)
{
return 0;
}
+/*
+ * Stub variant of lv_raid_mismatch_count(): never queries a device and
+ * always reports failure.
+ *
+ * '*cnt' is zeroed so the output is defined even though the call fails,
+ * which matches what the full implementation of this function does before
+ * any of its own failure paths.
+ *
+ * NOTE(review): presumably built only when device-mapper support is
+ * compiled out - the enclosing preprocessor guard is not visible here.
+ *
+ * Returns 0 always.
+ */
+int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt)
+{
+	*cnt = 0;
+
+	return 0;
+}
+/*
+ * Stub variant of lv_raid_sync_action(): never queries a device and
+ * always reports failure.
+ *
+ * '*sync_action' is set to NULL so the output is defined even though the
+ * call fails, which matches what the full implementation of this function
+ * does before any of its own failure paths.
+ *
+ * NOTE(review): presumably built only when device-mapper support is
+ * compiled out - the enclosing preprocessor guard is not visible here.
+ *
+ * Returns 0 always.
+ */
+int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action)
+{
+	*sync_action = NULL;
+
+	return 0;
+}
+/*
+ * Stub variant of lv_raid_message(): no message is sent anywhere and the
+ * call always reports failure (returns 0).  Both arguments are ignored.
+ */
+int lv_raid_message(const struct logical_volume *lv, const char *msg)
+{
+	(void) lv;
+	(void) msg;
+
+	return 0;
+}
int lv_thin_pool_percent(const struct logical_volume *lv, int metadata,
percent_t *percent)
{
@@ -796,7 +808,7 @@ int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health)
*dev_health = NULL;
if (!activation())
- return 0;
+ return_0;
log_debug_activation("Checking raid device health for LV %s/%s",
lv->vg->name, lv->name);
@@ -820,6 +832,125 @@ int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health)
return r;
}
+/*
+ * Fetch the mismatch count reported in the raid status of an LV.
+ *
+ * lv:  logical volume to query
+ * cnt: output; reset to 0 on entry and overwritten with the status'
+ *      'mismatch_count' on success
+ *
+ * Returns 1 on success.  Returns 0 when activation is disabled, the LV is
+ * not active, the device manager cannot be created or the raid status
+ * cannot be retrieved.
+ */
+int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt)
+{
+	struct dev_manager *dm;
+	struct dm_status_raid *raid_status;
+	int have_status;
+
+	*cnt = 0;
+
+	if (!activation())
+		return 0;
+
+	log_debug_activation("Checking raid mismatch count for LV %s/%s",
+			     lv->vg->name, lv->name);
+
+	if (!lv_is_active(lv))
+		return_0;
+
+	dm = dev_manager_create(lv->vg->cmd, lv->vg->name, 1);
+	if (!dm)
+		return_0;
+
+	/*
+	 * Read the count while the device manager still exists.
+	 * NOTE(review): presumably the status memory belongs to 'dm' - confirm.
+	 */
+	have_status = dev_manager_raid_status(dm, lv, &raid_status);
+	if (have_status)
+		*cnt = raid_status->mismatch_count;
+
+	dev_manager_destroy(dm);
+
+	if (!have_status)
+		return_0;
+
+	return 1;
+}
+
+
+/*
+ * Report the sync action currently listed in the raid status of an LV.
+ *
+ * lv:          logical volume to query
+ * sync_action: output; set to NULL on entry and, on success, to a copy of
+ *              the status' 'sync_action' string allocated from
+ *              lv->vg->cmd->mem
+ *
+ * Returns 1 on success.  Returns 0 when activation is disabled, the LV is
+ * not active, the device manager or raid status cannot be obtained, the
+ * status carries no sync action, or the string cannot be duplicated.
+ */
+int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action)
+{
+	struct dev_manager *dm;
+	struct dm_status_raid *status;
+	char *action;
+
+	*sync_action = NULL;
+
+	if (!activation())
+		return 0;
+
+	log_debug_activation("Checking raid sync_action for LV %s/%s",
+			     lv->vg->name, lv->name);
+
+	if (!lv_is_active(lv))
+		return_0;
+
+	if (!(dm = dev_manager_create(lv->vg->cmd, lv->vg->name, 1)))
+		return_0;
+
+	/*
+	 * 'status->sync_action' may be NULL: lv_raid_message() tests for
+	 * exactly that and reports that the kernel driver does not support
+	 * the action.  It must therefore be checked before being handed to
+	 * dm_pool_strdup().
+	 */
+	if (!dev_manager_raid_status(dm, lv, &status) ||
+	    !status->sync_action ||
+	    !(action = dm_pool_strdup(lv->vg->cmd->mem,
+				      status->sync_action))) {
+		dev_manager_destroy(dm);
+		return_0;
+	}
+
+	*sync_action = action;
+
+	dev_manager_destroy(dm);
+
+	return 1;
+}
+
+
+/*
+ * Ask the kernel to start a scrubbing operation on an active RAID LV.
+ *
+ * lv:  logical volume the message is sent to
+ * msg: requested sync action; only "check" and "repair" are accepted
+ *
+ * The request is refused unless the sync action currently reported in the
+ * raid status is "idle".
+ *
+ * Returns the result of dev_manager_raid_message() when the message is
+ * sent, and 0 on every earlier failure (inactive LV, no device manager,
+ * status unavailable, no sync action in the status, unsupported message,
+ * LV not idle).
+ */
+int lv_raid_message(const struct logical_volume *lv, const char *msg)
+{
+	int r = 0;
+	struct dev_manager *dm;
+	struct dm_status_raid *status;
+
+	if (!lv_is_active(lv)) {
+		log_error("Unable to send message to an inactive logical volume.");
+		return 0;
+	}
+
+	if (!(dm = dev_manager_create(lv->vg->cmd, lv->vg->name, 1)))
+		return_0;
+
+	/*
+	 * The result is deliberately not stored in 'r': on success it would
+	 * be 1, and every later 'goto out' on an error path would then
+	 * report success to the caller.
+	 */
+	if (!dev_manager_raid_status(dm, lv, &status)) {
+		log_error("Failed to retrieve status of %s/%s",
+			  lv->vg->name, lv->name);
+		goto out;
+	}
+
+	if (!status->sync_action) {
+		log_error("Kernel driver does not support this action: %s", msg);
+		goto out;
+	}
+
+	/*
+	 * Note that 'dev_manager_raid_message' allows us to pass down any
+	 * currently valid message.  However, this function restricts the
+	 * number of user available combinations to a minimum.  Specifically,
+	 *     "idle" -> "check"
+	 *     "idle" -> "repair"
+	 * (The state automatically switches to "idle" when a sync process is
+	 * complete.)
+	 */
+	if (strcmp(msg, "check") && strcmp(msg, "repair")) {
+		/*
+		 * MD allows "frozen" to operate in a toggling fashion.
+		 * We could allow this if we like...
+		 */
+		log_error("\"%s\" is not a supported sync operation.", msg);
+		goto out;
+	}
+	if (strcmp(status->sync_action, "idle")) {
+		log_error("%s/%s state is currently \"%s\".  Unable to switch to \"%s\".",
+			  lv->vg->name, lv->name, status->sync_action, msg);
+		goto out;
+	}
+
+	r = dev_manager_raid_message(dm, lv, msg);
+out:
+	dev_manager_destroy(dm);
+
+	return r;
+}
+
+
/*
* Returns data or metadata percent usage, depends on metadata 0/1.
* Returns 1 if percent set, else 0 on failure.
diff --git a/lib/activate/activate.h b/lib/activate/activate.h
index bf1f7a011..f7c312f30 100644
--- a/lib/activate/activate.h
+++ b/lib/activate/activate.h
@@ -117,6 +117,9 @@ int lv_mirror_percent(struct cmd_context *cmd, const struct logical_volume *lv,
int wait, percent_t *percent, uint32_t *event_nr);
int lv_raid_percent(const struct logical_volume *lv, percent_t *percent);
int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health);
+int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt); /* Stores the raid status mismatch count in *cnt; returns 1 on success, 0 on failure. */
+int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action); /* Stores a copy of the current sync action string in *sync_action; returns 1 on success, 0 on failure. */
+int lv_raid_message(const struct logical_volume *lv, const char *msg); /* Sends a sync action message ("check" or "repair") to an active LV; returns 0 on failure. */
int lv_thin_pool_percent(const struct logical_volume *lv, int metadata,
percent_t *percent);
int lv_thin_percent(const struct logical_volume *lv, int mapped,
diff --git a/lib/activate/dev_manager.c b/lib/activate/dev_manager.c
index 4a7732087..8c0647677 100644
--- a/lib/activate/dev_manager.c
+++ b/lib/activate/dev_manager.c
@@ -1066,6 +1066,55 @@ out:
return r;
}
+/*
+ * Send a target message to the device-mapper device of a RAID LV.
+ *
+ * dm:  device manager; its memory pool is used to build the device uuid
+ * lv:  logical volume to address; must have the RAID status flag set
+ * msg: message to send; must be one of the names listed below
+ *
+ * Returns 1 if the message task ran successfully, 0 otherwise (non-RAID
+ * LV, unknown message, uuid or task setup failure, or task failure).
+ */
+int dev_manager_raid_message(struct dev_manager *dm,
+			     const struct logical_volume *lv,
+			     const char *msg)
+{
+	int r = 0;
+	const char *dlid;
+	struct dm_task *dmt;
+	const char *layer = lv_layer(lv);
+
+	if (!(lv->status & RAID)) {
+		log_error(INTERNAL_ERROR "%s/%s is not a RAID logical volume",
+			  lv->vg->name, lv->name);
+		return 0;
+	}
+
+	/*
+	 * These are the supported RAID messages for dm-raid v1.5.0.
+	 * strcmp() is non-zero on a mismatch, so the message is unknown
+	 * only when it differs from every supported name.
+	 */
+	if (strcmp(msg, "idle") &&
+	    strcmp(msg, "frozen") &&
+	    strcmp(msg, "resync") &&
+	    strcmp(msg, "recover") &&
+	    strcmp(msg, "check") &&
+	    strcmp(msg, "repair") &&
+	    strcmp(msg, "reshape")) {
+		log_error("Unknown RAID message: %s", msg);
+		return 0;
+	}
+
+	if (!(dlid = build_dm_uuid(dm->mem, lv->lvid.s, layer)))
+		return_0;
+
+	if (!(dmt = _setup_task(NULL, dlid, 0, DM_DEVICE_TARGET_MSG, 0, 0)))
+		return_0;
+
+	/* A failure here is only logged; the message is still attempted. */
+	if (!dm_task_no_open_count(dmt))
+		log_error("Failed to disable open_count.");
+
+	if (!dm_task_set_message(dmt, msg))
+		goto_out;
+
+	if (!dm_task_run(dmt))
+		goto_out;
+
+	r = 1;
+out:
+	dm_task_destroy(dmt);
+
+	return r;
+}
+
+
#if 0
log_very_verbose("%s %s", sus ? "Suspending" : "Resuming", name);
diff --git a/lib/activate/dev_manager.h b/lib/activate/dev_manager.h
index a4556e710..ecf3c5f25 100644
--- a/lib/activate/dev_manager.h
+++ b/lib/activate/dev_manager.h
@@ -57,6 +57,9 @@ int dev_manager_mirror_percent(struct dev_manager *dm,
int dev_manager_raid_status(struct dev_manager *dm,
const struct logical_volume *lv,
struct dm_status_raid **status);
+int dev_manager_raid_message(struct dev_manager *dm,
+ const struct logical_volume *lv,
+ const char *msg); /* Sends 'msg' as a target message to the RAID LV's device; returns 1 on success, 0 on failure. */
int dev_manager_thin_pool_status(struct dev_manager *dm,
const struct logical_volume *lv,
struct dm_status_thin_pool **status,
diff --git a/lib/metadata/lv.c b/lib/metadata/lv.c
index f1c79be82..5cb87c346 100644
--- a/lib/metadata/lv.c
+++ b/lib/metadata/lv.c
@@ -597,11 +597,17 @@ char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv)
else
repstr[7] = '-';
- if (lv->status & PARTIAL_LV ||
- (lv_is_raid_type(lv) && !_lv_raid_healthy(lv)))
+ repstr[8] = '-';
+ if (lv->status & PARTIAL_LV)
repstr[8] = 'p';
- else
- repstr[8] = '-';
+ else if (lv_is_raid_type(lv)) {
+ uint64_t n;
+ if (!_lv_raid_healthy(lv))
+ repstr[8] = 'r'; /* RAID needs 'r'efresh */
+ else if ((lv->status & RAID) &&
+ lv_raid_mismatch_count(lv, &n) && n)
+ repstr[8] = 'm'; /* RAID contains 'm'ismatches */
+ }
out:
return repstr;
diff --git a/lib/report/columns.h b/lib/report/columns.h
index 23d3e11d0..b6dc483f9 100644
--- a/lib/report/columns.h
+++ b/lib/report/columns.h
@@ -80,6 +80,8 @@ FIELD(LVS, lv, NUM, "Snap%", lvid, 6, snpercent, snap_percent, "For snapshots, t
FIELD(LVS, lv, NUM, "Meta%", lvid, 6, metadatapercent, metadata_percent, "For thin pools, the percentage of metadata full if LV is active.", 0)
FIELD(LVS, lv, NUM, "Cpy%Sync", lvid, 8, copypercent, copy_percent, "For RAID, mirrors and pvmove, current percentage in-sync.", 0)
FIELD(LVS, lv, NUM, "Cpy%Sync", lvid, 8, copypercent, sync_percent, "For RAID, mirrors and pvmove, current percentage in-sync.", 0)
+FIELD(LVS, lv, NUM, "Mismatches", lvid, 10, mismatch_count, mismatches, "For RAID, number of mismatches found or repaired.", 0)
+FIELD(LVS, lv, STR, "SyncAction", lvid, 10, sync_action, syncaction, "For RAID, the current synchronization action being performed.", 0)
FIELD(LVS, lv, STR, "Move", lvid, 4, movepv, move_pv, "For pvmove, Source PV of temporary LV created by pvmove.", 0)
FIELD(LVS, lv, STR, "Convert", lvid, 7, convertlv, convert_lv, "For lvconvert, Name of temporary LV created by lvconvert.", 0)
FIELD(LVS, lv, STR, "Log", lvid, 3, loglv, mirror_log, "For mirrors, the LV holding the synchronisation log.", 0)
diff --git a/lib/report/properties.c b/lib/report/properties.c
index 08443d4da..c04a6a134 100644
--- a/lib/report/properties.c
+++ b/lib/report/properties.c
@@ -93,6 +93,22 @@ static percent_t _copy_percent(const struct logical_volume *lv) {
return perc;
}
+/* Property helper: mismatch count of 'lv', or 0 when it cannot be read. */
+static uint64_t _mismatches(const struct logical_volume *lv) {
+	uint64_t mismatch_cnt;
+
+	return lv_raid_mismatch_count(lv, &mismatch_cnt) ? mismatch_cnt : 0;
+}
+
+
+/*
+ * Property helper: current sync action string of 'lv', or NULL when it
+ * cannot be read.
+ */
+static char *_sync_action(const struct logical_volume *lv) {
+	char *current_action;
+
+	return lv_raid_sync_action(lv, &current_action) ? current_action : NULL;
+}
+
+
static percent_t _snap_percent(const struct logical_volume *lv) {
percent_t perc;
@@ -195,6 +211,10 @@ GET_LV_NUM_PROPERTY_FN(copy_percent, _copy_percent(lv))
#define _copy_percent_set _not_implemented_set
GET_LV_NUM_PROPERTY_FN(sync_percent, _copy_percent(lv))
#define _sync_percent_set _not_implemented_set
+GET_LV_NUM_PROPERTY_FN(mismatches, _mismatches(lv))
+#define _mismatches_set _not_implemented_set
+GET_LV_STR_PROPERTY_FN(syncaction, _sync_action(lv))
+#define _syncaction_set _not_implemented_set
GET_LV_STR_PROPERTY_FN(move_pv, lv_move_pv_dup(lv->vg->vgmem, lv))
#define _move_pv_set _not_implemented_set
GET_LV_STR_PROPERTY_FN(convert_lv, lv_convert_lv_dup(lv->vg->vgmem, lv))
diff --git a/lib/report/report.c b/lib/report/report.c
index 633dfe4ad..4b0ebef91 100644
--- a/lib/report/report.c
+++ b/lib/report/report.c
@@ -936,6 +936,42 @@ no_copypercent:
return 1;
}
+/*
+ * Report display function for the sync action of an LV.
+ *
+ * 'data' is the logical volume being reported.  For an LV with the RAID
+ * status flag whose sync action can be read, the string is displayed via
+ * _string_disp() and its result returned.  In every other case the field
+ * is set to the empty string and 1 is returned.
+ */
+static int _sync_action_disp(struct dm_report *rh __attribute__((unused)),
+			     struct dm_pool *mem,
+			     struct dm_report_field *field,
+			     const void *data,
+			     void *private __attribute__((unused)))
+{
+	const struct logical_volume *lv = (const struct logical_volume *) data;
+	char *action;
+
+	/* The query is only attempted for RAID LVs (short-circuit). */
+	if ((lv->status & RAID) && lv_raid_sync_action(lv, &action))
+		return _string_disp(rh, mem, field, &action, private);
+
+	dm_report_field_set_value(field, "", NULL);
+
+	return 1;
+}
+
+
+/*
+ * Report display function for the mismatch count of an LV.
+ *
+ * 'data' is the logical volume being reported.  For an LV with the RAID
+ * status flag whose mismatch count can be read, the number is displayed
+ * via dm_report_field_uint64() and its result returned.  In every other
+ * case the field is set to the empty string and 1 is returned.
+ */
+static int _mismatch_count_disp(struct dm_report *rh __attribute__((unused)),
+				struct dm_pool *mem,
+				struct dm_report_field *field,
+				const void *data,
+				void *private __attribute__((unused)))
+{
+	const struct logical_volume *lv = (const struct logical_volume *) data;
+	uint64_t mismatches;
+
+	/* The query is only attempted for RAID LVs (short-circuit). */
+	if ((lv->status & RAID) && lv_raid_mismatch_count(lv, &mismatches))
+		return dm_report_field_uint64(rh, field, &mismatches);
+
+	dm_report_field_set_value(field, "", NULL);
+
+	return 1;
+}
+
+
static int _dtpercent_disp(int metadata, struct dm_report *rh,
struct dm_pool *mem,
struct dm_report_field *field,